xref: /aosp_15_r20/external/stg/post_processing.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Siddharth Nayyar
19 
20 #include "post_processing.h"
21 
22 #include <algorithm>
23 #include <cstddef>
24 #include <cstdint>
25 #include <iostream>
26 #include <map>
27 #include <ostream>
28 #include <regex>
29 #include <sstream>
30 #include <string>
31 #include <unordered_map>
32 #include <utility>
33 #include <vector>
34 
35 namespace stg {
36 
37 namespace {
38 
// Limits how many "CRC only" symbol changes are listed in each report section.
//
// A symbol change is treated as CRC-only when a "... symbol ... changed" line
// is immediately followed by a "  CRC changed from X to Y" line and then an
// empty line. Such triples are buffered; when a section heading is reached (or
// the report ends) at most `limit` of them are written back out, followed by a
// one-line summary of how many were omitted if the buffer exceeded `limit`.
// All other lines are passed through in their original position relative to
// non-buffered lines.
//
// report: the report, one entry per line.
// limit:  the maximum number of CRC-only changes to list per flush.
// Returns the rewritten report.
std::vector<std::string> SummariseCRCChanges(
    const std::vector<std::string>& report, size_t limit) {
  const std::regex heading_pattern("^[^ \\n].*$");
  const std::regex any_symbol_pattern("^.* symbol .*$");
  const std::regex changed_symbol_pattern("^.* symbol .* changed$");
  const std::regex crc_pattern("^  CRC changed from [^ ]* to [^ ]*$");
  const std::regex blank_pattern("^$");

  std::vector<std::string> result;
  // Buffered (symbol line, CRC line) pairs awaiting the next flush.
  std::vector<std::pair<std::string, std::string>> crc_only;

  const auto flush = [&result, &crc_only, limit]() {
    const size_t total = crc_only.size();
    const size_t shown = std::min(total, limit);
    for (size_t i = 0; i < shown; ++i) {
      const auto& entry = crc_only[i];
      result.push_back(entry.first);
      result.push_back(entry.second);
      result.emplace_back();
    }
    if (total > limit) {
      std::ostringstream summary;
      summary << "... " << total - limit << " omitted; " << total
              << " symbols have only CRC changes";
      result.push_back(summary.str());
      result.emplace_back();
    }
    crc_only.clear();
  };

  const size_t count = report.size();
  size_t pos = 0;
  while (pos < count) {
    const std::string& line = report[pos];

    // An unindented line that does not mention a symbol starts a new section.
    const bool starts_section = std::regex_match(line, heading_pattern) &&
                                !std::regex_match(line, any_symbol_pattern);
    if (starts_section) {
      flush();
      result.push_back(line);
      ++pos;
      continue;
    }

    const bool crc_only_change =
        pos + 2 < count &&
        std::regex_match(line, changed_symbol_pattern) &&
        std::regex_match(report[pos + 1], crc_pattern) &&
        std::regex_match(report[pos + 2], blank_pattern);
    if (crc_only_change) {
      crc_only.emplace_back(line, report[pos + 1]);
      // symbol line, CRC line and the trailing empty line are all consumed
      pos += 3;
      continue;
    }

    result.push_back(line);
    ++pos;
  }

  flush();
  return result;
}
87 
// Collapses consecutive member changes that consist only of the same offset
// shift into a single two-line summary.
//
// A member change is eligible when a "member '...' changed" line is followed
// by an "offset changed from A to B" line indented two spaces deeper, and the
// line after that is indented no deeper than the member line (so the member
// has no further details). Eligible members are accumulated while their
// indentation and offset delta (B - A) stay the same; each run is emitted as
//   "member X changed" / "N members (X .. Y) changed"
//   "  offset changed by DELTA"
// Every other line is copied through unchanged, flushing any open run first.
//
// report: the report, one entry per line.
// Returns the rewritten report.
std::vector<std::string> SummariseOffsetChanges(
    const std::vector<std::string>& report) {
  const std::regex member_pattern("^( *)member ('.*') changed$");
  const std::regex offset_pattern("^( *)offset changed from (\\d+) to (\\d+)$");
  const std::regex indent_pattern("^( *).*$");

  std::vector<std::string> result;
  // State describing the run of members currently being accumulated.
  std::vector<std::string> run_members;
  size_t run_indent = 0;
  int64_t run_delta = 0;

  const auto flush_run = [&]() {
    if (!run_members.empty()) {
      const std::string padding(run_indent, ' ');

      std::ostringstream heading;
      heading << padding;
      if (run_members.size() == 1) {
        heading << "member " << run_members.front() << " changed";
      } else {
        heading << run_members.size() << " members (" << run_members.front()
                << " .. " << run_members.back() << ") changed";
      }
      result.push_back(heading.str());

      std::ostringstream detail;
      detail << padding << "  offset changed by " << run_delta;
      result.push_back(detail.str());

      run_members.clear();
    }
  };

  const size_t count = report.size();
  for (size_t pos = 0; pos < count; ++pos) {
    std::smatch member_match;
    std::smatch offset_match;
    std::smatch follower_match;
    bool absorbed = false;

    // A third line must exist so its indentation can be compared.
    const bool shaped_like_offset_change =
        pos + 2 < count &&
        std::regex_match(report[pos], member_match, member_pattern) &&
        std::regex_match(report[pos + 1], offset_match, offset_pattern) &&
        std::regex_match(report[pos + 2], follower_match, indent_pattern);

    if (shaped_like_offset_change) {
      const size_t member_indent = member_match[1].length();
      const size_t offset_indent = offset_match[1].length();
      const size_t follower_indent = follower_match[1].length();
      if (member_indent + 2 == offset_indent &&
          member_indent >= follower_indent) {
        const int64_t delta = std::stoll(offset_match[3].str()) -
                              std::stoll(offset_match[2].str());
        // A different indentation or delta ends the current run.
        if (member_indent != run_indent || delta != run_delta) {
          flush_run();
          run_indent = member_indent;
          run_delta = delta;
        }
        run_members.push_back(member_match[2].str());
        // skip over the offset line as well; the follower is examined next
        ++pos;
        absorbed = true;
      }
    }

    if (!absorbed) {
      flush_run();
      result.push_back(report[pos]);
    }
  }

  flush_run();
  return result;
}
150 
// Gathers consecutive "<kind> symbol <name> was added|removed" lines into
// grouped listings.
//
// Only a matching line that is directly followed by an empty line is
// collected; the empty line is consumed with it. When any other line is
// reached (or the report ends) the collected symbols are written out, removals
// before additions and, within each, one group per kind in sorted kind order:
//   "<count> <kind> symbol(s) removed|added"
//   "  <name>"            (one per symbol, in the order encountered)
//   ""
// Lines that are not collected are copied through unchanged.
//
// report: the report, one entry per line.
// Returns the rewritten report.
std::vector<std::string> GroupRemovedAddedSymbols(
    const std::vector<std::string>& report) {
  const std::regex added_or_removed_pattern(
      "^(.*) symbol (.*) was (added|removed)$");
  const std::regex blank_pattern("^$");

  // Symbol names keyed by symbol kind; std::map keeps kinds sorted.
  using SymbolsByKind = std::map<std::string, std::vector<std::string>>;
  SymbolsByKind removed;
  SymbolsByKind added;
  std::vector<std::string> result;

  const auto flush_group = [&result](SymbolsByKind& groups, const char* verb) {
    for (const auto& [kind, symbols] : groups) {
      if (symbols.empty()) {
        continue;
      }
      std::ostringstream heading;
      heading << symbols.size() << ' ' << kind << " symbol(s) " << verb;
      result.push_back(heading.str());
      for (const auto& symbol : symbols) {
        result.push_back("  " + symbol);
      }
      result.emplace_back();
    }
    groups.clear();
  };
  const auto flush_all = [&]() {
    flush_group(removed, "removed");
    flush_group(added, "added");
  };

  const size_t count = report.size();
  size_t pos = 0;
  while (pos < count) {
    std::smatch parts;
    const bool collectable =
        pos + 1 < count &&
        std::regex_match(report[pos], parts, added_or_removed_pattern) &&
        std::regex_match(report[pos + 1], blank_pattern);
    if (!collectable) {
      flush_all();
      result.push_back(report[pos]);
      ++pos;
      continue;
    }
    // The pattern only admits "added" or "removed" as the third capture.
    SymbolsByKind& target = parts[3].str() == "removed" ? removed : added;
    target[parts[1].str()].push_back(parts[2].str());
    // the symbol line and the empty line after it are both consumed
    pos += 2;
  }

  flush_all();
  return result;
}
195 
// Limits how many consecutive enumerator additions or removals are listed.
//
// Lines of the form "<indent>enumerator <name> was added|removed" are
// accumulated while both the indentation and the added/removed verb stay the
// same. When the run ends, its first `limit` lines are emitted verbatim and,
// if the run was longer, a line "<indent>... N other enumerator(s) <verb>"
// reports how many were left out. All other lines are copied through.
//
// report: the report, one entry per line.
// limit:  the maximum number of enumerator lines to keep per run.
// Returns the rewritten report.
std::vector<std::string> SummariseEnumeratorAdditionsAndRemovals(
    const std::vector<std::string>& report, size_t limit) {
  const std::regex enumerator_pattern(
      "^( *)enumerator (.*) was (added|removed)$");

  std::vector<std::string> result;
  // The run being accumulated and the properties shared by its lines.
  std::vector<std::string> run;
  size_t run_indent = 0;
  std::string run_verb;

  const auto flush_run = [&]() {
    const size_t total = run.size();
    const size_t shown = std::min(total, limit);
    result.insert(result.end(), run.begin(),
                  run.begin() + static_cast<std::ptrdiff_t>(shown));
    if (total > limit) {
      std::ostringstream summary;
      summary << std::string(run_indent, ' ') << "... " << total - limit
              << " other enumerator(s) " << run_verb;
      result.push_back(summary.str());
    }
    run.clear();
  };

  for (const std::string& line : report) {
    std::smatch parts;
    if (!std::regex_match(line, parts, enumerator_pattern)) {
      flush_run();
      result.push_back(line);
      continue;
    }
    const size_t line_indent = parts[1].length();
    const std::string line_verb = parts[3].str();
    // A change of indentation or verb starts a fresh run.
    const bool continues_run = line_indent == run_indent && line_verb == run_verb;
    if (!continues_run) {
      flush_run();
      run_indent = line_indent;
      run_verb = line_verb;
    }
    run.push_back(line);
  }

  flush_run();
  return result;
}
238 
239 }  // namespace
240 
PostProcess(const std::vector<std::string> & report)241 std::vector<std::string> PostProcess(const std::vector<std::string>& report) {
242   std::vector<std::string> new_report;
243   // limit the mentions of symbols with only CRC changes
244   new_report = SummariseCRCChanges(report, 3);
245   // collect together function / object symbol additions / removals
246   new_report = GroupRemovedAddedSymbols(new_report);
247   // collapse runs of identical member offset changes
248   new_report = SummariseOffsetChanges(new_report);
249   // limit the mentions of consecutive enumerator additions / removals
250   new_report = SummariseEnumeratorAdditionsAndRemovals(new_report, 1);
251   return new_report;
252 }
253 
254 }  // namespace stg
255