1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Siddharth Nayyar
19
20 #include "post_processing.h"
21
22 #include <algorithm>
23 #include <cstddef>
24 #include <cstdint>
25 #include <iostream>
26 #include <map>
27 #include <ostream>
28 #include <regex>
29 #include <sstream>
30 #include <string>
31 #include <unordered_map>
32 #include <utility>
33 #include <vector>
34
35 namespace stg {
36
37 namespace {
38
// Caps the number of "CRC-only" symbol changes shown per report section.
//
// A CRC-only change is a run of three consecutive lines: a line matching
// "<...> symbol <...> changed", a line of the form
// " CRC changed from <old> to <new>", and an empty line. Such triples are
// held back until the next section heading (a line that starts with a
// character other than a space and is not itself a symbol line) or the end of
// the report. At that point at most `limit` of them are written out, followed
// by a one-line note giving the number omitted and the total, if any were
// omitted. Every other line is copied through unchanged, in order.
std::vector<std::string> SummariseCRCChanges(
    const std::vector<std::string>& report, size_t limit) {
  const std::regex symbol_changed_re("^.* symbol .* changed$");
  const std::regex crc_re("^ CRC changed from [^ ]* to [^ ]*$");
  const std::regex empty_re("^$");
  const std::regex section_re("^[^ \\n].*$");
  const std::regex symbol_re("^.* symbol .*$");

  std::vector<std::string> result;
  // (symbol line, CRC line) pairs gathered since the last flush.
  std::vector<std::pair<std::string, std::string>> crc_only;

  const auto flush = [&result, &crc_only, limit]() {
    const size_t total = crc_only.size();
    const size_t shown = std::min(total, limit);
    for (size_t i = 0; i < shown; ++i) {
      const auto& [symbol_line, crc_line] = crc_only[i];
      result.push_back(symbol_line);
      result.push_back(crc_line);
      result.emplace_back();  // blank separator after each entry
    }
    if (total > limit) {
      std::ostringstream summary;
      summary << "... " << total - limit << " omitted; " << total
              << " symbols have only CRC changes";
      result.push_back(summary.str());
      result.emplace_back();
    }
    crc_only.clear();
  };

  const size_t count = report.size();
  size_t pos = 0;
  while (pos < count) {
    const std::string& line = report[pos];

    const bool is_section_heading = std::regex_match(line, section_re) &&
                                    !std::regex_match(line, symbol_re);
    if (is_section_heading) {
      flush();
      result.push_back(line);
      ++pos;
      continue;
    }

    const bool is_crc_only_change =
        pos + 2 < count && std::regex_match(line, symbol_changed_re) &&
        std::regex_match(report[pos + 1], crc_re) &&
        std::regex_match(report[pos + 2], empty_re);
    if (is_crc_only_change) {
      crc_only.emplace_back(line, report[pos + 1]);
      // The symbol line, the CRC line and the blank line are all consumed.
      pos += 3;
      continue;
    }

    result.push_back(line);
    ++pos;
  }

  flush();
  return result;
}
87
// Collapses runs of member changes whose only detail is the same offset shift.
//
// An entry is recognised as the two lines
//   <indent>member '<name>' changed
//   <indent + 2>offset changed from <old> to <new>
// followed by a third line indented no deeper than the member line, which
// means the member has no further nested details. Consecutive entries with
// equal indentation and equal (new - old) are replaced by
//   <indent>member '<name>' changed                        (one entry), or
//   <indent>N members ('<first>' .. '<last>') changed      (several entries)
//   <indent + 2>offset changed by <delta>
// The summary detail line is written at the same nesting depth (indent + 2)
// as the offset lines it replaces. All other lines are copied through
// unchanged. A pair that occupies the last two lines of the report, with no
// line after it, is not summarised because the look-ahead needs a third line.
std::vector<std::string> SummariseOffsetChanges(
    const std::vector<std::string>& report) {
  // Extra indentation of a detail line relative to the line it describes.
  constexpr size_t kDetailIndent = 2;

  const std::regex re1("^( *)member ('.*') changed$");
  const std::regex re2("^( *)offset changed from (\\d+) to (\\d+)$");
  const std::regex re3("^( *).*$");

  std::smatch match1;
  std::smatch match2;
  std::smatch match3;
  // Indentation and offset delta shared by the members currently in `vars`.
  size_t indent = 0;
  int64_t offset = 0;
  // Quoted member names of the run being collected.
  std::vector<std::string> vars;
  std::vector<std::string> new_report;

  auto emit_pending = [&]() {
    if (vars.empty()) {
      return;
    }
    std::ostringstream line1;
    line1 << std::string(indent, ' ');
    if (vars.size() == 1) {
      line1 << "member " << vars.front() << " changed";
    } else {
      line1 << vars.size() << " members (" << vars.front() << " .. "
            << vars.back() << ") changed";
    }
    new_report.push_back(line1.str());
    std::ostringstream line2;
    // Nest the detail exactly as deep as the offset lines that were consumed.
    line2 << std::string(indent + kDetailIndent, ' ') << "offset changed by "
          << offset;
    new_report.push_back(line2.str());
    vars.clear();
  };

  for (size_t ix = 0; ix < report.size(); ++ix) {
    if (ix + 2 < report.size() && std::regex_match(report[ix], match1, re1) &&
        std::regex_match(report[ix + 1], match2, re2) &&
        std::regex_match(report[ix + 2], match3, re3)) {
      const size_t indent1 = match1[1].length();
      const size_t indent2 = match2[1].length();
      const size_t indent3 = match3[1].length();
      // The offset line must be a direct detail of the member line, and the
      // line after it must not be a further detail of the same member.
      if (indent1 + kDetailIndent == indent2 && indent1 >= indent3) {
        const auto new_indent = indent1;
        const int64_t new_offset =
            std::stoll(match2[3].str()) - std::stoll(match2[2].str());
        if (new_indent != indent || new_offset != offset) {
          // A different depth or delta starts a new run.
          emit_pending();
          indent = new_indent;
          offset = new_offset;
        }
        vars.push_back(match1[2]);
        // consumed 2 lines in total => 1 extra line
        ++ix;
        continue;
      }
    }
    emit_pending();
    new_report.push_back(report[ix]);
  }

  emit_pending();
  return new_report;
}
150
// Gathers consecutive "<kind> symbol <name> was added|removed" entries into
// grouped lists.
//
// An entry is a matching line immediately followed by an empty line; both are
// consumed. When any other line is reached, or at the end of the report, the
// collected entries are written out: removals first, then additions, and
// within each of those ordered by kind. Each group is a header line
// "<count> <kind> symbol(s) <removed|added>", one line per symbol (the name
// prefixed with a single space, in order of appearance) and a trailing empty
// line. Lines that are not part of an entry are copied through unchanged.
std::vector<std::string> GroupRemovedAddedSymbols(
    const std::vector<std::string>& report) {
  const std::regex symbol_re("^(.*) symbol (.*) was (added|removed)$");
  const std::regex empty_re("^$");

  // kind -> symbol names, kept sorted by kind for stable output.
  using SymbolsByKind = std::map<std::string, std::vector<std::string>>;
  // "added" / "removed" -> symbols collected since the last flush.
  std::unordered_map<std::string, SymbolsByKind> grouped;
  std::vector<std::string> result;

  const auto flush = [&result, &grouped]() {
    for (const char* action : {"removed", "added"}) {
      for (auto& entry : grouped[action]) {
        const std::string& kind = entry.first;
        std::vector<std::string>& names = entry.second;
        if (names.empty()) {
          continue;
        }
        std::ostringstream header;
        header << names.size() << ' ' << kind << " symbol(s) " << action;
        result.push_back(header.str());
        for (const std::string& name : names) {
          result.push_back(" " + name);
        }
        names.clear();
        result.emplace_back();
      }
    }
  };

  const size_t count = report.size();
  for (size_t pos = 0; pos < count; ++pos) {
    std::smatch parts;
    const bool is_entry = pos + 1 < count &&
                          std::regex_match(report[pos], parts, symbol_re) &&
                          std::regex_match(report[pos + 1], empty_re);
    if (!is_entry) {
      flush();
      result.push_back(report[pos]);
      continue;
    }
    // parts: 1 = kind, 2 = symbol name, 3 = "added" or "removed".
    grouped[parts[3].str()][parts[1].str()].push_back(parts[2].str());
    // Skip the empty line that follows the symbol line.
    ++pos;
  }

  flush();
  return result;
}
195
// Shortens runs of consecutive enumerator additions or removals.
//
// A run is a maximal sequence of adjacent lines of the form
// "<indent>enumerator <name> was added|removed" that share the same
// indentation and the same action. For each run the first `limit` lines are
// kept; if the run is longer, a single line
// "<indent>... <n> other enumerator(s) <added|removed>" stands in for the
// rest. Every other line ends the current run and is copied through unchanged.
std::vector<std::string> SummariseEnumeratorAdditionsAndRemovals(
    const std::vector<std::string>& report, size_t limit) {
  const std::regex re("^( *)enumerator (.*) was (added|removed)$");

  std::vector<std::string> result;
  // Lines of the run currently being collected, with its shared properties.
  std::vector<std::string> run;
  size_t run_indent = 0;
  std::string run_action;

  const auto flush = [&]() {
    const size_t total = run.size();
    const size_t shown = std::min(total, limit);
    using Offset = std::vector<std::string>::difference_type;
    result.insert(result.end(), run.begin(),
                  run.begin() + static_cast<Offset>(shown));
    if (total > limit) {
      std::ostringstream summary;
      summary << std::string(run_indent, ' ') << "... " << total - limit
              << " other enumerator(s) " << run_action;
      result.push_back(summary.str());
    }
    run.clear();
  };

  for (size_t pos = 0; pos < report.size(); ++pos) {
    const std::string& line = report[pos];
    std::smatch parts;
    if (!std::regex_match(line, parts, re)) {
      flush();
      result.push_back(line);
      continue;
    }

    const size_t line_indent = parts[1].length();
    std::string line_action = parts[3].str();
    if (line_indent != run_indent || line_action != run_action) {
      // Flush with the old indent/action before adopting the new ones.
      flush();
      run_indent = line_indent;
      run_action = std::move(line_action);
    }
    run.push_back(line);
  }

  flush();
  return result;
}
238
239 } // namespace
240
PostProcess(const std::vector<std::string> & report)241 std::vector<std::string> PostProcess(const std::vector<std::string>& report) {
242 std::vector<std::string> new_report;
243 // limit the mentions of symbols with only CRC changes
244 new_report = SummariseCRCChanges(report, 3);
245 // collect together function / object symbol additions / removals
246 new_report = GroupRemovedAddedSymbols(new_report);
247 // collapse runs of identical member offset changes
248 new_report = SummariseOffsetChanges(new_report);
249 // limit the mentions of consecutive enumerator additions / removals
250 new_report = SummariseEnumeratorAdditionsAndRemovals(new_report, 1);
251 return new_report;
252 }
253
254 } // namespace stg
255