xref: /aosp_15_r20/external/perfetto/src/tools/proto_filter/proto_filter.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "perfetto/ext/base/file_utils.h"
18 #include "perfetto/ext/base/getopt.h"
19 #include "perfetto/ext/base/scoped_file.h"
20 #include "perfetto/ext/base/string_utils.h"
21 #include "perfetto/ext/base/version.h"
22 #include "protos/perfetto/config/trace_config.gen.h"
23 #include "src/protozero/filtering/filter_util.h"
24 #include "src/protozero/filtering/message_filter.h"
25 #include "src/trace_config_utils/txt_to_pb.h"
26 
27 namespace perfetto {
28 namespace proto_filter {
29 namespace {
30 
// Help text for the tool. Option names listed here must match the
// |long_options| table used for command-line parsing.
const char kUsage[] =
    R"(Usage: proto_filter [options]

-s --schema_in:      Path to the root .proto file. Required for most operations
-I --proto_path:     Extra include directory for proto includes. If omitted assumed CWD.
-r --root_message:   Fully qualified name for the root proto message (e.g. perfetto.protos.Trace)
                     If omitted the first message defined in the schema will be used.
-i --msg_in:         Path of a binary-encoded proto message which will be filtered.
-o --msg_out:        Path of the binary-encoded filtered proto message written in output.
-c --config_in:      Path of a TraceConfig textproto (note: only trace_filter field is considered).
-f --filter_in:      Path of a filter bytecode file previously generated by this tool.
-F --filter_out:     Path of the filter bytecode file generated from the --schema_in definition.
-T --filter_oct_out: Like --filter_out, but emits an octal-escaped C string suitable for .pbtx.
-d --dedupe:         Minimize filter size by deduping leaf messages with same field ids.
-x --passthrough:    Passthrough a nested message as an opaque bytes field.
-g --filter_string:  Filter the string using separately specified rules before passing it through.

Example usage:

# Convert a .proto schema file into a diff-friendly list of messages/fields

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto

# Generate the filter bytecode from a .proto schema

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto \
               -F /tmp/bytecode [--dedupe] \
               [-x protos.Message:message_field_to_pass] \
               [-g protos.Message:string_field_to_filter]

# List the used/filtered fields from a trace file

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto \
               -i test/data/example_android_trace_30s.pb -f /tmp/bytecode

# Filter a trace using a filter bytecode

  proto_filter -i test/data/example_android_trace_30s.pb -f /tmp/bytecode \
               -o /tmp/filtered_trace

# Filter a trace using a TraceConfig textproto

  proto_filter -i test/data/example_android_trace_30s.pb \
               -c /tmp/config.textproto \
               -o /tmp/filtered_trace

# Show which fields are allowed by a filter bytecode

  proto_filter -r perfetto.protos.Trace -s protos/perfetto/trace/trace.proto \
               [-g protos.Message:string_field_to_filter] \
               -f /tmp/bytecode
)";
83 
84 using TraceFilter = protos::gen::TraceConfig::TraceFilter;
ConvertPolicy(TraceFilter::StringFilterPolicy policy)85 std::optional<protozero::StringFilter::Policy> ConvertPolicy(
86     TraceFilter::StringFilterPolicy policy) {
87   switch (policy) {
88     case TraceFilter::SFP_UNSPECIFIED:
89       return std::nullopt;
90     case TraceFilter::SFP_MATCH_REDACT_GROUPS:
91       return protozero::StringFilter::Policy::kMatchRedactGroups;
92     case TraceFilter::SFP_ATRACE_MATCH_REDACT_GROUPS:
93       return protozero::StringFilter::Policy::kAtraceMatchRedactGroups;
94     case TraceFilter::SFP_MATCH_BREAK:
95       return protozero::StringFilter::Policy::kMatchBreak;
96     case TraceFilter::SFP_ATRACE_MATCH_BREAK:
97       return protozero::StringFilter::Policy::kAtraceMatchBreak;
98     case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS:
99       return protozero::StringFilter::Policy::kAtraceRepeatedSearchRedactGroups;
100   }
101   return std::nullopt;
102 }
103 
Main(int argc,char ** argv)104 int Main(int argc, char** argv) {
105   static const option long_options[] = {
106       {"help", no_argument, nullptr, 'h'},
107       {"version", no_argument, nullptr, 'v'},
108       {"dedupe", no_argument, nullptr, 'd'},
109       {"proto_path", required_argument, nullptr, 'I'},
110       {"schema_in", required_argument, nullptr, 's'},
111       {"root_message", required_argument, nullptr, 'r'},
112       {"msg_in", required_argument, nullptr, 'i'},
113       {"msg_out", required_argument, nullptr, 'o'},
114       {"config_in", required_argument, nullptr, 'c'},
115       {"filter_in", required_argument, nullptr, 'f'},
116       {"filter_out", required_argument, nullptr, 'F'},
117       {"filter_oct_out", required_argument, nullptr, 'T'},
118       {"passthrough", required_argument, nullptr, 'x'},
119       {"filter_string", required_argument, nullptr, 'g'},
120       {nullptr, 0, nullptr, 0}};
121 
122   std::string msg_in;
123   std::string msg_out;
124   std::string config_in;
125   std::string filter_in;
126   std::string schema_in;
127   std::string filter_out;
128   std::string filter_oct_out;
129   std::string proto_path;
130   std::string root_message_arg;
131   std::set<std::string> passthrough_fields;
132   std::set<std::string> filter_string_fields;
133   bool dedupe = false;
134 
135   for (;;) {
136     int option = getopt_long(
137         argc, argv, "hvdI:s:r:i:o:f:F:T:x:g:c:", long_options, nullptr);
138 
139     if (option == -1)
140       break;  // EOF.
141 
142     if (option == 'v') {
143       printf("%s\n", base::GetVersionString());
144       exit(0);
145     }
146 
147     if (option == 'd') {
148       dedupe = true;
149       continue;
150     }
151 
152     if (option == 'I') {
153       proto_path = optarg;
154       continue;
155     }
156 
157     if (option == 's') {
158       schema_in = optarg;
159       continue;
160     }
161 
162     if (option == 'c') {
163       config_in = optarg;
164       continue;
165     }
166 
167     if (option == 'r') {
168       root_message_arg = optarg;
169       continue;
170     }
171 
172     if (option == 'i') {
173       msg_in = optarg;
174       continue;
175     }
176 
177     if (option == 'o') {
178       msg_out = optarg;
179       continue;
180     }
181 
182     if (option == 'f') {
183       filter_in = optarg;
184       continue;
185     }
186 
187     if (option == 'F') {
188       filter_out = optarg;
189       continue;
190     }
191 
192     if (option == 'T') {
193       filter_oct_out = optarg;
194       continue;
195     }
196 
197     if (option == 'x') {
198       passthrough_fields.insert(optarg);
199       continue;
200     }
201 
202     if (option == 'g') {
203       filter_string_fields.insert(optarg);
204       continue;
205     }
206 
207     if (option == 'h') {
208       fprintf(stdout, kUsage);
209       exit(0);
210     }
211 
212     fprintf(stderr, kUsage);
213     exit(1);
214   }
215 
216   if (msg_in.empty() && filter_in.empty() && schema_in.empty()) {
217     fprintf(stderr, kUsage);
218     return 1;
219   }
220 
221   if (!filter_in.empty() && !config_in.empty()) {
222     fprintf(stderr, kUsage);
223     return 1;
224   }
225 
226   std::string msg_in_data;
227   if (!msg_in.empty()) {
228     PERFETTO_LOG("Loading proto-encoded message from %s", msg_in.c_str());
229     if (!base::ReadFile(msg_in, &msg_in_data)) {
230       PERFETTO_ELOG("Could not open message file %s", msg_in.c_str());
231       return 1;
232     }
233   }
234 
235   protozero::FilterUtil filter;
236   if (!schema_in.empty()) {
237     PERFETTO_LOG("Loading proto schema from %s", schema_in.c_str());
238     if (!filter.LoadMessageDefinition(schema_in, root_message_arg, proto_path,
239                                       passthrough_fields,
240                                       filter_string_fields)) {
241       PERFETTO_ELOG("Failed to parse proto schema from %s", schema_in.c_str());
242       return 1;
243     }
244     if (dedupe)
245       filter.Dedupe();
246   }
247 
248   protozero::MessageFilter msg_filter;
249   std::string filter_data;
250   std::string filter_data_src;
251   if (!filter_in.empty()) {
252     PERFETTO_LOG("Loading filter bytecode from %s", filter_in.c_str());
253     if (!base::ReadFile(filter_in, &filter_data)) {
254       PERFETTO_ELOG("Could not open filter file %s", filter_in.c_str());
255       return 1;
256     }
257     filter_data_src = filter_in;
258   } else if (!config_in.empty()) {
259     PERFETTO_LOG("Loading filter bytecode and rules from %s",
260                  config_in.c_str());
261     std::string config_data;
262     if (!base::ReadFile(config_in, &config_data)) {
263       PERFETTO_ELOG("Could not open config file %s", config_in.c_str());
264       return 1;
265     }
266     auto res = TraceConfigTxtToPb(config_data, config_in);
267     if (!res.ok()) {
268       fprintf(stderr, "%s\n", res.status().c_message());
269       return 1;
270     }
271 
272     std::vector<uint8_t>& config_bytes = res.value();
273     protos::gen::TraceConfig config;
274     config.ParseFromArray(config_bytes.data(), config_bytes.size());
275 
276     const auto& trace_filter = config.trace_filter();
277     for (const auto& rule : trace_filter.string_filter_chain().rules()) {
278       auto opt_policy = ConvertPolicy(rule.policy());
279       if (!opt_policy) {
280         PERFETTO_ELOG("Unknown string filter policy %d", rule.policy());
281         return 1;
282       }
283       msg_filter.string_filter().AddRule(*opt_policy, rule.regex_pattern(),
284                                          rule.atrace_payload_starts_with());
285     }
286     filter_data = trace_filter.bytecode_v2().empty()
287                       ? trace_filter.bytecode()
288                       : trace_filter.bytecode_v2();
289     filter_data_src = config_in;
290   } else if (!schema_in.empty()) {
291     PERFETTO_LOG("Generating filter bytecode from %s", schema_in.c_str());
292     filter_data = filter.GenerateFilterBytecode();
293     filter_data_src = schema_in;
294   }
295 
296   if (!filter_data.empty()) {
297     const uint8_t* data = reinterpret_cast<const uint8_t*>(filter_data.data());
298     if (!msg_filter.LoadFilterBytecode(data, filter_data.size())) {
299       PERFETTO_ELOG("Failed to parse filter bytecode from %s",
300                     filter_data_src.c_str());
301       return 1;
302     }
303   }
304 
305   // Write the filter bytecode in output.
306   if (!filter_out.empty()) {
307     auto fd = base::OpenFile(filter_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
308     if (!fd) {
309       PERFETTO_ELOG("Could not open filter out path %s", filter_out.c_str());
310       return 1;
311     }
312     PERFETTO_LOG("Writing filter bytecode (%zu bytes) into %s",
313                  filter_data.size(), filter_out.c_str());
314     base::WriteAll(*fd, filter_data.data(), filter_data.size());
315   }
316 
317   if (!filter_oct_out.empty()) {
318     auto fd =
319         base::OpenFile(filter_oct_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
320     if (!fd) {
321       PERFETTO_ELOG("Could not open filter out path %s",
322                     filter_oct_out.c_str());
323       return 1;
324     }
325     std::string oct_str;
326     oct_str.reserve(filter_data.size() * 4 + 64);
327     oct_str.append("trace_filter {\n  bytecode: \"");
328     for (char c : filter_data) {
329       uint8_t octect = static_cast<uint8_t>(c);
330       char buf[5]{'\\', '0', '0', '0', 0};
331       for (uint8_t i = 0; i < 3; ++i) {
332         buf[3 - i] = static_cast<char>('0' + static_cast<uint8_t>(octect) % 8);
333         octect /= 8;
334       }
335       oct_str.append(buf);
336     }
337     oct_str.append("\"\n}\n");
338     PERFETTO_LOG("Writing filter bytecode (%zu bytes) into %s", oct_str.size(),
339                  filter_oct_out.c_str());
340     base::WriteAll(*fd, oct_str.data(), oct_str.size());
341   }
342 
343   // Apply the filter to the input message (if any).
344   std::vector<uint8_t> msg_filtered_data;
345   if (!msg_in.empty()) {
346     PERFETTO_LOG("Applying filter %s to proto message %s",
347                  filter_data_src.c_str(), msg_in.c_str());
348     msg_filter.enable_field_usage_tracking(true);
349     auto res = msg_filter.FilterMessage(msg_in_data.data(), msg_in_data.size());
350     if (res.error)
351       PERFETTO_FATAL("Filtering failed");
352     msg_filtered_data.insert(msg_filtered_data.end(), res.data.get(),
353                              res.data.get() + res.size);
354   }
355 
356   // Write out the filtered message.
357   if (!msg_out.empty()) {
358     PERFETTO_LOG("Writing filtered proto bytes (%zu bytes) into %s",
359                  msg_filtered_data.size(), msg_out.c_str());
360     auto fd = base::OpenFile(msg_out, O_WRONLY | O_TRUNC | O_CREAT, 0644);
361     base::WriteAll(*fd, msg_filtered_data.data(), msg_filtered_data.size());
362   }
363 
364   if (!msg_in.empty()) {
365     const auto& field_usage_map = msg_filter.field_usage();
366     for (const auto& it : field_usage_map) {
367       const std::string& field_path_varint = it.first;
368       int32_t num_occurrences = it.second;
369       std::string path_str = filter.LookupField(field_path_varint);
370       printf("%-100s %s %d\n", path_str.c_str(),
371              num_occurrences < 0 ? "DROP" : "PASS", std::abs(num_occurrences));
372     }
373   } else if (!schema_in.empty()) {
374     filter.PrintAsText(!filter_data.empty() ? std::make_optional(filter_data)
375                                             : std::nullopt);
376   }
377 
378   if ((!filter_out.empty() || !filter_oct_out.empty()) && !dedupe) {
379     PERFETTO_ELOG(
380         "Warning: looks like you are generating a filter without --dedupe. For "
381         "production use cases, --dedupe can make the output bytecode "
382         "significantly smaller.");
383   }
384   return 0;
385 }
386 
387 }  // namespace
388 }  // namespace proto_filter
389 }  // namespace perfetto
390 
main(int argc,char ** argv)391 int main(int argc, char** argv) {
392   return perfetto::proto_filter::Main(argc, argv);
393 }
394