1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/protozero/filtering/string_filter.h"
18
19 #include <cstring>
20 #include <regex>
21 #include <string_view>
22
23 #include "perfetto/base/compiler.h"
24 #include "perfetto/base/logging.h"
25 #include "perfetto/ext/base/string_view.h"
26 #include "perfetto/public/compiler.h"
27
28 namespace protozero {
29 namespace {
30
// Regex match results over a mutable char buffer: the sub-match iterators are
// plain char*, which lets RedactMatches() overwrite the matched bytes in place.
using Matches = std::match_results<char*>;

// Text written over the start of every redacted capture group. Groups shorter
// than this string receive a truncated copy of it.
static constexpr std::string_view kRedacted = "P60REDACTED";
// Filler character used for the bytes of a redacted capture group that lie
// beyond the length of |kRedacted|.
static constexpr char kRedactedDash = '-';
35
36 // Returns a pointer to the first character after the tgid pipe character in
37 // the atrace string given by [ptr, end). Returns null if no such character
38 // exists.
39 //
40 // Examples:
41 // E|1024 -> nullptr
42 // foobarbaz -> nullptr
43 // B|1024|x -> pointer to x
const char* FindAtracePayloadPtr(const char* ptr, const char* end) {
  // Don't even bother checking any strings which are so short that they could
  // not contain a post-tgid section. This filters out strings like "E|" which
  // are emitted by Bionic.
  //
  // The length is compared *before* any pointer past |ptr| is formed: computing
  // |ptr + 2| for a range shorter than that would be undefined behaviour.
  //
  // Also filter out any other strings starting with "E" as they never contain
  // anything past the tgid: this removes >half of the strings for ~zero cost.
  static constexpr size_t kEarliestSecondPipeIndex = 2;
  if (end <= ptr ||
      static_cast<size_t>(end - ptr) <= kEarliestSecondPipeIndex ||
      *ptr == 'E') {
    return nullptr;
  }

  // We skip past the first '|' character by starting at the character at
  // index 2. Just find the next pipe character (i.e. the one after tgid) using
  // memchr.
  const char* search_start = ptr + kEarliestSecondPipeIndex;
  const void* pipe =
      memchr(search_start, '|', static_cast<size_t>(end - search_start));

  // The payload starts right after the pipe. Note that this may be equal to
  // |end| when the pipe is the last character of the string.
  return pipe ? static_cast<const char*>(pipe) + 1 : nullptr;
}
64
// Returns true if the character range [ptr, end) begins with |starts_with|.
//
// An empty |starts_with| matches any valid (possibly empty) range. An inverted
// range (end < ptr) never matches.
bool StartsWith(const char* ptr,
                const char* end,
                const std::string& starts_with) {
  if (end < ptr) {
    return false;
  }

  // Verify that the range has enough characters to match against all the
  // characters in the "starts with" string. The check is done on lengths
  // rather than on |ptr + starts_with.size()|, as forming that pointer is
  // undefined behaviour when the prefix is longer than the range.
  const size_t available = static_cast<size_t>(end - ptr);
  if (starts_with.size() > available) {
    return false;
  }

  // Enough characters are available: memcmp decides whether they all match.
  return memcmp(ptr, starts_with.data(), starts_with.size()) == 0;
}
74
RedactMatches(const Matches & matches)75 void RedactMatches(const Matches& matches) {
76 // Go through every group in the matches.
77 for (size_t i = 1; i < matches.size(); ++i) {
78 const auto& match = matches[i];
79 PERFETTO_CHECK(match.second >= match.first);
80
81 // Overwrite the match with characters from |kRedacted|. If match is
82 // smaller, we will not use all of |kRedacted| but that's fine (i.e. we
83 // will overwrite with a truncated |kRedacted|).
84 size_t match_len = static_cast<size_t>(match.second - match.first);
85 size_t redacted_len = std::min(match_len, kRedacted.size());
86 memcpy(match.first, kRedacted.data(), redacted_len);
87
88 // Overwrite any characters after |kRedacted| with |kRedactedDash|.
89 memset(match.first + redacted_len, kRedactedDash, match_len - redacted_len);
90 }
91 }
92
93 } // namespace
94
AddRule(Policy policy,std::string_view pattern_str,std::string atrace_payload_starts_with)95 void StringFilter::AddRule(Policy policy,
96 std::string_view pattern_str,
97 std::string atrace_payload_starts_with) {
98 rules_.emplace_back(StringFilter::Rule{
99 policy,
100 std::regex(pattern_str.begin(), pattern_str.end(),
101 std::regex::ECMAScript | std::regex_constants::optimize),
102 std::move(atrace_payload_starts_with)});
103 }
104
MaybeFilterInternal(char * ptr,size_t len) const105 bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) const {
106 std::match_results<char*> matches;
107 bool atrace_find_tried = false;
108 const char* atrace_payload_ptr = nullptr;
109 for (const Rule& rule : rules_) {
110 switch (rule.policy) {
111 case Policy::kMatchRedactGroups:
112 case Policy::kMatchBreak:
113 if (std::regex_match(ptr, ptr + len, matches, rule.pattern)) {
114 if (rule.policy == Policy::kMatchBreak) {
115 return false;
116 }
117 RedactMatches(matches);
118 return true;
119 }
120 break;
121 case Policy::kAtraceMatchRedactGroups:
122 case Policy::kAtraceMatchBreak:
123 atrace_payload_ptr = atrace_find_tried
124 ? atrace_payload_ptr
125 : FindAtracePayloadPtr(ptr, ptr + len);
126 atrace_find_tried = true;
127 if (atrace_payload_ptr &&
128 StartsWith(atrace_payload_ptr, ptr + len,
129 rule.atrace_payload_starts_with) &&
130 std::regex_match(ptr, ptr + len, matches, rule.pattern)) {
131 if (rule.policy == Policy::kAtraceMatchBreak) {
132 return false;
133 }
134 RedactMatches(matches);
135 return true;
136 }
137 break;
138 case Policy::kAtraceRepeatedSearchRedactGroups:
139 atrace_payload_ptr = atrace_find_tried
140 ? atrace_payload_ptr
141 : FindAtracePayloadPtr(ptr, ptr + len);
142 atrace_find_tried = true;
143 if (atrace_payload_ptr && StartsWith(atrace_payload_ptr, ptr + len,
144 rule.atrace_payload_starts_with)) {
145 auto beg = std::regex_iterator<char*>(ptr, ptr + len, rule.pattern);
146 auto end = std::regex_iterator<char*>();
147 bool has_any_matches = beg != end;
148 for (auto it = std::move(beg); it != end; ++it) {
149 RedactMatches(*it);
150 }
151 if (has_any_matches) {
152 return true;
153 }
154 }
155 break;
156 }
157 }
158 return false;
159 }
160
161 } // namespace protozero
162