1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
18 #define SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
19
20 #include <bitset>
21 #include <cstdint>
22 #include <memory>
23 #include <optional>
24 #include <string>
25 #include <unordered_set>
26 #include <vector>
27
28 #include "perfetto/base/flat_set.h"
29 #include "perfetto/base/status.h"
30 #include "src/trace_redaction/frame_cookie.h"
31 #include "src/trace_redaction/process_thread_timeline.h"
32
33 #include "protos/perfetto/trace/trace_packet.pbzero.h"
34
35 namespace perfetto::trace_redaction {
36
// Multiple packages can share the same name. This is common when a device has
// multiple users (e.g. a personal profile and a work profile). Android encodes
// such uids as (user * 100000) + app_id, so every instance of a package shares
// the 5 least significant decimal digits.
//
// NormalizeUid() strips the per-user component and returns only those shared
// digits, collapsing every instance of a package onto a single uid:
//
//   NormalizeUid(10113)   == 10113
//   NormalizeUid(1010113) == 10113
//   NormalizeUid(100005)  == 5
constexpr uint64_t NormalizeUid(uint64_t uid) {
  // 100000 (10^5) keeps exactly the 5 least significant digits. A larger
  // modulus would leave part of the user component in the result, so two
  // instances of the same package could still compare as different.
  return uid % 100000;
}
43
// Bookkeeping for two system-wide values: the next synthetic thread id and the
// highest cpu index that has been reported.
class SystemInfo {
 public:
  // Returns a new synthetic thread id. The counter is advanced before it is
  // returned, so every call yields a value one greater than the previous call.
  int32_t AllocateSynthThread() {
    next_synth_thread_ += 1;
    return next_synth_thread_;
  }

  // Records that `cpu` exists. Returns the highest cpu index recorded so far,
  // which is `cpu` itself only when it exceeds every earlier value.
  uint32_t ReserveCpu(uint32_t cpu) {
    if (last_cpu_ < cpu) {
      last_cpu_ = cpu;
    }
    return last_cpu_;
  }

  // Cpu indices are zero-based, so the count is the highest index plus one.
  uint32_t cpu_count() const { return 1 + last_cpu_; }

 private:
  // The last allocated tid. Using a tid equal to or less than this value risks
  // a collision with another tid. If a tid is ever created (by a primitive),
  // this should be advanced to the max of this value and the new tid.
  //
  // Why the counter starts at 2^22: on a 64-bit machine the pid limit is 2^22
  // (approximately 4 million), and Perfetto stores pids in a signed 32-bit
  // int. Starting here keeps synthetic tids above every real tid while leaving
  // the rest of the positive int32 range (up to 2^31 - 1) available.
  //
  // Furthermore, the Android source code returns 4194304 (2^22) on 64-bit
  // devices.
  //
  //   /proc/sys/kernel/pid_max (since Linux 2.5.34)
  //       This file specifies the value at which PIDs wrap around
  //       (i.e., the value in this file is one greater than the
  //       maximum PID). PIDs greater than this value are not
  //       allocated; thus, the value in this file also acts as a
  //       system-wide limit on the total number of processes and
  //       threads. The default value for this file, 32768, results
  //       in the same range of PIDs as on earlier kernels. On
  //       32-bit platforms, 32768 is the maximum value for pid_max.
  //       On 64-bit systems, pid_max can be set to any value up to
  //       2^22 (PID_MAX_LIMIT, approximately 4 million).
  //
  // SOURCE: https://man7.org/linux/man-pages/man5/proc.5.html
  int32_t next_synth_thread_ = 1 << 22;

  // The highest cpu index seen. If this value is 7, there are at least 8 cpus.
  uint32_t last_cpu_ = 0;
};
87
// Describes a synthetic process by its thread ids.
//
// Layout of `tids`:
//
//   index 0     : the thread group id (tgid) of the process
//   index 1 + N : the thread returned for cpu N
//
// All lookups are bounds-checked with std::vector::at(), so a missing entry is
// reported as an out-of-range failure rather than read out of bounds.
class SyntheticProcess {
 public:
  // Stores a copy of `tids`. See the class comment for the expected layout.
  explicit SyntheticProcess(const std::vector<int32_t>& tids) : tids_(tids) {}

  // Use the SYSTEM_UID (i.e. 1000) because it best represents this "type" of
  // process.
  int32_t uid() const { return 1000; }

  // Use ppid == 1 which is normally considered to be init on Linux?
  int32_t ppid() const { return 1; }

  // The first tid is the thread group id. at(0) (rather than front()) keeps an
  // empty tid list a checked failure instead of undefined behaviour.
  int32_t tgid() const { return tids_.at(0); }

  // Every tid owned by the process, tgid included.
  const std::vector<int32_t>& tids() const { return tids_; }

  // Returns the tid stored for `cpu`. Fails (out of range) when there is no
  // entry for that cpu.
  int32_t RunningOn(uint32_t cpu) const { return TidForCpu(cpu); }

  // Same as above for signed cpu values. A negative cpu never has an entry; it
  // is mapped to an index that is guaranteed to be rejected. Without this,
  // cpu == -1 would wrap around to index 0 and silently return the tgid.
  int32_t RunningOn(int32_t cpu) const {
    return TidForCpu(cpu < 0 ? tids_.size() : static_cast<size_t>(cpu));
  }

 private:
  // Shared lookup for both RunningOn() overloads. The "+ 1" skips the tgid in
  // slot 0. The addition only happens once `cpu` is known to be smaller than
  // the size, so it cannot overflow; every other value is turned into size(),
  // which at() always rejects.
  int32_t TidForCpu(size_t cpu) const {
    const size_t count = tids_.size();
    const size_t index = cpu < count ? cpu + 1 : count;
    return tids_.at(index);
  }

  std::vector<int32_t> tids_;
};
112
// Primitives should be stateless. All state should be stored in the context.
// Primitives should depend on data in the context, not the origin of the data.
// This allows primitives to be swapped out or work together to populate data
// needed by another primitive.
//
// For this to work, primitives are divided into three types:
//
//  `CollectPrimitive`  : Reads data from trace packets and saves low-level
//                        data in the context.
//
//  `BuildPrimitive`    : Reads low-level data from the context and builds
//                        high-level (read-optimized) data structures.
//
//  `TransformPrimitive`: Reads high-level data from the context and modifies
//                        trace packets.
//
// Context is the shared state those primitives read from and write to. Every
// member is public and is documented individually below.
class Context {
 public:
  // Each packet will have a trusted uid. This is the package emitting the
  // event. In production we only expect to see system uids. 9999 is the
  // last allowed uid (allow all uids less than or equal to 9999).
  static constexpr int32_t kMaxTrustedUid = 9999;

  // The package that should not be redacted. This must be populated before
  // running any primitives.
  std::string package_name;

  // The uid of the package that should not be redacted. Empty until a uid has
  // been written to the context.
  //
  // The package list maps a package name to a uid. It is possible for multiple
  // package names to map to the same uid, for example:
  //
  //    packages {
  //      name: "com.google.android.gms"
  //      uid: 10113
  //      debuggable: false
  //      profileable_from_shell: false
  //      version_code: 235013038
  //    }
  //
  // Processes reference their package using a uid:
  //
  //    processes {
  //      pid: 18176
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 10113
  //    }
  //
  // An oddity within Android is that two or more processes can reference the
  // same package using different uids:
  //
  //    A = package(M * 100000 + X)
  //    B = package(N * 100000 + X)
  //
  // A and B map to the same package. This happens when there are two or more
  // profiles on the device (e.g. a work profile and a personal profile).
  //
  // From the example above:
  //
  //    uid = package_uid_for("com.google.android.gms")
  //    pid = main_thread_for(uid)
  //    ASSERT(pid == 18176)
  //
  // However, if there is another profile:
  //
  //    processes {
  //      pid: 18176
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 10113
  //    }
  //    processes {
  //      pid: 21388
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 1010113
  //    }
  //
  // The logic from before still holds; however, if the traced process was pid
  // 21388, it will be merged with the other threads.
  //
  // To avoid this problem, we normalize the uids and treat both instances as
  // a single process:
  //
  //    processes {
  //      pid: 18176
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 10113
  //    }
  //    processes {
  //      pid: 21388
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //    - uid: 1010113
  //    + uid: 10113
  //    }
  //
  // It sounds like there would be a privacy concern, but because both
  // processes are from the same app and are being collected from the same
  // user, there are no new privacy issues by doing this.
  //
  // But where should the uids be normalized? The dividing line is the timeline
  // interface, specifically, should the timeline know anything about uids
  // (other than "it's a number").
  //
  // To avoid expanding the timeline's scope, the uid normalization is done
  // outside of the timeline. When a uid is passed into the timeline, it should
  // already be normalized (to the timeline, 5 != 100005). When the timeline is
  // queried, the uid should be normalized as well. This increases the risk of
  // error, but there are only two places where uids are set: writing the uid
  // to the context and writing the uid to the timeline.
  std::optional<uint64_t> package_uid;

  // Trace packets contain a "one of" entry called "data". This field can be
  // thought of as the message. A trace packet will have other fields
  // alongside "data" (e.g. "timestamp"). These fields can be thought of as
  // metadata.
  //
  // A message should be removed if:
  //
  //    ...we know it contains too much sensitive information
  //
  //    ...we know it contains sensitive information and we know how to remove
  //        the sensitive information, but don't have the resources to do it
  //        right now
  //
  //    ...we know it provides little value
  //
  // "trace_packet_allow_list" contains the field ids of trace packets we want
  // to pass onto later transformations. Examples are:
  //
  //    - protos::pbzero::TracePacket::kProcessTreeFieldNumber
  //    - protos::pbzero::TracePacket::kProcessStatsFieldNumber
  //    - protos::pbzero::TracePacket::kClockSnapshotFieldNumber
  //
  // If the mask is set to 0x00, all fields would be removed. This should not
  // happen as some metadata provides context between packets.
  //
  // TracePacket has kForTestingFieldNumber which is set to 900.
  // NOTE(review): presumably 900 is the largest TracePacket field id, which is
  // why 1024 bits are enough — confirm.
  using TracePacketMask = std::bitset<1024>;
  TracePacketMask packet_mask;

  // Ftrace packets contain a "one of" entry called "event". Within the scope
  // of a ftrace event, the event can be considered the payload and the other
  // values can be considered metadata (e.g. timestamp and pid).
  //
  // A ftrace event should be removed if:
  //
  //    ... we know it contains too much sensitive information
  //
  //    ... we know it contains sensitive information and we have some ideas on
  //        how to remove it, but don't have the resources to do it right now
  //        (e.g. print).
  //
  //    ... we don't see value in including it
  //
  // "ftrace_packet_allow_list" contains field ids of ftrace packets that we
  // want to pass onto later transformations. An example would be:
  //
  //    ... kSchedWakingFieldNumber because it contains cpu activity information
  //
  // Compared against trace packets, the rules around removing ftrace packets
  // are complicated because...
  //
  //    packet {
  //      ftrace_packets {  <-- ONE-OF    (1)
  //        event {         <-- REPEATED  (2)
  //          cpu_idle { }  <-- ONE-OF    (3)
  //        }
  //        event { ... }
  //      }
  //    }
  //
  //  1.  A ftrace packet will populate the one-of slot in the trace packet.
  //
  //  2.  A ftrace packet can have multiple events.
  //
  //  3.  In this example, a cpu_idle event populates the one-of slot in the
  //      ftrace event.
  //
  // Ftrace event has kMaliMaliPMMCURESETWAITFieldNumber which is set to 532.
  // NOTE(review): presumably 532 is the largest ftrace event field id, which
  // is why 1024 bits are enough — confirm.
  using FtraceEventMask = std::bitset<1024>;
  FtraceEventMask ftrace_mask;

  //  message SuspendResumeFtraceEvent {
  //    optional string action = 1 [(datapol.semantic_type) = ST_NOT_REQUIRED];
  //    optional int32 val = 2;
  //    optional uint32 start = 3 [(datapol.semantic_type) = ST_NOT_REQUIRED];
  //  }
  //
  // The "action" in SuspendResumeFtraceEvent is a free-form string. There are
  // some known and expected values. Those values are stored here, and any
  // ftrace event whose action value is not found here will be dropped.
  base::FlatSet<std::string> suspend_result_allow_list;

  // The timeline is a query-focused data structure that connects a pid to a
  // uid at a specific point in time.
  //
  // A timeline has two modes:
  //
  //    1. write-only
  //    2. read-only
  //
  // Attempting to use the timeline incorrectly results in undefined behaviour.
  //
  // To use a timeline, it needs to be "built" (add events) and then "sealed"
  // (transition to read-only).
  //
  // A timeline must have Sort() called to change from write-only to read-only.
  // After Sort(), Flatten() and Reduce() can be called (optional) to improve
  // the practical look-up times (compared to theoretical look-up times).
  std::unique_ptr<ProcessThreadTimeline> timeline;

  // All frame events:
  //
  //  - ActualDisplayFrame
  //  - ActualSurfaceFrame
  //  - ExpectedDisplayFrame
  //  - ExpectedSurfaceFrame
  //
  // Connect a time, a pid, and a cookie value. Cookies are unique within a
  // trace, so if a cookie was connected to the target package, it can always
  // be used.
  //
  // End events (i.e. FrameEnd) only have a time and cookie value. The cookie
  // value connects it to its start time.
  //
  // In the collect phase, all start events are collected and converted to a
  // simpler structure.
  //
  // In the build phase, the cookies are filtered to only include the ones that
  // belong to the target package. This is done in the build phase, and not the
  // collect phase, because the timeline is needed to determine if the cookie
  // belongs to the target package.
  std::vector<FrameCookie> global_frame_cookies;

  // The collection of cookies that belong to the target package. Because
  // cookie values are unique within the scope of the trace, pid and time are
  // no longer needed and a set can be used for faster queries.
  std::unordered_set<int64_t> package_frame_cookies;

  // System-wide bookkeeping (synthetic thread id allocation and cpu count).
  // Empty until a value is assigned.
  // NOTE(review): presumably populated by a collect primitive — confirm.
  std::optional<SystemInfo> system_info;

  // The synthetic process (tgid plus one tid per cpu slot). Null until one is
  // created.
  // NOTE(review): presumably created by a build primitive once system_info is
  // known — confirm.
  std::unique_ptr<SyntheticProcess> synthetic_process;
};
356
// Extracts low-level data from the trace and writes it into the context. The
// life cycle of a collect primitive is:
//
//  primitive.Begin(&context);
//
//  for (auto& packet : packets) {
//    primitive.Collect(packet, &context);
//  }
//
//  primitive.End(&context);
//
// Every method is const, so a primitive cannot modify its own members while
// collecting; the Context pointer is the only place results can be written.
// Every method returns a base::Status.
// NOTE(review): presumably a failing status stops the redaction — confirm
// against the caller.
class CollectPrimitive {
 public:
  // Declared here and defined out of line (no body in this header).
  virtual ~CollectPrimitive();

  // Called once before the first call to Collect(...).
  //
  // Not pure virtual, so overriding is optional; the default implementation is
  // defined outside this header.
  virtual base::Status Begin(Context*) const;

  // Reads a trace packet and updates the context.
  //
  // Pure virtual: every collect primitive must implement it. The packet is
  // passed as a const decoder, so it can be read but not modified here.
  virtual base::Status Collect(const protos::pbzero::TracePacket::Decoder&,
                               Context*) const = 0;

  // Called once after the last call to Collect(...).
  //
  // Not pure virtual, so overriding is optional; the default implementation is
  // defined outside this header.
  virtual base::Status End(Context*) const;
};
381
// Responsible for converting low-level data from the context into high-level
// data, which is stored back in the context.
class BuildPrimitive {
 public:
  // Declared here and defined out of line (no body in this header).
  virtual ~BuildPrimitive();

  // Reads low-level data from the context and writes high-level data to the
  // context.
  //
  // Pure virtual: every build primitive must implement it. The method is
  // const, so the primitive's own members cannot be modified; all output goes
  // through `context`. Returns a base::Status.
  // NOTE(review): presumably a failing status stops the redaction — confirm
  // against the caller.
  virtual base::Status Build(Context* context) const = 0;
};
392
// Responsible for modifying trace packets using data from the context.
class TransformPrimitive {
 public:
  // Declared here and defined out of line (no body in this header).
  virtual ~TransformPrimitive();

  // Modifies a packet using data from the context.
  //
  // Pure virtual: every transform primitive must implement it. `context` is a
  // const reference, so the context is read-only at this stage; `packet` is
  // the in/out parameter that receives the modification. Returns a
  // base::Status.
  // NOTE(review): `packet` presumably holds the serialized TracePacket bytes —
  // confirm against the caller.
  virtual base::Status Transform(const Context& context,
                                 std::string* packet) const = 0;
};
402
403 } // namespace perfetto::trace_redaction
404
405 #endif // SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
406