// xref: /aosp_15_r20/external/perfetto/include/perfetto/ext/tracing/core/shared_memory_abi.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef INCLUDE_PERFETTO_EXT_TRACING_CORE_SHARED_MEMORY_ABI_H_
18 #define INCLUDE_PERFETTO_EXT_TRACING_CORE_SHARED_MEMORY_ABI_H_
19 
#include <stddef.h>
#include <stdint.h>

#include <array>
#include <atomic>
#include <bitset>
#include <string>
#include <thread>
#include <type_traits>
#include <utility>

#include "perfetto/base/logging.h"
#include "perfetto/protozero/proto_utils.h"
32 
33 namespace perfetto {
34 
35 // This file defines the binary interface of the memory buffers shared between
36 // Producer and Service. This is a long-term stable ABI and has to be backwards
37 // compatible to deal with mismatching Producer and Service versions.
38 //
39 // Overview
40 // --------
41 // SMB := "Shared Memory Buffer".
42 // In the most typical case of a multi-process architecture (i.e. Producer and
43 // Service are hosted by different processes), a Producer means almost always
44 // a "client process producing data" (almost: in some cases a process might host
45 // > 1 Producer, if it links two libraries, independent of each other, that both
46 // use Perfetto tracing).
47 // The Service has one SMB for each Producer.
48 // A producer has one or (typically) more data sources. They all share the same
49 // SMB.
50 // The SMB is a staging area to decouple data sources living in the Producer
51 // and allow them to do non-blocking async writes.
52 // The SMB is *not* the ultimate logging buffer seen by the Consumer. That one
53 // is larger (~MBs) and not shared with Producers.
54 // Each SMB is small, typically few KB. Its size is configurable by the producer
55 // within a max limit of ~MB (see kMaxShmSize in tracing_service_impl.cc).
56 // The SMB is partitioned into fixed-size Page(s). The size of the Pages are
57 // determined by each Producer at connection time and cannot be changed.
58 // Hence, different producers can have SMB(s) that have a different Page size
59 // from each other, but the page size will be constant throughout all the
60 // lifetime of the SMB.
61 // Page(s) are partitioned by the Producer into variable size Chunk(s):
62 //
63 // +------------+      +--------------------------+
64 // | Producer 1 |  <-> |      SMB 1 [~32K - 1MB]  |
65 // +------------+      +--------+--------+--------+
66 //                     |  Page  |  Page  |  Page  |
67 //                     +--------+--------+--------+
68 //                     | Chunk  |        | Chunk  |
69 //                     +--------+  Chunk +--------+ <----+
70 //                     | Chunk  |        | Chunk  |      |
71 //                     +--------+--------+--------+      +---------------------+
72 //                                                       |       Service       |
73 // +------------+      +--------------------------+      +---------------------+
74 // | Producer 2 |  <-> |      SMB 2 [~32K - 1MB]  |     /| large ring buffers  |
75 // +------------+      +--------+--------+--------+ <--+ | (100K - several MB) |
76 //                     |  Page  |  Page  |  Page  |      +---------------------+
77 //                     +--------+--------+--------+
78 //                     | Chunk  |        | Chunk  |
79 //                     +--------+  Chunk +--------+
80 //                     | Chunk  |        | Chunk  |
81 //                     +--------+--------+--------+
82 //
83 // * Sizes of both SMB and ring buffers are purely indicative and decided at
84 // configuration time by the Producer (for SMB sizes) and the Consumer (for the
85 // final ring buffer size).
86 
87 // Page
88 // ----
89 // A page is a portion of the shared memory buffer and defines the granularity
90 // of the interaction between the Producer and tracing Service. When scanning
91 // the shared memory buffer to determine if something should be moved to the
92 // central logging buffers, the Service most of the times looks at and moves
93 // whole pages. Similarly, the Producer sends an IPC to invite the Service to
94 // drain the shared memory buffer only when a whole page is filled.
95 // Having fixed the total SMB size (hence the total memory overhead), the page
96 // size is a triangular tradeoff between:
97 // 1) IPC traffic: smaller pages -> more IPCs.
98 // 2) Producer lock freedom: larger pages -> larger chunks -> data sources can
99 //    write more data without needing to swap chunks and synchronize.
100 // 3) Risk of write-starving the SMB: larger pages -> higher chance that the
101 //    Service won't manage to drain them and the SMB remains full.
102 // The page size, on the other side, has no implications on wasted memory due to
103 // fragmentations (see Chunk below).
104 // The size of the page is chosen by the Service at connection time and stays
105 // fixed throughout all the lifetime of the Producer. Different producers (i.e.
106 // ~ different client processes) can use different page sizes.
107 // The page size must be an integer multiple of 4k (this is to allow VM page
108 // stealing optimizations) and obviously has to be an integer divisor of the
109 // total SMB size.
110 
111 // Chunk
112 // -----
113 // A chunk is a portion of a Page which is written and handled by a Producer.
114 // A chunk contains a linear sequence of TracePacket(s) (the root proto).
115 // A chunk cannot be written concurrently by two data sources. Protobufs must be
116 // encoded as contiguous byte streams and cannot be interleaved. Therefore, on
117 // the Producer side, a chunk is almost always owned exclusively by one thread
118 // (% extremely peculiar slow-path cases).
119 // Chunks are essentially single-writer single-thread lock-free arenas. Locking
120 // happens only when a Chunk is full and a new one needs to be acquired.
121 // Locking happens only within the scope of a Producer process. There is no
// inter-process locking. The Producer cannot lock the Service and vice versa.
123 // In the worst case, any of the two can starve the SMB, by marking all chunks
124 // as either being read or written. But that has the only side effect of
125 // losing the trace data.
126 // The Producer can decide to partition each page into a number of limited
127 // configurations (e.g., 1 page == 1 chunk, 1 page == 2 chunks and so on).
128 
129 // TracePacket
130 // -----------
131 // Is the atom of tracing. Putting aside pages and chunks a trace is merely a
132 // sequence of TracePacket(s). TracePacket is the root protobuf message.
133 // A TracePacket can span across several chunks (hence even across several
134 // pages). A TracePacket can therefore be >> chunk size, >> page size and even
135 // >> SMB size. The Chunk header carries metadata to deal with the TracePacket
136 // splitting case.
137 
138 // Use only explicitly-sized types below. DO NOT use size_t or any architecture
139 // dependent size (e.g. size_t) in the struct fields. This buffer will be read
140 // and written by processes that have a different bitness in the same OS.
// Instead it's fine to assume little-endianness. Big-endian is a dream we are
142 // not currently pursuing.
143 
class SharedMemoryABI {
 public:
  static constexpr size_t kMinPageSize = 4 * 1024;

  // This is due to Chunk::size being 16 bits.
  static constexpr size_t kMaxPageSize = 64 * 1024;

  // "14" is the max number that can be encoded in a 32 bit atomic word using
  // 2 state bits per Chunk and leaving 4 bits for the page layout.
  // See PageLayout below.
  static constexpr size_t kMaxChunksPerPage = 14;

  // Each TracePacket fragment in the Chunk is prefixed by a VarInt stating its
  // size that is up to 4 bytes long. Since the size is often known after the
  // fragment has been filled, the VarInt is often redundantly encoded (see
  // proto_utils.h) to be exactly 4 bytes.
  static constexpr size_t kPacketHeaderSize = 4;

  // TraceWriter specifies this invalid packet/fragment size to signal to the
  // service that a packet should be discarded, because the TraceWriter couldn't
  // write its remaining fragments (e.g. because the SMB was exhausted).
  static constexpr size_t kPacketSizeDropPacket =
      protozero::proto_utils::kMaxMessageLength;

  // Chunk states and transitions:
  //    kChunkFree  <----------------+
  //         |  (Producer)           |
  //         V                       |
  //  kChunkBeingWritten             |
  //         |  (Producer)           |
  //         V                       |
  //  kChunkComplete                 |
  //         |  (Service)            |
  //         V                       |
  //  kChunkBeingRead                |
  //        |   (Service)            |
  //        +------------------------+
  //
  // The ABI has an "emulation mode" for transports where shared memory isn't
  // supported. In this mode, kChunkBeingRead is skipped. A chunk in the
  // kChunkComplete state is released as free after the producer serializes
  // chunk content to the protobuf message.
  enum ChunkState : uint32_t {
    // The Chunk is free. The Service shall never touch it, the Producer can
    // acquire it and transition it into kChunkBeingWritten.
    kChunkFree = 0,

    // The Chunk is being used by the Producer and is not complete yet.
    // The Service shall never touch kChunkBeingWritten pages.
    kChunkBeingWritten = 1,

    // The Service is moving the page into its non-shared ring buffer. The
    // Producer shall never touch kChunkBeingRead pages.
    kChunkBeingRead = 2,

    // The Producer is done writing the page and won't touch it again. The
    // Service can now move it to its non-shared ring buffer.
    // kAllChunksComplete relies on this being == 3.
    kChunkComplete = 3,
  };

  // Human-readable names for each ChunkState value, for logging/debugging.
  // Indexed by the ChunkState enum value.
  static constexpr const char* kChunkStateStr[] = {"Free", "BeingWritten",
                                                   "BeingRead", "Complete"};

  enum PageLayout : uint32_t {
    // The page is fully free and has not been partitioned yet.
    kPageNotPartitioned = 0,

    // TODO(primiano): Aligning a chunk @ 16 bytes could allow to use faster
    // intrinsics based on quad-word moves. Do the math and check what is the
    // fragmentation loss.

    // align4(X) := the largest integer N s.t. (N % 4) == 0 && N <= X.
    // 8 == sizeof(PageHeader).
    kPageDiv1 = 1,   // Only one chunk of size: PAGE_SIZE - 8.
    kPageDiv2 = 2,   // Two chunks of size: align4((PAGE_SIZE - 8) / 2).
    kPageDiv4 = 3,   // Four chunks of size: align4((PAGE_SIZE - 8) / 4).
    kPageDiv7 = 4,   // Seven chunks of size: align4((PAGE_SIZE - 8) / 7).
    kPageDiv14 = 5,  // Fourteen chunks of size: align4((PAGE_SIZE - 8) / 14).

    // The rationale for 7 and 14 above is to maximize the page usage for the
    // likely case of |page_size| == 4096:
    // (((4096 - 8) / 14) % 4) == 0, while (((4096 - 8) / 16 % 4)) == 3. So
    // Div16 would waste 3 * 16 = 48 bytes per page for chunk alignment gaps.

    kPageDivReserved1 = 6,
    kPageDivReserved2 = 7,
    kNumPageLayouts = 8,
  };

  // Keep this consistent with the PageLayout enum above.
  static constexpr uint32_t kNumChunksForLayout[] = {0, 1, 2, 4, 7, 14, 0, 0};

  enum class ShmemMode {
    // The default mode, where the shared buffer is visible to both the producer
    // and the service.
    kDefault,

    // The emulation mode, used for producer ports without shared memory. The
    // state transitions are all done in the producer process.
    kShmemEmulation,
  };

  // Layout of a Page.
  // +===================================================+
  // | Page header [8 bytes]                             |
  // | Tells how many chunks there are, how big they are |
  // | and their state (free, read, write, complete).    |
  // +===================================================+
  // +***************************************************+
  // | Chunk #0 header [8 bytes]                         |
  // | Tells how many packets there are and whether the  |
  // | 1st and last ones are fragmented.                 |
  // | Also has a chunk id to reassemble fragments.      |
  // +***************************************************+
  // +---------------------------------------------------+
  // | Packet #0 size [varint, up to 4 bytes]            |
  // + - - - - - - - - - - - - - - - - - - - - - - - - - +
  // | Packet #0 payload                                 |
  // | A TracePacket protobuf message                    |
  // +---------------------------------------------------+
  //                         ...
  // + . . . . . . . . . . . . . . . . . . . . . . . . . +
  // |      Optional padding to maintain alignment       |
  // + . . . . . . . . . . . . . . . . . . . . . . . . . +
  // +---------------------------------------------------+
  // | Packet #N size [varint, up to 4 bytes]            |
  // + - - - - - - - - - - - - - - - - - - - - - - - - - +
  // | Packet #N payload                                 |
  // | A TracePacket protobuf message                    |
  // +---------------------------------------------------+
  //                         ...
  // +***************************************************+
  // | Chunk #M header [8 bytes]                         |
  //                         ...

  // Alignment applies to start offset only. The Chunk size is *not* aligned.
  static constexpr uint32_t kChunkAlignment = 4;
  // Each chunk's state occupies 2 bits of the page header bitmap (see
  // PageHeader below and GetChunkStateFromHeaderBitmap()).
  static constexpr uint32_t kChunkShift = 2;
  static constexpr uint32_t kChunkMask = 0x3;
  // Bits [30:28] of the page header bitmap hold the PageLayout.
  static constexpr uint32_t kLayoutMask = 0x70000000;
  static constexpr uint32_t kLayoutShift = 28;
  // Bits [27:0] hold the (up to 14) per-chunk 2-bit ChunkState fields.
  static constexpr uint32_t kAllChunksMask = 0x0FFFFFFF;

  // This assumes that kChunkComplete == 3.
  static constexpr uint32_t kAllChunksComplete = 0x0FFFFFFF;
  static constexpr uint32_t kAllChunksFree = 0;
  static constexpr size_t kInvalidPageIdx = static_cast<size_t>(-1);

  // There is one page header per page, at the beginning of the page.
  struct PageHeader {
    // |header_bitmap| bits:
    // [31] [30:28] [27:26] ... [1:0]
    //  |      |       |     |    |
    //  |      |       |     |    +---------- ChunkState[0]
    //  |      |       |     +--------------- ChunkState[12..1]
    //  |      |       +--------------------- ChunkState[13]
    //  |      +----------------------------- PageLayout (0 == page fully free)
    //  +------------------------------------ Reserved for future use
    std::atomic<uint32_t> header_bitmap;

    // If we're ever going to use this in the future it might come handy
    // reviving the kPageBeingPartitioned logic (look in git log, it was there
    // at some point in the past).
    uint32_t reserved;
  };

  // There is one Chunk header per chunk (hence PageLayout per page) at the
  // beginning of each chunk.
  struct ChunkHeader {
    enum Flags : uint8_t {
      // If set, the first TracePacket in the chunk is partial and continues
      // from |chunk_id| - 1 (within the same |writer_id|).
      kFirstPacketContinuesFromPrevChunk = 1 << 0,

      // If set, the last TracePacket in the chunk is partial and continues on
      // |chunk_id| + 1 (within the same |writer_id|).
      kLastPacketContinuesOnNextChunk = 1 << 1,

      // If set, the last (fragmented) TracePacket in the chunk has holes (even
      // if the chunk is marked as kChunkComplete) that need to be patched
      // out-of-band before the chunk can be read.
      kChunkNeedsPatching = 1 << 2,
    };

    struct Packets {
      // Number of valid TracePacket protobuf messages contained in the chunk.
      // Each TracePacket is prefixed by its own size. This field is
      // monotonically updated by the Producer with release store semantic when
      // the packet at position |count| is started. This last packet may not be
      // considered complete until |count| is incremented for the subsequent
      // packet or the chunk is completed.
      uint16_t count : 10;
      static constexpr size_t kMaxCount = (1 << 10) - 1;

      // See Flags above.
      uint16_t flags : 6;
    };

    // A monotonic counter of the chunk within the scope of a |writer_id|.
    // The tuple (ProducerID, WriterID, ChunkID) allows to figure out if two
    // chunks are contiguous (and hence a trace packet spanning across them can
    // be glued) or we had some holes due to the ring buffer wrapping.
    // This is set only when transitioning from kChunkFree to kChunkBeingWritten
    // and remains unchanged throughout the remaining lifetime of the chunk.
    std::atomic<uint32_t> chunk_id;

    // ID of the writer, unique within the producer.
    // Like |chunk_id|, this is set only when transitioning from kChunkFree to
    // kChunkBeingWritten.
    std::atomic<uint16_t> writer_id;

    // There is no ProducerID here. The service figures that out from the IPC
    // channel, which is unspoofable.

    // Updated with release-store semantics.
    std::atomic<Packets> packets;
  };

  // A movable handle over one chunk of the SMB (or over an adopted
  // out-of-band buffer, see MakeChunkFromSerializedData()). Does not own the
  // memory it points to.
  class Chunk {
   public:
    Chunk();  // Constructs an invalid chunk.

    // Chunk is move-only, to document the scope of the Acquire/Release
    // TryLock operations below.
    Chunk(const Chunk&) = delete;
    Chunk operator=(const Chunk&) = delete;
    Chunk(Chunk&&) noexcept;
    Chunk& operator=(Chunk&&);

    uint8_t* begin() const { return begin_; }
    uint8_t* end() const { return begin_ + size_; }

    // Size, including Chunk header.
    size_t size() const { return size_; }

    // Begin of the first packet (or packet fragment).
    uint8_t* payload_begin() const { return begin_ + sizeof(ChunkHeader); }
    size_t payload_size() const {
      PERFETTO_DCHECK(size_ >= sizeof(ChunkHeader));
      return size_ - sizeof(ChunkHeader);
    }

    bool is_valid() const { return begin_ && size_; }

    // Index of the chunk within the page [0..13] (13 comes from kPageDiv14).
    uint8_t chunk_idx() const { return chunk_idx_; }

    ChunkHeader* header() { return reinterpret_cast<ChunkHeader*>(begin_); }

    uint16_t writer_id() {
      return header()->writer_id.load(std::memory_order_relaxed);
    }

    // Returns the count of packets and the flags with acquire-load semantics.
    std::pair<uint16_t, uint8_t> GetPacketCountAndFlags() {
      auto packets = header()->packets.load(std::memory_order_acquire);
      const uint16_t packets_count = packets.count;
      const uint8_t packets_flags = packets.flags;
      return std::make_pair(packets_count, packets_flags);
    }

    // Increases |packets.count| with release semantics (note, however, that the
    // packet count is incremented *before* starting writing a packet). Returns
    // the new packet count. The increment is atomic but NOT race-free (i.e. no
    // CAS). Only the Producer is supposed to perform this increment, and it's
    // supposed to do that in a thread-safe way (holding a lock). A Chunk cannot
    // be shared by multiple Producer threads without locking. The packet count
    // is cleared by TryAcquireChunk(), when passing the new header for the
    // chunk.
    uint16_t IncrementPacketCount() {
      ChunkHeader* chunk_header = header();
      auto packets = chunk_header->packets.load(std::memory_order_relaxed);
      packets.count++;
      chunk_header->packets.store(packets, std::memory_order_release);
      return packets.count;
    }

    // Flags are cleared by TryAcquireChunk(), by passing the new header for
    // the chunk, or through ClearNeedsPatchingFlag.
    void SetFlag(ChunkHeader::Flags flag) {
      ChunkHeader* chunk_header = header();
      auto packets = chunk_header->packets.load(std::memory_order_relaxed);
      packets.flags |= flag;
      chunk_header->packets.store(packets, std::memory_order_release);
    }

    // This flag can only be cleared by the producer while it is still holding
    // on to the chunk - i.e. while the chunk is still in state
    // ChunkState::kChunkBeingWritten and hasn't been transitioned to
    // ChunkState::kChunkComplete. This is ok, because the service is oblivious
    // to the needs patching flag before the chunk is released as complete.
    void ClearNeedsPatchingFlag() {
      ChunkHeader* chunk_header = header();
      auto packets = chunk_header->packets.load(std::memory_order_relaxed);
      packets.flags &= ~ChunkHeader::kChunkNeedsPatching;
      chunk_header->packets.store(packets, std::memory_order_release);
    }

   private:
    friend class SharedMemoryABI;
    Chunk(uint8_t* begin, uint16_t size, uint8_t chunk_idx);

    // Don't add extra fields, keep the move operator fast.
    uint8_t* begin_ = nullptr;
    uint16_t size_ = 0;
    uint8_t chunk_idx_ = 0;

   public:
    // Upper bound on a chunk's size, given that |size_| is a 16-bit field
    // (1 << 16). See also kMaxPageSize above.
    static constexpr size_t kMaxSize = 1ULL << sizeof(size_) * 8;
  };

  // Construct an instance from an existing shared memory buffer.
  SharedMemoryABI(uint8_t* start,
                  size_t size,
                  size_t page_size,
                  ShmemMode mode);
  SharedMemoryABI();

  void Initialize(uint8_t* start,
                  size_t size,
                  size_t page_size,
                  ShmemMode mode);

  uint8_t* start() const { return start_; }
  uint8_t* end() const { return start_ + size_; }
  size_t size() const { return size_; }
  size_t page_size() const { return page_size_; }
  size_t num_pages() const { return num_pages_; }
  bool is_valid() { return num_pages() > 0; }

  uint8_t* page_start(size_t page_idx) {
    PERFETTO_DCHECK(page_idx < num_pages_);
    return start_ + page_size_ * page_idx;
  }

  PageHeader* page_header(size_t page_idx) {
    return reinterpret_cast<PageHeader*>(page_start(page_idx));
  }

  // Returns true if the page is fully clear and has not been partitioned yet.
  // The state of the page can change at any point after this returns (or even
  // before). The Producer should use this only as a hint to decide whether
  // it should TryPartitionPage() or acquire an individual chunk.
  bool is_page_free(size_t page_idx) {
    return GetPageHeaderBitmap(page_idx, std::memory_order_relaxed) == 0;
  }

  // Returns true if all chunks in the page are kChunkComplete. As above, this
  // is advisory only. The Service is supposed to use this only to decide
  // whether to TryAcquireAllChunksForReading() or not.
  bool is_page_complete(size_t page_idx) {
    auto bitmap = GetPageHeaderBitmap(page_idx, std::memory_order_relaxed);
    const uint32_t num_chunks = GetNumChunksFromHeaderBitmap(bitmap);
    if (num_chunks == 0)
      return false;  // Non partitioned pages cannot be complete.
    return (bitmap & kAllChunksMask) ==
           (kAllChunksComplete & ((1 << (num_chunks * kChunkShift)) - 1));
  }

  // For testing / debugging only.
  std::string page_header_dbg(size_t page_idx) {
    uint32_t x = GetPageHeaderBitmap(page_idx, std::memory_order_relaxed);
    return std::bitset<32>(x).to_string();
  }

  // Returns the page header bitmap, which is a bitmap that specifies the
  // chunking layout of the page and each chunk's current state. Unless
  // explicitly specified, reads with an acquire-load semantic to ensure a
  // producer's writes corresponding to an update of the bitmap (e.g. clearing
  // a chunk's header) are observed consistently.
  uint32_t GetPageHeaderBitmap(
      size_t page_idx,
      std::memory_order order = std::memory_order_acquire) {
    return page_header(page_idx)->header_bitmap.load(order);
  }

  // Returns a bitmap in which each bit is set if the corresponding Chunk exists
  // in the page (according to the page header bitmap) and is free. If the page
  // is not partitioned it returns 0 (as if the page had no free chunks).
  uint32_t GetFreeChunks(size_t page_idx);

  // Tries to atomically partition a page with the given |layout|. Returns true
  // if the page was free and has been partitioned with the given |layout|,
  // false if the page wasn't free anymore by the time we got there.
  // If succeeds all the chunks are atomically set in the kChunkFree state.
  bool TryPartitionPage(size_t page_idx, PageLayout layout);

  // Tries to atomically mark a single chunk within the page as
  // kChunkBeingWritten. Returns an invalid chunk if the page is not partitioned
  // or the chunk is not in the kChunkFree state. If succeeds sets the chunk
  // header to |header|.
  Chunk TryAcquireChunkForWriting(size_t page_idx,
                                  size_t chunk_idx,
                                  const ChunkHeader* header) {
    return TryAcquireChunk(page_idx, chunk_idx, kChunkBeingWritten, header);
  }

  // Similar to TryAcquireChunkForWriting. Fails if the chunk isn't in the
  // kChunkComplete state.
  Chunk TryAcquireChunkForReading(size_t page_idx, size_t chunk_idx) {
    return TryAcquireChunk(page_idx, chunk_idx, kChunkBeingRead, nullptr);
  }

  // The caller must have successfully TryAcquireAllChunksForReading() or it
  // needs to guarantee that the chunk is already in the kChunkBeingWritten
  // state.
  Chunk GetChunkUnchecked(size_t page_idx,
                          uint32_t header_bitmap,
                          size_t chunk_idx);

  // Creates a Chunk by adopting the given buffer (|data| and |size|) and chunk
  // index. This is used for chunk data passed over the wire (e.g. tcp or
  // vsock). The chunk should *not* be freed to the shared memory.
  static Chunk MakeChunkFromSerializedData(uint8_t* data,
                                           uint16_t size,
                                           uint8_t chunk_idx) {
    return Chunk(data, size, chunk_idx);
  }

  // Puts a chunk into the kChunkComplete state. Returns the page index.
  size_t ReleaseChunkAsComplete(Chunk chunk) {
    return ReleaseChunk(std::move(chunk), kChunkComplete);
  }

  // Puts a chunk into the kChunkFree state. Returns the page index.
  size_t ReleaseChunkAsFree(Chunk chunk) {
    return ReleaseChunk(std::move(chunk), kChunkFree);
  }

  ChunkState GetChunkState(size_t page_idx, size_t chunk_idx) {
    uint32_t bitmap = GetPageHeaderBitmap(page_idx, std::memory_order_relaxed);
    return GetChunkStateFromHeaderBitmap(bitmap, chunk_idx);
  }

  std::pair<size_t, size_t> GetPageAndChunkIndex(const Chunk& chunk);

  // Returns the pre-computed chunk size (in bytes, header included) for the
  // page layout encoded in |header_bitmap|.
  uint16_t GetChunkSizeFromHeaderBitmap(uint32_t header_bitmap) const {
    return chunk_sizes_[GetLayoutFromHeaderBitmap(header_bitmap)];
  }

  // Extracts the 2-bit state of chunk |chunk_idx| from |header_bitmap|.
  static ChunkState GetChunkStateFromHeaderBitmap(uint32_t header_bitmap,
                                                  size_t chunk_idx) {
    return static_cast<ChunkState>(
        (header_bitmap >> (chunk_idx * kChunkShift)) & kChunkMask);
  }

  static constexpr PageLayout GetLayoutFromHeaderBitmap(
      uint32_t header_bitmap) {
    return static_cast<PageLayout>((header_bitmap & kLayoutMask) >>
                                   kLayoutShift);
  }

  static constexpr uint32_t GetNumChunksFromHeaderBitmap(
      uint32_t header_bitmap) {
    return kNumChunksForLayout[GetLayoutFromHeaderBitmap(header_bitmap)];
  }

  // Returns a bitmap in which each bit is set if the corresponding Chunk exists
  // in the page (according to the page layout) and is not free. If the page is
  // not partitioned it returns 0 (as if the page had no used chunks). Bit N
  // corresponds to Chunk N.
  static uint32_t GetUsedChunks(uint32_t header_bitmap) {
    const uint32_t num_chunks = GetNumChunksFromHeaderBitmap(header_bitmap);
    uint32_t res = 0;
    for (uint32_t i = 0; i < num_chunks; i++) {
      res |= (GetChunkStateFromHeaderBitmap(header_bitmap, i) != kChunkFree)
                 ? (1 << i)
                 : 0;
    }
    return res;
  }

 private:
  SharedMemoryABI(const SharedMemoryABI&) = delete;
  SharedMemoryABI& operator=(const SharedMemoryABI&) = delete;

  Chunk TryAcquireChunk(size_t page_idx,
                        size_t chunk_idx,
                        ChunkState,
                        const ChunkHeader*);
  size_t ReleaseChunk(Chunk chunk, ChunkState);

  uint8_t* start_ = nullptr;
  size_t size_ = 0;
  size_t page_size_ = 0;
  // True when Initialize()-d with ShmemMode::kShmemEmulation.
  bool use_shmem_emulation_ = false;
  size_t num_pages_ = 0;
  // Pre-computed chunk size (bytes) for each PageLayout, indexed by layout.
  std::array<uint16_t, kNumPageLayouts> chunk_sizes_;
};
633 
634 }  // namespace perfetto
635 
636 #endif  // INCLUDE_PERFETTO_EXT_TRACING_CORE_SHARED_MEMORY_ABI_H_
637