1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/profiling/memory/shared_ring_buffer.h"
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <sys/mman.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24
25 #include <atomic>
26 #include <cinttypes>
27 #include <type_traits>
28
29 #include "perfetto/base/build_config.h"
30 #include "perfetto/ext/base/scoped_file.h"
31 #include "perfetto/ext/base/temp_file.h"
32 #include "src/profiling/memory/scoped_spinlock.h"
33
34 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
35 #include <linux/memfd.h>
36 #include <sys/syscall.h>
37 #endif
38
39 namespace perfetto {
40 namespace profiling {
41
42 namespace {
43
// Record positions in the ring buffer are kept multiples of this value;
// IsCorrupt() rejects read/write positions that are not aligned to it.
constexpr auto kAlignment = 8;  // 64 bits to use aligned memcpy().
// Bytes reserved in front of every record's payload. BeginWrite()/EndWrite()
// and BeginRead() access the start of this header as a std::atomic<uint32_t>
// holding the payload size (0 while the record is still being written).
constexpr auto kHeaderSize = kAlignment;
// Extra virtual address space added to the PROT_NONE reservation made in
// Initialize(), on top of the metadata page and the two ring mappings.
constexpr auto kGuardSize = 1024 * 1024 * 64;  // 64 MB.
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
// Seals applied to the memfd by the creating constructor and, in in-tree
// Android builds, required by Initialize() before the fd is mapped.
constexpr auto kFDSeals = F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL;
#endif
50
meta_page_size()51 size_t meta_page_size() {
52 return base::GetSysPageSize();
53 }
54
55 } // namespace
56
SharedRingBuffer(CreateFlag,size_t size)57 SharedRingBuffer::SharedRingBuffer(CreateFlag, size_t size) {
58 size_t size_with_meta = size + meta_page_size();
59 base::ScopedFile fd;
60 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
61 bool is_memfd = false;
62 fd.reset(static_cast<int>(syscall(__NR_memfd_create, "heapprofd_ringbuf",
63 MFD_CLOEXEC | MFD_ALLOW_SEALING)));
64 is_memfd = !!fd;
65
66 if (!fd) {
67 #if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
68 // In-tree builds only allow mem_fd, so we can inspect the seals to verify
69 // the fd is appropriately sealed.
70 PERFETTO_ELOG("memfd_create() failed");
71 return;
72 #else
73 PERFETTO_DPLOG("memfd_create() failed");
74 #endif
75 }
76 #endif
77
78 if (!fd)
79 fd = base::TempFile::CreateUnlinked().ReleaseFD();
80
81 PERFETTO_CHECK(fd);
82 int res = ftruncate(fd.get(), static_cast<off_t>(size_with_meta));
83 PERFETTO_CHECK(res == 0);
84 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
85 if (is_memfd) {
86 res = fcntl(*fd, F_ADD_SEALS, kFDSeals);
87 if (res != 0) {
88 PERFETTO_PLOG("Failed to seal FD.");
89 return;
90 }
91 }
92 #endif
93 Initialize(std::move(fd));
94 if (!is_valid())
95 return;
96
97 new (meta_) MetadataPage();
98 }
99
~SharedRingBuffer()100 SharedRingBuffer::~SharedRingBuffer() {
101 // MetadataPage is not trivially constructible because it contains a Spinlock,
102 // which contains std::atomic, which is no longer trivially constructible
103 // after wg21.link/p0883r2 changed std::atomic() to do value-initialization.
104 static_assert(std::is_trivially_destructible<MetadataPage>::value,
105 "MetadataPage must be trivially destructible");
106
107 if (is_valid()) {
108 size_t outer_size = meta_page_size() + size_ * 2 + kGuardSize;
109 munmap(meta_, outer_size);
110 }
111
112 // This is work-around for code like the following:
113 // https://android.googlesource.com/platform/libcore/+/4ecb71f94378716f88703b9f7548b5d24839262f/ojluni/src/main/native/UNIXProcess_md.c#427
114 // They fork, close all fds by iterating over /proc/self/fd using opendir.
115 // Unfortunately closedir calls free, which detects the fork, and then tries
116 // to destruct the Client that holds this SharedRingBuffer.
117 //
118 // ScopedResource crashes on failure to close, so we explicitly ignore
119 // failures here.
120 int fd = mem_fd_.release();
121 if (fd != -1)
122 close(fd);
123 }
124
Initialize(base::ScopedFile mem_fd)125 void SharedRingBuffer::Initialize(base::ScopedFile mem_fd) {
126 #if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
127 int seals = fcntl(*mem_fd, F_GET_SEALS);
128 if (seals == -1) {
129 PERFETTO_PLOG("Failed to get seals of FD.");
130 return;
131 }
132 if ((seals & kFDSeals) != kFDSeals) {
133 PERFETTO_ELOG("FD not properly sealed. Expected %x, got %x", kFDSeals,
134 seals);
135 return;
136 }
137 #endif
138
139 struct stat stat_buf = {};
140 int res = fstat(*mem_fd, &stat_buf);
141 if (res != 0 || stat_buf.st_size == 0) {
142 PERFETTO_PLOG("Could not attach to fd.");
143 return;
144 }
145 auto size_with_meta = static_cast<size_t>(stat_buf.st_size);
146 auto size = size_with_meta - meta_page_size();
147
148 // |size_with_meta| must be a power of two number of pages + 1 page (for
149 // metadata).
150 if (size_with_meta < 2 * base::GetSysPageSize() ||
151 size % base::GetSysPageSize() || (size & (size - 1))) {
152 PERFETTO_ELOG("SharedRingBuffer size is invalid (%zu)", size_with_meta);
153 return;
154 }
155
156 // First of all reserve the whole virtual region to fit the buffer twice
157 // + metadata page + red zone at the end.
158 size_t outer_size = meta_page_size() + size * 2 + kGuardSize;
159 uint8_t* region = reinterpret_cast<uint8_t*>(
160 mmap(nullptr, outer_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
161 if (region == MAP_FAILED) {
162 PERFETTO_PLOG("mmap(PROT_NONE) failed");
163 return;
164 }
165
166 // Map first the whole buffer (including the initial metadata page) @ off=0.
167 void* reg1 = mmap(region, size_with_meta, PROT_READ | PROT_WRITE,
168 MAP_SHARED | MAP_FIXED, *mem_fd, 0);
169
170 // Then map again the buffer, skipping the metadata page. The final result is:
171 // [ METADATA ] [ RING BUFFER SHMEM ] [ RING BUFFER SHMEM ]
172 void* reg2 = mmap(region + size_with_meta, size, PROT_READ | PROT_WRITE,
173 MAP_SHARED | MAP_FIXED, *mem_fd,
174 /*offset=*/static_cast<ssize_t>(meta_page_size()));
175
176 if (reg1 != region || reg2 != region + size_with_meta) {
177 PERFETTO_PLOG("mmap(MAP_SHARED) failed");
178 munmap(region, outer_size);
179 return;
180 }
181 set_size(size);
182 meta_ = reinterpret_cast<MetadataPage*>(region);
183 mem_ = region + meta_page_size();
184 mem_fd_ = std::move(mem_fd);
185 }
186
// Reserves room for a record of |size| payload bytes and returns a Buffer
// describing where the caller should write the payload. The caller is expected
// to hold |spinlock| (checked with a DCHECK only).
//
// On failure the returned Buffer is left default-constructed and errno is set:
//   EBADF  - GetPointerPositions() returned no value (counted in
//            stats.num_writes_corrupt).
//   EINVAL - |size| plus the header overflows when rounded up to kAlignment.
//   EAGAIN - the record does not fit in the space currently available
//            (counted in stats.num_writes_overflow).
//
// On success the record's header is set to 0 and write_pos is advanced by the
// aligned record size. The payload size is only stored into the header later,
// by EndWrite(); BeginRead() treats a 0 header as "no data".
SharedRingBuffer::Buffer SharedRingBuffer::BeginWrite(
    const ScopedSpinlock& spinlock,
    size_t size) {
  PERFETTO_DCHECK(spinlock.locked());
  Buffer result;

  std::optional<PointerPositions> opt_pos = GetPointerPositions();
  if (!opt_pos) {
    meta_->stats.num_writes_corrupt++;
    errno = EBADF;
    return result;
  }
  auto pos = opt_pos.value();

  // Total footprint of the record in the ring: header + payload, rounded up
  // so that the next record starts aligned as well.
  const uint64_t size_with_header =
      base::AlignUp<kAlignment>(size + kHeaderSize);

  // size_with_header < size is for catching overflow of size_with_header.
  if (PERFETTO_UNLIKELY(size_with_header < size)) {
    errno = EINVAL;
    return result;
  }

  if (size_with_header > write_avail(pos)) {
    meta_->stats.num_writes_overflow++;
    errno = EAGAIN;
    return result;
  }

  uint8_t* wr_ptr = at(pos.write_pos);

  result.size = size;
  result.data = wr_ptr + kHeaderSize;
  // Computed from the positions sampled above, i.e. before this record's
  // space is accounted for by the fetch_add below.
  result.bytes_free = write_avail(pos);
  meta_->stats.bytes_written += size;
  meta_->stats.num_writes_succeeded++;

  // We can make this a relaxed store, as this gets picked up by the acquire
  // load in GetPointerPositions (and the release store below).
  reinterpret_cast<std::atomic<uint32_t>*>(wr_ptr)->store(
      0, std::memory_order_relaxed);

  // This needs to happen after the store above, so the reader never observes an
  // incorrect byte count. This is matched by the acquire load in
  // GetPointerPositions.
  meta_->write_pos.fetch_add(size_with_header, std::memory_order_release);
  return result;
}
235
EndWrite(Buffer buf)236 void SharedRingBuffer::EndWrite(Buffer buf) {
237 if (!buf)
238 return;
239 uint8_t* wr_ptr = buf.data - kHeaderSize;
240 PERFETTO_DCHECK(reinterpret_cast<uintptr_t>(wr_ptr) % kAlignment == 0);
241
242 // This needs to release to make sure the reader sees the payload written
243 // between the BeginWrite and EndWrite calls.
244 //
245 // This is matched by the acquire load in BeginRead where it reads the
246 // record's size.
247 reinterpret_cast<std::atomic<uint32_t>*>(wr_ptr)->store(
248 static_cast<uint32_t>(buf.size), std::memory_order_release);
249 }
250
BeginRead()251 SharedRingBuffer::Buffer SharedRingBuffer::BeginRead() {
252 std::optional<PointerPositions> opt_pos = GetPointerPositions();
253 if (!opt_pos) {
254 meta_->stats.num_reads_corrupt++;
255 errno = EBADF;
256 return Buffer();
257 }
258 auto pos = opt_pos.value();
259
260 size_t avail_read = read_avail(pos);
261
262 if (avail_read < kHeaderSize) {
263 meta_->stats.num_reads_nodata++;
264 errno = EAGAIN;
265 return Buffer(); // No data
266 }
267
268 uint8_t* rd_ptr = at(pos.read_pos);
269 PERFETTO_DCHECK(reinterpret_cast<uintptr_t>(rd_ptr) % kAlignment == 0);
270 const size_t size = reinterpret_cast<std::atomic<uint32_t>*>(rd_ptr)->load(
271 std::memory_order_acquire);
272 if (size == 0) {
273 meta_->stats.num_reads_nodata++;
274 errno = EAGAIN;
275 return Buffer();
276 }
277 const size_t size_with_header = base::AlignUp<kAlignment>(size + kHeaderSize);
278
279 if (size_with_header > avail_read) {
280 PERFETTO_ELOG(
281 "Corrupted header detected, size=%zu"
282 ", read_avail=%zu, rd=%" PRIu64 ", wr=%" PRIu64,
283 size, avail_read, pos.read_pos, pos.write_pos);
284 meta_->stats.num_reads_corrupt++;
285 errno = EBADF;
286 return Buffer();
287 }
288
289 rd_ptr += kHeaderSize;
290 PERFETTO_DCHECK(reinterpret_cast<uintptr_t>(rd_ptr) % kAlignment == 0);
291 return Buffer(rd_ptr, size, write_avail(pos));
292 }
293
EndRead(Buffer buf)294 size_t SharedRingBuffer::EndRead(Buffer buf) {
295 if (!buf)
296 return 0;
297 size_t size_with_header = base::AlignUp<kAlignment>(buf.size + kHeaderSize);
298 meta_->read_pos.fetch_add(size_with_header, std::memory_order_relaxed);
299 meta_->stats.num_reads_succeeded++;
300 return size_with_header;
301 }
302
IsCorrupt(const PointerPositions & pos)303 bool SharedRingBuffer::IsCorrupt(const PointerPositions& pos) {
304 if (pos.write_pos < pos.read_pos || pos.write_pos - pos.read_pos > size_ ||
305 pos.write_pos % kAlignment || pos.read_pos % kAlignment) {
306 PERFETTO_ELOG("Ring buffer corrupted, rd=%" PRIu64 ", wr=%" PRIu64
307 ", size=%zu",
308 pos.read_pos, pos.write_pos, size_);
309 return true;
310 }
311 return false;
312 }
313
SharedRingBuffer(SharedRingBuffer && other)314 SharedRingBuffer::SharedRingBuffer(SharedRingBuffer&& other) noexcept {
315 *this = std::move(other);
316 }
317
operator =(SharedRingBuffer && other)318 SharedRingBuffer& SharedRingBuffer::operator=(
319 SharedRingBuffer&& other) noexcept {
320 mem_fd_ = std::move(other.mem_fd_);
321 std::tie(meta_, mem_, size_, size_mask_) =
322 std::tie(other.meta_, other.mem_, other.size_, other.size_mask_);
323 std::tie(other.meta_, other.mem_, other.size_, other.size_mask_) =
324 std::make_tuple(nullptr, nullptr, 0, 0);
325 return *this;
326 }
327
328 // static
Create(size_t size)329 std::optional<SharedRingBuffer> SharedRingBuffer::Create(size_t size) {
330 auto buf = SharedRingBuffer(CreateFlag(), size);
331 if (!buf.is_valid())
332 return std::nullopt;
333 return std::make_optional(std::move(buf));
334 }
335
336 // static
Attach(base::ScopedFile mem_fd)337 std::optional<SharedRingBuffer> SharedRingBuffer::Attach(
338 base::ScopedFile mem_fd) {
339 auto buf = SharedRingBuffer(AttachFlag(), std::move(mem_fd));
340 if (!buf.is_valid())
341 return std::nullopt;
342 return std::make_optional(std::move(buf));
343 }
344
345 } // namespace profiling
346 } // namespace perfetto
347