1*287e80b3SSadaf Ebrahimi // SPDX-License-Identifier: LGPL-2.1
2*287e80b3SSadaf Ebrahimi /*
3*287e80b3SSadaf Ebrahimi * Copyright (C) 2022 Google Inc, Steven Rostedt <[email protected]>
4*287e80b3SSadaf Ebrahimi */
5*287e80b3SSadaf Ebrahimi #include <stdio.h>
6*287e80b3SSadaf Ebrahimi #include <stdlib.h>
7*287e80b3SSadaf Ebrahimi #include <dirent.h>
8*287e80b3SSadaf Ebrahimi #include <unistd.h>
9*287e80b3SSadaf Ebrahimi #include <fcntl.h>
10*287e80b3SSadaf Ebrahimi #include <limits.h>
11*287e80b3SSadaf Ebrahimi #include <errno.h>
12*287e80b3SSadaf Ebrahimi #include <sys/stat.h>
13*287e80b3SSadaf Ebrahimi #include <sys/ioctl.h>
14*287e80b3SSadaf Ebrahimi #include <sys/select.h>
15*287e80b3SSadaf Ebrahimi
16*287e80b3SSadaf Ebrahimi #include <kbuffer.h>
17*287e80b3SSadaf Ebrahimi
18*287e80b3SSadaf Ebrahimi #include "tracefs.h"
19*287e80b3SSadaf Ebrahimi #include "tracefs-local.h"
20*287e80b3SSadaf Ebrahimi
/* Bit values kept in the flags field of struct tracefs_cpu */
enum {
	TC_STOP			= 0x1,	/* Stop reading */
	TC_PERM_NONBLOCK	= 0x2,	/* read is always non blocking */
	TC_NONBLOCK		= 0x4,	/* read is non blocking */
};
26*287e80b3SSadaf Ebrahimi
/* State tracked for one raw per-CPU trace reader */
struct tracefs_cpu {
	/* Descriptor the raw trace data is read or spliced from */
	int		fd;
	/* Combination of the TC_* bits */
	int		flags;
	/* First argument handed to select() when waiting for input */
	int		nfds;
	/* Pipe used to break out of blocked reads, -1 when not created */
	int		ctrl_pipe[2];
	/* Intermediate pipe used with splice(), -1 until it is created */
	int		splice_pipe[2];
	/* Length requested from splice() */
	int		pipe_size;
	/* Length requested from read() */
	int		subbuf_size;
	/* Bytes spliced into splice_pipe that were not yet read back out */
	int		buffered;
	/* splice() flags computed when the splice pipe is set up */
	int		splice_read_flags;
};
38*287e80b3SSadaf Ebrahimi
39*287e80b3SSadaf Ebrahimi /**
40*287e80b3SSadaf Ebrahimi * tracefs_cpu_alloc_fd - create a tracefs_cpu instance for an existing fd
41*287e80b3SSadaf Ebrahimi * @fd: The file descriptor to attach the tracefs_cpu to
42*287e80b3SSadaf Ebrahimi * @subbuf_size: The expected size to read the subbuffer with
43*287e80b3SSadaf Ebrahimi * @nonblock: If true, the file will be opened in O_NONBLOCK mode
44*287e80b3SSadaf Ebrahimi *
45*287e80b3SSadaf Ebrahimi * Return a descriptor that can read the tracefs trace_pipe_raw file
46*287e80b3SSadaf Ebrahimi * that is associated with the given @fd and must be read in @subbuf_size.
47*287e80b3SSadaf Ebrahimi *
48*287e80b3SSadaf Ebrahimi * Returns NULL on error.
49*287e80b3SSadaf Ebrahimi */
50*287e80b3SSadaf Ebrahimi struct tracefs_cpu *
tracefs_cpu_alloc_fd(int fd,int subbuf_size,bool nonblock)51*287e80b3SSadaf Ebrahimi tracefs_cpu_alloc_fd(int fd, int subbuf_size, bool nonblock)
52*287e80b3SSadaf Ebrahimi {
53*287e80b3SSadaf Ebrahimi struct tracefs_cpu *tcpu;
54*287e80b3SSadaf Ebrahimi int mode = O_RDONLY;
55*287e80b3SSadaf Ebrahimi int ret;
56*287e80b3SSadaf Ebrahimi
57*287e80b3SSadaf Ebrahimi tcpu = calloc(1, sizeof(*tcpu));
58*287e80b3SSadaf Ebrahimi if (!tcpu)
59*287e80b3SSadaf Ebrahimi return NULL;
60*287e80b3SSadaf Ebrahimi
61*287e80b3SSadaf Ebrahimi if (nonblock) {
62*287e80b3SSadaf Ebrahimi mode |= O_NONBLOCK;
63*287e80b3SSadaf Ebrahimi tcpu->flags |= TC_NONBLOCK | TC_PERM_NONBLOCK;
64*287e80b3SSadaf Ebrahimi }
65*287e80b3SSadaf Ebrahimi
66*287e80b3SSadaf Ebrahimi tcpu->splice_pipe[0] = -1;
67*287e80b3SSadaf Ebrahimi tcpu->splice_pipe[1] = -1;
68*287e80b3SSadaf Ebrahimi
69*287e80b3SSadaf Ebrahimi tcpu->fd = fd;
70*287e80b3SSadaf Ebrahimi
71*287e80b3SSadaf Ebrahimi tcpu->subbuf_size = subbuf_size;
72*287e80b3SSadaf Ebrahimi
73*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_PERM_NONBLOCK) {
74*287e80b3SSadaf Ebrahimi tcpu->ctrl_pipe[0] = -1;
75*287e80b3SSadaf Ebrahimi tcpu->ctrl_pipe[1] = -1;
76*287e80b3SSadaf Ebrahimi } else {
77*287e80b3SSadaf Ebrahimi /* ctrl_pipe is used to break out of blocked reads */
78*287e80b3SSadaf Ebrahimi ret = pipe(tcpu->ctrl_pipe);
79*287e80b3SSadaf Ebrahimi if (ret < 0)
80*287e80b3SSadaf Ebrahimi goto fail;
81*287e80b3SSadaf Ebrahimi if (tcpu->ctrl_pipe[0] > tcpu->fd)
82*287e80b3SSadaf Ebrahimi tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
83*287e80b3SSadaf Ebrahimi else
84*287e80b3SSadaf Ebrahimi tcpu->nfds = tcpu->fd + 1;
85*287e80b3SSadaf Ebrahimi }
86*287e80b3SSadaf Ebrahimi
87*287e80b3SSadaf Ebrahimi return tcpu;
88*287e80b3SSadaf Ebrahimi fail:
89*287e80b3SSadaf Ebrahimi free(tcpu);
90*287e80b3SSadaf Ebrahimi return NULL;
91*287e80b3SSadaf Ebrahimi }
92*287e80b3SSadaf Ebrahimi
/**
 * tracefs_cpu_open - open an instance raw trace file
 * @instance: the instance (NULL for toplevel) of the cpu raw file to open
 * @cpu: The CPU that the raw trace file is associated with
 * @nonblock: If true, the file will be opened in O_NONBLOCK mode
 *
 * Return a descriptor that can read the tracefs trace_pipe_raw file
 * for a given @cpu in a given @instance.
 *
 * The sub buffer size is taken from parsing the top level
 * events/header_page file.
 *
 * Returns NULL on error.
 */
struct tracefs_cpu *
tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
{
	struct tracefs_cpu *tcpu;
	struct tep_handle *tep;
	char path[128];
	char *buf;
	int mode = O_RDONLY;
	int subbuf_size;
	int len;
	int ret;
	int fd;

	if (nonblock)
		mode |= O_NONBLOCK;

	/* Bounded formatting, path can never be written past its end */
	snprintf(path, sizeof(path), "per_cpu/cpu%d/trace_pipe_raw", cpu);

	fd = tracefs_instance_file_open(instance, path, mode);
	if (fd < 0)
		return NULL;

	/* The tep handle is only needed to parse the header page */
	tep = tep_alloc();
	if (!tep)
		goto fail;

	/* Get the size of the page */
	buf = tracefs_instance_file_read(NULL, "events/header_page", &len);
	if (!buf)
		goto fail;

	ret = tep_parse_header_page(tep, buf, len, sizeof(long));
	free(buf);
	if (ret < 0)
		goto fail;

	subbuf_size = tep_get_sub_buffer_size(tep);
	tep_free(tep);
	/* The fail path frees tep again, make sure it sees NULL */
	tep = NULL;

	tcpu = tracefs_cpu_alloc_fd(fd, subbuf_size, nonblock);
	if (!tcpu)
		goto fail;

	return tcpu;
 fail:
	tep_free(tep);
	close(fd);
	return NULL;
}
154*287e80b3SSadaf Ebrahimi
/* Close @fd, silently skipping negative (never opened) descriptors */
static void close_fd(int fd)
{
	if (fd >= 0)
		close(fd);
}
161*287e80b3SSadaf Ebrahimi
162*287e80b3SSadaf Ebrahimi /**
163*287e80b3SSadaf Ebrahimi * tracefs_cpu_free_fd - clean up the tracefs_cpu descriptor
164*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor created with tracefs_cpu_alloc_fd()
165*287e80b3SSadaf Ebrahimi *
166*287e80b3SSadaf Ebrahimi * Closes all the internal file descriptors that were opened by
167*287e80b3SSadaf Ebrahimi * tracefs_cpu_alloc_fd(), and frees the descriptor.
168*287e80b3SSadaf Ebrahimi */
tracefs_cpu_free_fd(struct tracefs_cpu * tcpu)169*287e80b3SSadaf Ebrahimi void tracefs_cpu_free_fd(struct tracefs_cpu *tcpu)
170*287e80b3SSadaf Ebrahimi {
171*287e80b3SSadaf Ebrahimi close_fd(tcpu->ctrl_pipe[0]);
172*287e80b3SSadaf Ebrahimi close_fd(tcpu->ctrl_pipe[1]);
173*287e80b3SSadaf Ebrahimi close_fd(tcpu->splice_pipe[0]);
174*287e80b3SSadaf Ebrahimi close_fd(tcpu->splice_pipe[1]);
175*287e80b3SSadaf Ebrahimi
176*287e80b3SSadaf Ebrahimi free(tcpu);
177*287e80b3SSadaf Ebrahimi }
178*287e80b3SSadaf Ebrahimi
179*287e80b3SSadaf Ebrahimi /**
180*287e80b3SSadaf Ebrahimi * tracefs_cpu_close - clean up and close a raw trace descriptor
181*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor created with tracefs_cpu_open()
182*287e80b3SSadaf Ebrahimi *
183*287e80b3SSadaf Ebrahimi * Closes all the file descriptors associated to the trace_pipe_raw
184*287e80b3SSadaf Ebrahimi * opened by tracefs_cpu_open().
185*287e80b3SSadaf Ebrahimi */
tracefs_cpu_close(struct tracefs_cpu * tcpu)186*287e80b3SSadaf Ebrahimi void tracefs_cpu_close(struct tracefs_cpu *tcpu)
187*287e80b3SSadaf Ebrahimi {
188*287e80b3SSadaf Ebrahimi if (!tcpu)
189*287e80b3SSadaf Ebrahimi return;
190*287e80b3SSadaf Ebrahimi
191*287e80b3SSadaf Ebrahimi close(tcpu->fd);
192*287e80b3SSadaf Ebrahimi tracefs_cpu_free_fd(tcpu);
193*287e80b3SSadaf Ebrahimi }
194*287e80b3SSadaf Ebrahimi
195*287e80b3SSadaf Ebrahimi /**
196*287e80b3SSadaf Ebrahimi * tracefs_cpu_read_size - Return the size of the sub buffer
197*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor that holds the size of the sub buffer
198*287e80b3SSadaf Ebrahimi *
199*287e80b3SSadaf Ebrahimi * A lot of the functions that read the data from the trace_pipe_raw
200*287e80b3SSadaf Ebrahimi * expect the caller to have allocated enough space to store a full
201*287e80b3SSadaf Ebrahimi * subbuffer. Calling this function is a requirement to do so.
202*287e80b3SSadaf Ebrahimi */
tracefs_cpu_read_size(struct tracefs_cpu * tcpu)203*287e80b3SSadaf Ebrahimi int tracefs_cpu_read_size(struct tracefs_cpu *tcpu)
204*287e80b3SSadaf Ebrahimi {
205*287e80b3SSadaf Ebrahimi if (!tcpu)
206*287e80b3SSadaf Ebrahimi return -1;
207*287e80b3SSadaf Ebrahimi return tcpu->subbuf_size;
208*287e80b3SSadaf Ebrahimi }
209*287e80b3SSadaf Ebrahimi
set_nonblock(struct tracefs_cpu * tcpu)210*287e80b3SSadaf Ebrahimi static void set_nonblock(struct tracefs_cpu *tcpu)
211*287e80b3SSadaf Ebrahimi {
212*287e80b3SSadaf Ebrahimi long flags;
213*287e80b3SSadaf Ebrahimi
214*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_NONBLOCK)
215*287e80b3SSadaf Ebrahimi return;
216*287e80b3SSadaf Ebrahimi
217*287e80b3SSadaf Ebrahimi flags = fcntl(tcpu->fd, F_GETFL);
218*287e80b3SSadaf Ebrahimi fcntl(tcpu->fd, F_SETFL, flags | O_NONBLOCK);
219*287e80b3SSadaf Ebrahimi tcpu->flags |= TC_NONBLOCK;
220*287e80b3SSadaf Ebrahimi }
221*287e80b3SSadaf Ebrahimi
unset_nonblock(struct tracefs_cpu * tcpu)222*287e80b3SSadaf Ebrahimi static void unset_nonblock(struct tracefs_cpu *tcpu)
223*287e80b3SSadaf Ebrahimi {
224*287e80b3SSadaf Ebrahimi long flags;
225*287e80b3SSadaf Ebrahimi
226*287e80b3SSadaf Ebrahimi if (!(tcpu->flags & TC_NONBLOCK))
227*287e80b3SSadaf Ebrahimi return;
228*287e80b3SSadaf Ebrahimi
229*287e80b3SSadaf Ebrahimi flags = fcntl(tcpu->fd, F_GETFL);
230*287e80b3SSadaf Ebrahimi flags &= ~O_NONBLOCK;
231*287e80b3SSadaf Ebrahimi fcntl(tcpu->fd, F_SETFL, flags);
232*287e80b3SSadaf Ebrahimi tcpu->flags &= ~TC_NONBLOCK;
233*287e80b3SSadaf Ebrahimi }
234*287e80b3SSadaf Ebrahimi
235*287e80b3SSadaf Ebrahimi /*
236*287e80b3SSadaf Ebrahimi * If set to blocking mode, block until the watermark has been
237*287e80b3SSadaf Ebrahimi * reached, or the control has said to stop. If the contol is
238*287e80b3SSadaf Ebrahimi * set, then nonblock will be set to true on the way out.
239*287e80b3SSadaf Ebrahimi */
wait_on_input(struct tracefs_cpu * tcpu,bool nonblock)240*287e80b3SSadaf Ebrahimi static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
241*287e80b3SSadaf Ebrahimi {
242*287e80b3SSadaf Ebrahimi fd_set rfds;
243*287e80b3SSadaf Ebrahimi int ret;
244*287e80b3SSadaf Ebrahimi
245*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_PERM_NONBLOCK)
246*287e80b3SSadaf Ebrahimi return 1;
247*287e80b3SSadaf Ebrahimi
248*287e80b3SSadaf Ebrahimi if (nonblock) {
249*287e80b3SSadaf Ebrahimi set_nonblock(tcpu);
250*287e80b3SSadaf Ebrahimi return 1;
251*287e80b3SSadaf Ebrahimi } else {
252*287e80b3SSadaf Ebrahimi unset_nonblock(tcpu);
253*287e80b3SSadaf Ebrahimi }
254*287e80b3SSadaf Ebrahimi
255*287e80b3SSadaf Ebrahimi FD_ZERO(&rfds);
256*287e80b3SSadaf Ebrahimi FD_SET(tcpu->fd, &rfds);
257*287e80b3SSadaf Ebrahimi FD_SET(tcpu->ctrl_pipe[0], &rfds);
258*287e80b3SSadaf Ebrahimi
259*287e80b3SSadaf Ebrahimi ret = select(tcpu->nfds, &rfds, NULL, NULL, NULL);
260*287e80b3SSadaf Ebrahimi
261*287e80b3SSadaf Ebrahimi /* Let the application decide what to do with signals and such */
262*287e80b3SSadaf Ebrahimi if (ret < 0)
263*287e80b3SSadaf Ebrahimi return ret;
264*287e80b3SSadaf Ebrahimi
265*287e80b3SSadaf Ebrahimi if (FD_ISSET(tcpu->ctrl_pipe[0], &rfds)) {
266*287e80b3SSadaf Ebrahimi /* Flush the ctrl pipe */
267*287e80b3SSadaf Ebrahimi read(tcpu->ctrl_pipe[0], &ret, 1);
268*287e80b3SSadaf Ebrahimi
269*287e80b3SSadaf Ebrahimi /* Make nonblock as it is now stopped */
270*287e80b3SSadaf Ebrahimi set_nonblock(tcpu);
271*287e80b3SSadaf Ebrahimi /* Permanently set unblock */
272*287e80b3SSadaf Ebrahimi tcpu->flags |= TC_PERM_NONBLOCK;
273*287e80b3SSadaf Ebrahimi }
274*287e80b3SSadaf Ebrahimi
275*287e80b3SSadaf Ebrahimi return FD_ISSET(tcpu->fd, &rfds);
276*287e80b3SSadaf Ebrahimi }
277*287e80b3SSadaf Ebrahimi
278*287e80b3SSadaf Ebrahimi /**
279*287e80b3SSadaf Ebrahimi * tracefs_cpu_read - read from the raw trace file
280*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
281*287e80b3SSadaf Ebrahimi * @buffer: Where to read into (must be at least the size of the subbuffer)
282*287e80b3SSadaf Ebrahimi * @nonblock: Hint to not block on the read if there's no data.
283*287e80b3SSadaf Ebrahimi *
284*287e80b3SSadaf Ebrahimi * Reads the trace_pipe_raw files associated to @tcpu into @buffer.
285*287e80b3SSadaf Ebrahimi * @buffer must be at least the size of the sub buffer of the ring buffer,
286*287e80b3SSadaf Ebrahimi * which is returned by tracefs_cpu_read_size().
287*287e80b3SSadaf Ebrahimi *
288*287e80b3SSadaf Ebrahimi * If @nonblock is set, and there's no data available, it will return
289*287e80b3SSadaf Ebrahimi * immediately. Otherwise depending on how @tcpu was opened, it will
290*287e80b3SSadaf Ebrahimi * block. If @tcpu was opened with nonblock set, then this @nonblock
291*287e80b3SSadaf Ebrahimi * will make no difference.
292*287e80b3SSadaf Ebrahimi *
293*287e80b3SSadaf Ebrahimi * Returns the amount read or -1 on error.
294*287e80b3SSadaf Ebrahimi */
tracefs_cpu_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)295*287e80b3SSadaf Ebrahimi int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
296*287e80b3SSadaf Ebrahimi {
297*287e80b3SSadaf Ebrahimi int ret;
298*287e80b3SSadaf Ebrahimi
299*287e80b3SSadaf Ebrahimi /*
300*287e80b3SSadaf Ebrahimi * If nonblock is set, then the wait_on_input() will return
301*287e80b3SSadaf Ebrahimi * immediately, if there's nothing in the buffer, with
302*287e80b3SSadaf Ebrahimi * ret == 0.
303*287e80b3SSadaf Ebrahimi */
304*287e80b3SSadaf Ebrahimi ret = wait_on_input(tcpu, nonblock);
305*287e80b3SSadaf Ebrahimi if (ret <= 0)
306*287e80b3SSadaf Ebrahimi return ret;
307*287e80b3SSadaf Ebrahimi
308*287e80b3SSadaf Ebrahimi ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
309*287e80b3SSadaf Ebrahimi
310*287e80b3SSadaf Ebrahimi /* It's OK if there's no data to read */
311*287e80b3SSadaf Ebrahimi if (ret < 0 && errno == EAGAIN) {
312*287e80b3SSadaf Ebrahimi /* Reset errno */
313*287e80b3SSadaf Ebrahimi errno = 0;
314*287e80b3SSadaf Ebrahimi ret = 0;
315*287e80b3SSadaf Ebrahimi }
316*287e80b3SSadaf Ebrahimi
317*287e80b3SSadaf Ebrahimi return ret;
318*287e80b3SSadaf Ebrahimi }
319*287e80b3SSadaf Ebrahimi
init_splice(struct tracefs_cpu * tcpu)320*287e80b3SSadaf Ebrahimi static int init_splice(struct tracefs_cpu *tcpu)
321*287e80b3SSadaf Ebrahimi {
322*287e80b3SSadaf Ebrahimi int ret;
323*287e80b3SSadaf Ebrahimi
324*287e80b3SSadaf Ebrahimi if (tcpu->splice_pipe[0] >= 0)
325*287e80b3SSadaf Ebrahimi return 0;
326*287e80b3SSadaf Ebrahimi
327*287e80b3SSadaf Ebrahimi ret = pipe(tcpu->splice_pipe);
328*287e80b3SSadaf Ebrahimi if (ret < 0)
329*287e80b3SSadaf Ebrahimi return ret;
330*287e80b3SSadaf Ebrahimi
331*287e80b3SSadaf Ebrahimi ret = fcntl(tcpu->splice_pipe[0], F_GETPIPE_SZ, &tcpu->pipe_size);
332*287e80b3SSadaf Ebrahimi /*
333*287e80b3SSadaf Ebrahimi * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
334*287e80b3SSadaf Ebrahimi * in 2.6.31. If we are running on an older kernel, just fall
335*287e80b3SSadaf Ebrahimi * back to using subbuf_size for splice(). It could also return
336*287e80b3SSadaf Ebrahimi * the size of the pipe and not set pipe_size.
337*287e80b3SSadaf Ebrahimi */
338*287e80b3SSadaf Ebrahimi if (ret > 0 && !tcpu->pipe_size)
339*287e80b3SSadaf Ebrahimi tcpu->pipe_size = ret;
340*287e80b3SSadaf Ebrahimi else if (ret < 0)
341*287e80b3SSadaf Ebrahimi tcpu->pipe_size = tcpu->subbuf_size;
342*287e80b3SSadaf Ebrahimi
343*287e80b3SSadaf Ebrahimi tcpu->splice_read_flags = SPLICE_F_MOVE;
344*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_NONBLOCK)
345*287e80b3SSadaf Ebrahimi tcpu->splice_read_flags |= SPLICE_F_NONBLOCK;
346*287e80b3SSadaf Ebrahimi
347*287e80b3SSadaf Ebrahimi return 0;
348*287e80b3SSadaf Ebrahimi }
349*287e80b3SSadaf Ebrahimi
350*287e80b3SSadaf Ebrahimi /**
351*287e80b3SSadaf Ebrahimi * tracefs_cpu_buffered_read - Read the raw trace data buffering through a pipe
352*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
353*287e80b3SSadaf Ebrahimi * @buffer: Where to read into (must be at least the size of the subbuffer)
354*287e80b3SSadaf Ebrahimi * @nonblock: Hint to not block on the read if there's no data.
355*287e80b3SSadaf Ebrahimi *
356*287e80b3SSadaf Ebrahimi * This is basically the same as tracefs_cpu_read() except that it uses
357*287e80b3SSadaf Ebrahimi * a pipe through splice to buffer reads. This will batch reads keeping
358*287e80b3SSadaf Ebrahimi * the reading from the ring buffer less intrusive to the system, as
359*287e80b3SSadaf Ebrahimi * just reading all the time can cause quite a disturbance.
360*287e80b3SSadaf Ebrahimi *
361*287e80b3SSadaf Ebrahimi * Note, one difference between this and tracefs_cpu_read() is that it
362*287e80b3SSadaf Ebrahimi * will read only in sub buffer pages. If the ring buffer has not filled
363*287e80b3SSadaf Ebrahimi * a page, then it will not return anything, even with @nonblock set.
364*287e80b3SSadaf Ebrahimi * Calls to tracefs_cpu_flush() should be done to read the rest of
365*287e80b3SSadaf Ebrahimi * the file at the end of the trace.
366*287e80b3SSadaf Ebrahimi *
367*287e80b3SSadaf Ebrahimi * Returns the amount read or -1 on error.
368*287e80b3SSadaf Ebrahimi */
tracefs_cpu_buffered_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)369*287e80b3SSadaf Ebrahimi int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
370*287e80b3SSadaf Ebrahimi {
371*287e80b3SSadaf Ebrahimi int mode = SPLICE_F_MOVE;
372*287e80b3SSadaf Ebrahimi int ret;
373*287e80b3SSadaf Ebrahimi
374*287e80b3SSadaf Ebrahimi if (tcpu->buffered < 0)
375*287e80b3SSadaf Ebrahimi tcpu->buffered = 0;
376*287e80b3SSadaf Ebrahimi
377*287e80b3SSadaf Ebrahimi if (tcpu->buffered)
378*287e80b3SSadaf Ebrahimi goto do_read;
379*287e80b3SSadaf Ebrahimi
380*287e80b3SSadaf Ebrahimi ret = wait_on_input(tcpu, nonblock);
381*287e80b3SSadaf Ebrahimi if (ret <= 0)
382*287e80b3SSadaf Ebrahimi return ret;
383*287e80b3SSadaf Ebrahimi
384*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_NONBLOCK)
385*287e80b3SSadaf Ebrahimi mode |= SPLICE_F_NONBLOCK;
386*287e80b3SSadaf Ebrahimi
387*287e80b3SSadaf Ebrahimi ret = init_splice(tcpu);
388*287e80b3SSadaf Ebrahimi if (ret < 0)
389*287e80b3SSadaf Ebrahimi return ret;
390*287e80b3SSadaf Ebrahimi
391*287e80b3SSadaf Ebrahimi ret = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
392*287e80b3SSadaf Ebrahimi tcpu->pipe_size, mode);
393*287e80b3SSadaf Ebrahimi if (ret <= 0)
394*287e80b3SSadaf Ebrahimi return ret;
395*287e80b3SSadaf Ebrahimi
396*287e80b3SSadaf Ebrahimi tcpu->buffered = ret;
397*287e80b3SSadaf Ebrahimi
398*287e80b3SSadaf Ebrahimi do_read:
399*287e80b3SSadaf Ebrahimi ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
400*287e80b3SSadaf Ebrahimi if (ret > 0)
401*287e80b3SSadaf Ebrahimi tcpu->buffered -= ret;
402*287e80b3SSadaf Ebrahimi return ret;
403*287e80b3SSadaf Ebrahimi }
404*287e80b3SSadaf Ebrahimi
405*287e80b3SSadaf Ebrahimi /**
406*287e80b3SSadaf Ebrahimi * tracefs_cpu_stop - Stop a blocked read of the raw tracing file
407*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
408*287e80b3SSadaf Ebrahimi *
409*287e80b3SSadaf Ebrahimi * This will attempt to unblock a task blocked on @tcpu reading it.
410*287e80b3SSadaf Ebrahimi * On older kernels, it may not do anything for the pipe reads, as
411*287e80b3SSadaf Ebrahimi * older kernels do not wake up tasks waiting on the ring buffer.
412*287e80b3SSadaf Ebrahimi *
413*287e80b3SSadaf Ebrahimi * Returns 0 if the tasks reading the raw tracing file does not
414*287e80b3SSadaf Ebrahimi * need a nudge.
415*287e80b3SSadaf Ebrahimi *
416*287e80b3SSadaf Ebrahimi * Returns 1 if that tasks may need a nudge (send a signal).
417*287e80b3SSadaf Ebrahimi *
418*287e80b3SSadaf Ebrahimi * Returns negative on error.
419*287e80b3SSadaf Ebrahimi */
tracefs_cpu_stop(struct tracefs_cpu * tcpu)420*287e80b3SSadaf Ebrahimi int tracefs_cpu_stop(struct tracefs_cpu *tcpu)
421*287e80b3SSadaf Ebrahimi {
422*287e80b3SSadaf Ebrahimi int ret = 1;
423*287e80b3SSadaf Ebrahimi
424*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_PERM_NONBLOCK)
425*287e80b3SSadaf Ebrahimi return 0;
426*287e80b3SSadaf Ebrahimi
427*287e80b3SSadaf Ebrahimi ret = write(tcpu->ctrl_pipe[1], &ret, 1);
428*287e80b3SSadaf Ebrahimi if (ret < 0)
429*287e80b3SSadaf Ebrahimi return ret;
430*287e80b3SSadaf Ebrahimi
431*287e80b3SSadaf Ebrahimi /* Calling ioctl() on recent kernels will wake up the waiters */
432*287e80b3SSadaf Ebrahimi ret = ioctl(tcpu->fd, 0);
433*287e80b3SSadaf Ebrahimi if (ret < 0)
434*287e80b3SSadaf Ebrahimi ret = 1;
435*287e80b3SSadaf Ebrahimi else
436*287e80b3SSadaf Ebrahimi ret = 0;
437*287e80b3SSadaf Ebrahimi
438*287e80b3SSadaf Ebrahimi set_nonblock(tcpu);
439*287e80b3SSadaf Ebrahimi
440*287e80b3SSadaf Ebrahimi return ret;
441*287e80b3SSadaf Ebrahimi }
442*287e80b3SSadaf Ebrahimi
443*287e80b3SSadaf Ebrahimi /**
444*287e80b3SSadaf Ebrahimi * tracefs_cpu_flush - Finish out and read the rest of the raw tracing file
445*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
446*287e80b3SSadaf Ebrahimi * @buffer: Where to read into (must be at least the size of the subbuffer)
447*287e80b3SSadaf Ebrahimi *
448*287e80b3SSadaf Ebrahimi * Reads the trace_pipe_raw file associated by the @tcpu and puts it
449*287e80b3SSadaf Ebrahimi * into @buffer, which must be the size of the sub buffer which is retrieved.
450*287e80b3SSadaf Ebrahimi * by tracefs_cpu_read_size(). This should be called at the end of tracing
451*287e80b3SSadaf Ebrahimi * to get the rest of the data.
452*287e80b3SSadaf Ebrahimi *
453*287e80b3SSadaf Ebrahimi * This will set the file descriptor for reading to non-blocking mode.
454*287e80b3SSadaf Ebrahimi *
455*287e80b3SSadaf Ebrahimi * Returns the number of bytes read, or negative on error.
456*287e80b3SSadaf Ebrahimi */
tracefs_cpu_flush(struct tracefs_cpu * tcpu,void * buffer)457*287e80b3SSadaf Ebrahimi int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer)
458*287e80b3SSadaf Ebrahimi {
459*287e80b3SSadaf Ebrahimi int ret;
460*287e80b3SSadaf Ebrahimi
461*287e80b3SSadaf Ebrahimi /* Make sure that reading is now non blocking */
462*287e80b3SSadaf Ebrahimi set_nonblock(tcpu);
463*287e80b3SSadaf Ebrahimi
464*287e80b3SSadaf Ebrahimi if (tcpu->buffered < 0)
465*287e80b3SSadaf Ebrahimi tcpu->buffered = 0;
466*287e80b3SSadaf Ebrahimi
467*287e80b3SSadaf Ebrahimi if (tcpu->buffered) {
468*287e80b3SSadaf Ebrahimi ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
469*287e80b3SSadaf Ebrahimi if (ret > 0)
470*287e80b3SSadaf Ebrahimi tcpu->buffered -= ret;
471*287e80b3SSadaf Ebrahimi return ret;
472*287e80b3SSadaf Ebrahimi }
473*287e80b3SSadaf Ebrahimi
474*287e80b3SSadaf Ebrahimi ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
475*287e80b3SSadaf Ebrahimi if (ret > 0 && tcpu->buffered)
476*287e80b3SSadaf Ebrahimi tcpu->buffered -= ret;
477*287e80b3SSadaf Ebrahimi
478*287e80b3SSadaf Ebrahimi /* It's OK if there's no data to read */
479*287e80b3SSadaf Ebrahimi if (ret < 0 && errno == EAGAIN) {
480*287e80b3SSadaf Ebrahimi /* Reset errno */
481*287e80b3SSadaf Ebrahimi errno = 0;
482*287e80b3SSadaf Ebrahimi ret = 0;
483*287e80b3SSadaf Ebrahimi }
484*287e80b3SSadaf Ebrahimi
485*287e80b3SSadaf Ebrahimi return ret;
486*287e80b3SSadaf Ebrahimi }
487*287e80b3SSadaf Ebrahimi
488*287e80b3SSadaf Ebrahimi /**
489*287e80b3SSadaf Ebrahimi * tracefs_cpu_flush_write - Finish out and read the rest of the raw tracing file
490*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
491*287e80b3SSadaf Ebrahimi * @wfd: The write file descriptor to write the data to
492*287e80b3SSadaf Ebrahimi *
493*287e80b3SSadaf Ebrahimi * Reads the trace_pipe_raw file associated by the @tcpu and writes it to
494*287e80b3SSadaf Ebrahimi * @wfd. This should be called at the end of tracing to get the rest of the data.
495*287e80b3SSadaf Ebrahimi *
496*287e80b3SSadaf Ebrahimi * Returns the number of bytes written, or negative on error.
497*287e80b3SSadaf Ebrahimi */
tracefs_cpu_flush_write(struct tracefs_cpu * tcpu,int wfd)498*287e80b3SSadaf Ebrahimi int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd)
499*287e80b3SSadaf Ebrahimi {
500*287e80b3SSadaf Ebrahimi char buffer[tcpu->subbuf_size];
501*287e80b3SSadaf Ebrahimi int ret;
502*287e80b3SSadaf Ebrahimi
503*287e80b3SSadaf Ebrahimi ret = tracefs_cpu_flush(tcpu, buffer);
504*287e80b3SSadaf Ebrahimi if (ret > 0)
505*287e80b3SSadaf Ebrahimi ret = write(wfd, buffer, ret);
506*287e80b3SSadaf Ebrahimi
507*287e80b3SSadaf Ebrahimi /* It's OK if there's no data to read */
508*287e80b3SSadaf Ebrahimi if (ret < 0 && errno == EAGAIN)
509*287e80b3SSadaf Ebrahimi ret = 0;
510*287e80b3SSadaf Ebrahimi
511*287e80b3SSadaf Ebrahimi return ret;
512*287e80b3SSadaf Ebrahimi }
513*287e80b3SSadaf Ebrahimi
514*287e80b3SSadaf Ebrahimi /**
515*287e80b3SSadaf Ebrahimi * tracefs_cpu_write - Write the raw trace file into a file descriptor
516*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
517*287e80b3SSadaf Ebrahimi * @wfd: The write file descriptor to write the data to
518*287e80b3SSadaf Ebrahimi * @nonblock: Hint to not block on the read if there's no data.
519*287e80b3SSadaf Ebrahimi *
520*287e80b3SSadaf Ebrahimi * This will pipe the data from the trace_pipe_raw file associated with @tcpu
521*287e80b3SSadaf Ebrahimi * into the @wfd file descriptor. If @nonblock is set, then it will not
522*287e80b3SSadaf Ebrahimi * block on if there's nothing to write. Note, it will only write sub buffer
523*287e80b3SSadaf Ebrahimi * size data to @wfd. Calls to tracefs_cpu_flush_write() are needed to
524*287e80b3SSadaf Ebrahimi * write out the rest.
525*287e80b3SSadaf Ebrahimi *
526*287e80b3SSadaf Ebrahimi * Returns the number of bytes read or negative on error.
527*287e80b3SSadaf Ebrahimi */
tracefs_cpu_write(struct tracefs_cpu * tcpu,int wfd,bool nonblock)528*287e80b3SSadaf Ebrahimi int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
529*287e80b3SSadaf Ebrahimi {
530*287e80b3SSadaf Ebrahimi char buffer[tcpu->subbuf_size];
531*287e80b3SSadaf Ebrahimi int mode = SPLICE_F_MOVE;
532*287e80b3SSadaf Ebrahimi int tot_write = 0;
533*287e80b3SSadaf Ebrahimi int tot;
534*287e80b3SSadaf Ebrahimi int ret;
535*287e80b3SSadaf Ebrahimi
536*287e80b3SSadaf Ebrahimi ret = wait_on_input(tcpu, nonblock);
537*287e80b3SSadaf Ebrahimi if (ret <= 0)
538*287e80b3SSadaf Ebrahimi return ret;
539*287e80b3SSadaf Ebrahimi
540*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_NONBLOCK)
541*287e80b3SSadaf Ebrahimi mode |= SPLICE_F_NONBLOCK;
542*287e80b3SSadaf Ebrahimi
543*287e80b3SSadaf Ebrahimi ret = init_splice(tcpu);
544*287e80b3SSadaf Ebrahimi if (ret < 0)
545*287e80b3SSadaf Ebrahimi return ret;
546*287e80b3SSadaf Ebrahimi
547*287e80b3SSadaf Ebrahimi tot = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
548*287e80b3SSadaf Ebrahimi tcpu->pipe_size, mode);
549*287e80b3SSadaf Ebrahimi if (tot < 0)
550*287e80b3SSadaf Ebrahimi return tot;
551*287e80b3SSadaf Ebrahimi
552*287e80b3SSadaf Ebrahimi if (tot == 0)
553*287e80b3SSadaf Ebrahimi return 0;
554*287e80b3SSadaf Ebrahimi
555*287e80b3SSadaf Ebrahimi ret = splice(tcpu->splice_pipe[0], NULL, wfd, NULL,
556*287e80b3SSadaf Ebrahimi tot, SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
557*287e80b3SSadaf Ebrahimi
558*287e80b3SSadaf Ebrahimi if (ret >= 0)
559*287e80b3SSadaf Ebrahimi return ret;
560*287e80b3SSadaf Ebrahimi
561*287e80b3SSadaf Ebrahimi /* Some file systems do not allow splicing, try writing instead */
562*287e80b3SSadaf Ebrahimi do {
563*287e80b3SSadaf Ebrahimi int r = tcpu->subbuf_size;
564*287e80b3SSadaf Ebrahimi
565*287e80b3SSadaf Ebrahimi if (r > tot)
566*287e80b3SSadaf Ebrahimi r = tot;
567*287e80b3SSadaf Ebrahimi
568*287e80b3SSadaf Ebrahimi ret = read(tcpu->splice_pipe[0], buffer, r);
569*287e80b3SSadaf Ebrahimi if (ret > 0) {
570*287e80b3SSadaf Ebrahimi tot -= ret;
571*287e80b3SSadaf Ebrahimi ret = write(wfd, buffer, ret);
572*287e80b3SSadaf Ebrahimi }
573*287e80b3SSadaf Ebrahimi if (ret > 0)
574*287e80b3SSadaf Ebrahimi tot_write += ret;
575*287e80b3SSadaf Ebrahimi } while (ret > 0);
576*287e80b3SSadaf Ebrahimi
577*287e80b3SSadaf Ebrahimi if (ret < 0)
578*287e80b3SSadaf Ebrahimi return ret;
579*287e80b3SSadaf Ebrahimi
580*287e80b3SSadaf Ebrahimi return tot_write;
581*287e80b3SSadaf Ebrahimi }
582*287e80b3SSadaf Ebrahimi
583*287e80b3SSadaf Ebrahimi /**
584*287e80b3SSadaf Ebrahimi * tracefs_cpu_pipe - Write the raw trace file into a pipe descriptor
585*287e80b3SSadaf Ebrahimi * @tcpu: The descriptor representing the raw trace file
586*287e80b3SSadaf Ebrahimi * @wfd: The write file descriptor to write the data to (must be a pipe)
587*287e80b3SSadaf Ebrahimi * @nonblock: Hint to not block on the read if there's no data.
588*287e80b3SSadaf Ebrahimi *
589*287e80b3SSadaf Ebrahimi * This will splice directly the file descriptor of the trace_pipe_raw
590*287e80b3SSadaf Ebrahimi * file to the given @wfd, which must be a pipe. This can also be used
591*287e80b3SSadaf Ebrahimi * if @tcpu was created with tracefs_cpu_create_fd() where the passed
592*287e80b3SSadaf Ebrahimi * in @fd there was a pipe, then @wfd does not need to be a pipe.
593*287e80b3SSadaf Ebrahimi *
594*287e80b3SSadaf Ebrahimi * Returns the number of bytes read or negative on error.
595*287e80b3SSadaf Ebrahimi */
tracefs_cpu_pipe(struct tracefs_cpu * tcpu,int wfd,bool nonblock)596*287e80b3SSadaf Ebrahimi int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
597*287e80b3SSadaf Ebrahimi {
598*287e80b3SSadaf Ebrahimi int mode = SPLICE_F_MOVE;
599*287e80b3SSadaf Ebrahimi int ret;
600*287e80b3SSadaf Ebrahimi
601*287e80b3SSadaf Ebrahimi ret = wait_on_input(tcpu, nonblock);
602*287e80b3SSadaf Ebrahimi if (ret <= 0)
603*287e80b3SSadaf Ebrahimi return ret;
604*287e80b3SSadaf Ebrahimi
605*287e80b3SSadaf Ebrahimi if (tcpu->flags & TC_NONBLOCK)
606*287e80b3SSadaf Ebrahimi mode |= SPLICE_F_NONBLOCK;
607*287e80b3SSadaf Ebrahimi
608*287e80b3SSadaf Ebrahimi ret = splice(tcpu->fd, NULL, wfd, NULL,
609*287e80b3SSadaf Ebrahimi tcpu->pipe_size, mode);
610*287e80b3SSadaf Ebrahimi return ret;
611*287e80b3SSadaf Ebrahimi }
612