xref: /aosp_15_r20/external/mesa3d/src/intel/common/intel_measure.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file intel_measure.c
25  */
26 
27 #include "intel_measure.h"
28 
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <sys/stat.h>
34 #include <sys/types.h>
35 #include <unistd.h>
36 
37 #define __STDC_FORMAT_MACROS 1
38 #include <inttypes.h>
39 
40 #include "dev/intel_device_info.h"
41 #include "util/os_time.h"
42 #include "util/u_debug.h"
43 #include "util/macros.h"
44 
45 
/* Maps INTEL_MEASURE option keywords to capture-granularity flags; consumed
 * by parse_debug_string() in intel_measure_init().
 */
static const struct debug_control debug_control[] = {
   { "draw",            INTEL_MEASURE_DRAW       },
   { "rt",              INTEL_MEASURE_RENDERPASS },
   { "shader",          INTEL_MEASURE_SHADER     },
   { "batch",           INTEL_MEASURE_BATCH      },
   { "frame",           INTEL_MEASURE_FRAME      },
   { NULL, 0 }
};
/* Process-wide measurement configuration, parsed once from the INTEL_MEASURE
 * environment variable in intel_measure_init() and shared by all devices.
 */
static struct intel_measure_config config;
55 
56 void
intel_measure_init(struct intel_measure_device * device)57 intel_measure_init(struct intel_measure_device *device)
58 {
59    static bool once = false;
60    const char *env = getenv("INTEL_MEASURE");
61    if (unlikely(!once)) {
62       once = true;
63       memset(&config, 0, sizeof(struct intel_measure_config));
64       if (!env)
65          return;
66 
67       char env_copy[1024];
68       strncpy(env_copy, env, 1024);
69       env_copy[1023] = '\0';
70 
71       config.file = stderr;
72       config.flags = parse_debug_string(env_copy, debug_control);
73       if (!config.flags)
74          config.flags = INTEL_MEASURE_DRAW;
75       config.enabled = true;
76       config.event_interval = 1;
77       config.control_fh = -1;
78 
79       /* Overflows of the following defaults will drop data and generate a
80        * warning on the output filehandle.
81        */
82 
83       /* default batch_size allows for 32k renders in a single batch */
84       const int MINIMUM_BATCH_SIZE = 1024;
85       const int DEFAULT_BATCH_SIZE = 64 * 1024;
86       config.batch_size = DEFAULT_BATCH_SIZE;
87 
88       /* Default buffer_size allows for 64k batches per line of output in the
89        * csv.  Overflow may occur for offscreen workloads or large 'interval'
90        * settings.
91        */
92       const int MINIMUM_BUFFER_SIZE = 1024;
93       const int DEFAULT_BUFFER_SIZE = 64 * 1024;
94       config.buffer_size = DEFAULT_BUFFER_SIZE;
95 
96       const char *filename = strstr(env_copy, "file=");
97       const char *start_frame_s = strstr(env_copy, "start=");
98       const char *count_frame_s = strstr(env_copy, "count=");
99       const char *control_path = strstr(env_copy, "control=");
100       const char *interval_s = strstr(env_copy, "interval=");
101       const char *batch_size_s = strstr(env_copy, "batch_size=");
102       const char *buffer_size_s = strstr(env_copy, "buffer_size=");
103       const char *cpu_s = strstr(env_copy, "cpu");
104       while (true) {
105          char *sep = strrchr(env_copy, ',');
106          if (sep == NULL)
107             break;
108          *sep = '\0';
109       }
110 
111       if (filename && __normal_user()) {
112          filename += 5;
113          config.file = fopen(filename, "w");
114          if (!config.file) {
115             fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
116                     filename, strerror (errno));
117             abort();
118          }
119       }
120 
121       if (start_frame_s) {
122          start_frame_s += 6;
123          const int start_frame = atoi(start_frame_s);
124          if (start_frame < 0) {
125             fprintf(stderr, "INTEL_MEASURE start frame may "
126                     "not be negative: %d\n", start_frame);
127             abort();
128          }
129 
130          config.start_frame = start_frame;
131          config.enabled = false;
132       }
133 
134       if (count_frame_s) {
135          count_frame_s += 6;
136          const int count_frame = atoi(count_frame_s);
137          if (count_frame <= 0) {
138             fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
139                     count_frame);
140             abort();
141          }
142 
143          config.end_frame = config.start_frame + count_frame;
144       }
145 
146       if (control_path) {
147          control_path += 8;
148          if (mkfifoat(AT_FDCWD, control_path, O_CREAT | S_IRUSR | S_IWUSR)) {
149             if (errno != EEXIST) {
150                fprintf(stderr, "INTEL_MEASURE failed to create control "
151                        "fifo %s: %s\n", control_path, strerror (errno));
152                abort();
153             }
154          }
155 
156          config.control_fh = openat(AT_FDCWD, control_path,
157                                     O_RDONLY | O_NONBLOCK);
158          if (config.control_fh == -1) {
159             fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
160                     "%s: %s\n", control_path, strerror (errno));
161             abort();
162          }
163 
164          /* when using a control fifo, do not start until the user triggers
165           * capture
166           */
167          config.enabled = false;
168       }
169 
170       if (interval_s) {
171          interval_s += 9;
172          const int event_interval = atoi(interval_s);
173          if (event_interval < 1) {
174             fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
175                     "%d\n", event_interval);
176             abort();
177          }
178          config.event_interval = event_interval;
179       }
180 
181       if (batch_size_s) {
182          batch_size_s += 11;
183          const int batch_size = atoi(batch_size_s);
184          if (batch_size < MINIMUM_BATCH_SIZE ) {
185             fprintf(stderr, "INTEL_MEASURE minimum batch_size is 1k: "
186                     "%d\n", batch_size);
187             abort();
188          }
189          if (batch_size > MINIMUM_BATCH_SIZE * 4 * 1024) {
190             fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "
191                     "%d\n", batch_size);
192             abort();
193          }
194 
195          config.batch_size = batch_size;
196       }
197 
198       if (buffer_size_s) {
199          buffer_size_s += 12;
200          const int buffer_size = atoi(buffer_size_s);
201          if (buffer_size < MINIMUM_BUFFER_SIZE) {
202             fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "
203                     "%d\n", DEFAULT_BUFFER_SIZE);
204          }
205          if (buffer_size > MINIMUM_BUFFER_SIZE * 1024) {
206             fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "
207                     "%d\n", buffer_size);
208          }
209 
210          config.buffer_size = buffer_size;
211       }
212 
213       if (cpu_s) {
214          config.cpu_measure = true;
215       }
216 
217       if (!config.cpu_measure)
218          fputs("draw_start,draw_end,frame,batch,batch_size,renderpass,"
219                "event_index,event_count,type,count,vs,tcs,tes,"
220                "gs,fs,cs,ms,ts,idle_us,time_us\n",
221                config.file);
222       else
223          fputs("draw_start,frame,batch,batch_size,event_index,event_count,"
224                "type,count\n",
225                config.file);
226    }
227 
228    device->config = NULL;
229    device->frame = 0;
230    device->render_pass_count = 0;
231    device->release_batch = NULL;
232    pthread_mutex_init(&device->mutex, NULL);
233    list_inithead(&device->queued_snapshots);
234 
235    if (env)
236       device->config = &config;
237 }
238 
239 const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)240 intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
241 {
242    const char *names[] = {
243       [INTEL_SNAPSHOT_UNDEFINED]           = "undefined",
244       [INTEL_SNAPSHOT_BLIT]                = "blit",
245       [INTEL_SNAPSHOT_CCS_AMBIGUATE]       = "ccs ambiguate",
246       [INTEL_SNAPSHOT_CCS_COLOR_CLEAR]     = "ccs color clear",
247       [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
248       [INTEL_SNAPSHOT_CCS_RESOLVE]         = "ccs resolve",
249       [INTEL_SNAPSHOT_COMPUTE]             = "compute",
250       [INTEL_SNAPSHOT_COPY]                = "copy",
251       [INTEL_SNAPSHOT_DRAW]                = "draw",
252       [INTEL_SNAPSHOT_HIZ_AMBIGUATE]       = "hiz ambiguate",
253       [INTEL_SNAPSHOT_HIZ_CLEAR]           = "hiz clear",
254       [INTEL_SNAPSHOT_HIZ_RESOLVE]         = "hiz resolve",
255       [INTEL_SNAPSHOT_MCS_AMBIGUATE]       = "mcs ambiguate",
256       [INTEL_SNAPSHOT_MCS_COLOR_CLEAR]     = "mcs color clear",
257       [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
258       [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR]    = "slow color clear",
259       [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR]    = "slow depth clear",
260       [INTEL_SNAPSHOT_SECONDARY_BATCH]     = "secondary command buffer",
261       [INTEL_SNAPSHOT_END]                 = "end",
262    };
263    assert(type < ARRAY_SIZE(names));
264    assert(names[type] != NULL);
265    assert(type != INTEL_SNAPSHOT_UNDEFINED);
266    return names[type];
267 }
268 
269 /**
270  * Indicate to the caller whether a new snapshot should be started.
271  *
272  * Callers provide rendering state to this method to determine whether the
273  * current start event should be skipped. Depending on the configuration
274  * flags, a new snapshot may start:
275  *  - at every event
276  *  - when the program changes
277  *  - after a batch is submitted
278  *  - at frame boundaries
279  *
280  * Returns true if a snapshot should be started.
281  */
282 bool
intel_measure_state_changed(const struct intel_measure_batch * batch,uint32_t vs,uint32_t tcs,uint32_t tes,uint32_t gs,uint32_t fs,uint32_t cs,uint32_t ms,uint32_t ts)283 intel_measure_state_changed(const struct intel_measure_batch *batch,
284                             uint32_t vs, uint32_t tcs, uint32_t tes,
285                             uint32_t gs, uint32_t fs, uint32_t cs,
286                             uint32_t ms, uint32_t ts)
287 {
288    if (batch->index == 0) {
289       /* always record the first event */
290       return true;
291    }
292 
293    const struct intel_measure_snapshot *last_snap =
294       &batch->snapshots[batch->index - 1];
295 
296    if (config.flags & INTEL_MEASURE_DRAW)
297       return true;
298 
299    if (batch->index % 2 == 0) {
300       /* no snapshot is running, but we have a start event */
301       return true;
302    }
303 
304    if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
305       /* only start collection when index == 0, at the beginning of a batch */
306       return false;
307    }
308 
309    if (config.flags & INTEL_MEASURE_RENDERPASS) {
310       bool new_renderpass = !cs && last_snap->renderpass != batch->renderpass;
311       bool new_compute_block = cs && last_snap->type != INTEL_SNAPSHOT_COMPUTE;
312       return new_renderpass || new_compute_block;
313    }
314 
315    /* remaining comparisons check the state of the render pipeline for
316     * INTEL_MEASURE_PROGRAM
317     */
318    assert(config.flags & INTEL_MEASURE_SHADER);
319 
320    if (!vs && !tcs && !tes && !gs && !fs && !cs && !ms && !ts) {
321       /* blorp always changes program */
322       return true;
323    }
324 
325    return (last_snap->vs  != vs ||
326            last_snap->tcs != tcs ||
327            last_snap->tes != tes ||
328            last_snap->gs  != gs ||
329            last_snap->fs  != fs ||
330            last_snap->cs  != cs ||
331            last_snap->ms  != ms ||
332            last_snap->ts  != ts);
333 }
334 
335 /**
336  * Notify intel_measure that a frame is about to begin.
337  *
338  * Configuration values and the control fifo may commence measurement at frame
339  * boundaries.
340  */
341 void
intel_measure_frame_transition(unsigned frame)342 intel_measure_frame_transition(unsigned frame)
343 {
344    if (frame == config.start_frame)
345       config.enabled = true;
346    else if (frame == config.end_frame)
347       config.enabled = false;
348 
349    /* user commands to the control fifo will override any start/count
350     * environment settings
351     */
352    if (config.control_fh != -1) {
353       while (true) {
354          const unsigned BUF_SIZE = 128;
355          char buf[BUF_SIZE];
356          ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
357          if (bytes == 0)
358             break;
359          if (bytes == -1) {
360             fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
361                     strerror(errno));
362             abort();
363          }
364 
365          buf[bytes] = '\0';
366          char *nptr = buf, *endptr = buf;
367          while (*nptr != '\0' && *endptr != '\0') {
368             long fcount = strtol(nptr, &endptr, 10);
369             if (nptr == endptr) {
370                config.enabled = false;
371                fprintf(stderr, "INTEL_MEASURE invalid frame count on "
372                        "control fifo.\n");
373                lseek(config.control_fh, 0, SEEK_END);
374                break;
375             } else if (fcount == 0) {
376                config.enabled = false;
377             } else {
378                config.enabled = true;
379                config.end_frame = frame + fcount;
380             }
381 
382             nptr = endptr + 1;
383          }
384       }
385    }
386 }
387 
#define TIMESTAMP_BITS 36
/* Difference between two raw GPU timestamps, compensating for a single wrap
 * of the TIMESTAMP_BITS-wide counter when time0 is numerically later.
 */
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   const uint64_t wrap = (time0 > time1) ? (1ULL << TIMESTAMP_BITS) : 0;
   return wrap + time1 - time0;
}
398 
399 /**
400  * Verify that rendering has completed for the batch
401  *
402  * Rendering is complete when the last timestamp has been written.
403 */
404 bool
intel_measure_ready(struct intel_measure_batch * batch)405 intel_measure_ready(struct intel_measure_batch *batch)
406 {
407    assert(batch->timestamps);
408    assert(batch->index > 1);
409    return (batch->timestamps[batch->index - 1] != 0);
410 }
411 
/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(batch->index == 0 || timestamps[0] != 0);

   /* Snapshots are recorded as begin/end pairs; one pair per iteration. */
   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i+1];

      assert (end->type == INTEL_SNAPSHOT_END);

      /* A secondary command buffer inherits the batch count and renderpass
       * of its primary, and its own snapshots are pushed recursively.
       */
      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         begin->secondary->batch_size = 0;
         begin->secondary->primary_renderpass = batch->renderpass;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      /* End timestamp of the most recently buffered result; read before the
       * head advances so the idle gap preceding this event can be computed.
       */
      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         /* Ringbuffer full: drop the rest of this batch's results and warn
          * once on the output filehandle.
          */
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i+1];
      /* Idle time between the previous buffered event's end and this
       * event's start, in raw timestamp units.
       */
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      buffered_result->batch_size = batch->batch_size;
      buffered_result->primary_renderpass = batch->primary_renderpass;
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}
480 
481 static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer * rb)482 ringbuffer_size(const struct intel_measure_ringbuffer *rb)
483 {
484    unsigned head = rb->head;
485    if (head < rb->tail)
486       head += config.buffer_size;
487    return head - rb->tail;
488 }
489 
490 static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer * rb)491 ringbuffer_pop(struct intel_measure_ringbuffer *rb)
492 {
493    if (rb->tail == rb->head) {
494       /* encountered ringbuffer overflow while processing events */
495       return NULL;
496    }
497 
498    if (++rb->tail == config.buffer_size)
499       rb->tail = 0;
500    return &rb->results[rb->tail];
501 }
502 
503 static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer * rb,unsigned index)504 ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
505 {
506    int result_offset = rb->tail + index + 1;
507    if (result_offset >= config.buffer_size)
508       result_offset -= config.buffer_size;
509    return &rb->results[result_offset];
510 }
511 
512 
/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output. Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   /* Count of results currently buffered (local shadows the function name). */
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output.  None of these events span batches.  If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* each buffered event is a command buffer.  The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition.  The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval.  Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      /* Unreachable: the peek above established a transition exists. */
      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   /* No interval-spanning frame transition buffered yet. */
   return 0;
}
584 
/**
 * Take result_count events from the ringbuffer and output them as a single
 * line.
 */
static void
print_combined_results(struct intel_measure_device *measure_device,
                       int result_count,
                       const struct intel_device_info *info)
{
   if (result_count == 0)
      return;

   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
   assert(ringbuffer_size(result_rb) >= result_count);
   const struct intel_measure_buffered_result* start_result =
      ringbuffer_pop(result_rb);
   const struct intel_measure_buffered_result* current_result = start_result;

   /* NULL means the ringbuffer overflowed and dropped data; nothing can be
    * printed for this interval.
    */
   if (start_result == NULL)
      return;
   --result_count;

   /* Accumulate busy time and event counts across the combined results. */
   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
                                              current_result->end_ts);
   unsigned event_count = start_result->snapshot.event_count;
   while (result_count-- > 0) {
      assert(ringbuffer_size(result_rb) > 0);
      current_result = ringbuffer_pop(result_rb);
      if (current_result == NULL)
         return;
      duration_ts += raw_timestamp_delta(current_result->start_ts,
                                         current_result->end_ts);
      event_count += current_result->snapshot.event_count;
   }

   /* Scale raw timestamp deltas to nanoseconds for the csv output. */
   uint64_t duration_idle_ns =
      intel_device_info_timebase_scale(info, start_result->idle_duration);
   uint64_t duration_time_ns =
      intel_device_info_timebase_scale(info, duration_ts);
   const struct intel_measure_snapshot *begin = &start_result->snapshot;
   /* Events from a secondary command buffer report the renderpass of their
    * primary when one was recorded.
    */
   uint32_t renderpass = (start_result->primary_renderpass)
      ? start_result->primary_renderpass : begin->renderpass;
   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%"PRIu64",%u,%u,%u,%s,%u,"
           "0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,%.3lf,%.3lf\n",
           start_result->start_ts, current_result->end_ts,
           start_result->frame,
           start_result->batch_count, start_result->batch_size,
           renderpass, start_result->event_index, event_count,
           begin->event_name, begin->count,
           begin->vs, begin->tcs, begin->tes, begin->gs,
           begin->fs, begin->cs, begin->ms, begin->ts,
           (double)duration_idle_ns / 1000.0,
           (double)duration_time_ns / 1000.0);
}
639 
640 /**
641  * Write data for a cpu event.
642  */
643 void
intel_measure_print_cpu_result(unsigned int frame,unsigned int batch_count,uint64_t batch_size,unsigned int event_index,unsigned int event_count,unsigned int count,const char * event_name)644 intel_measure_print_cpu_result(unsigned int frame,
645                                unsigned int batch_count,
646                                uint64_t batch_size,
647                                unsigned int event_index,
648                                unsigned int event_count,
649                                unsigned int count,
650                                const char* event_name)
651 {
652    assert(config.cpu_measure);
653    uint64_t start_ns = os_time_get_nano();
654 
655    fprintf(config.file, "%"PRIu64",%u,%3u,%"PRIu64",%3u,%u,%s,%u\n",
656            start_ns, frame, batch_count, batch_size,
657            event_index, event_count, event_name, count);
658 }
659 
/**
 * Empty the ringbuffer of events that can be printed.
 *
 * Repeatedly combines and prints buffered events until not enough remain to
 * form a complete line of output.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    const struct intel_device_info *info)
{
   for (;;) {
      const int combine_count = buffered_event_count(device);
      if (combine_count == 0)
         break;
      print_combined_results(device, combine_count, info);
   }
}
674 
/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 *
 * Holds the device mutex while draining the queue; safe to call from the
 * driver's submission or polling paths.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     const struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* Iterate snapshots and collect if ready.  Each snapshot queue will be
    * in-order, but we must determine which queue has the oldest batch.
    */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed.  Later batches in this in-order queue cannot be ready
          * either, so stop here.
          */
         break;
      }

      list_del(&batch->link);
      /* snapshots are recorded in begin/end pairs */
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      /* Reset the batch for reuse and return it to the driver when a
       * release callback is registered.
       */
      batch->index = 0;
      batch->frame = 0;
      if (measure_device->release_batch)
         measure_device->release_batch(batch);
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}
715