xref: /aosp_15_r20/external/skia/tools/skpbench/skpbench.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "bench/BigPath.h"
9 #include "include/core/SkCanvas.h"
10 #include "include/core/SkGraphics.h"
11 #include "include/core/SkPicture.h"
12 #include "include/core/SkPictureRecorder.h"
13 #include "include/core/SkStream.h"
14 #include "include/core/SkSurface.h"
15 #include "include/core/SkSurfaceProps.h"
16 #include "include/docs/SkMultiPictureDocument.h"
17 #include "include/effects/SkPerlinNoiseShader.h"
18 #include "include/gpu/ganesh/GrDirectContext.h"
19 #include "include/gpu/ganesh/SkSurfaceGanesh.h"
20 #include "include/private/chromium/GrDeferredDisplayList.h"
21 #include "src/core/SkOSFile.h"
22 #include "src/core/SkTaskGroup.h"
23 #include "src/gpu/ganesh/GrCaps.h"
24 #include "src/gpu/ganesh/GrDirectContextPriv.h"
25 #include "src/gpu/ganesh/SkGr.h"
26 #include "src/gpu/ganesh/image/GrImageUtils.h"
27 #include "src/utils/SkOSPath.h"
28 #include "tools/DDLPromiseImageHelper.h"
29 #include "tools/DDLTileHelper.h"
30 #include "tools/EncodeUtils.h"
31 #include "tools/SkSharingProc.h"
32 #include "tools/flags/CommandLineFlags.h"
33 #include "tools/flags/CommonFlags.h"
34 #include "tools/flags/CommonFlagsConfig.h"
35 #include "tools/flags/CommonFlagsGanesh.h"
36 #include "tools/fonts/FontToolUtils.h"
37 #include "tools/gpu/FlushFinishTracker.h"
38 #include "tools/gpu/GpuTimer.h"
39 #include "tools/gpu/GrContextFactory.h"
40 
41 #if defined(SK_ENABLE_SVG)
42 #include "modules/skshaper/utils/FactoryHelpers.h"
43 #include "modules/svg/include/SkSVGDOM.h"
44 #include "src/xml/SkDOM.h"
45 #endif
46 
#include <stdlib.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <cinttypes>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>
54 
55 /**
56  * This is a minimalist program whose sole purpose is to open a .skp or .svg file, benchmark it on a
57  * single config, and exit. It is intended to be used through skpbench.py rather than invoked
58  * directly. Limiting the entire process to a single config/skp pair helps to keep the results
59  * repeatable.
60  *
61  * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
62  * render target and syncs the GPU after each draw.
63  *
64  * Well, maybe a little fanciness, MSKP's can be loaded and played. The animation is played as many
65  * times as necessary to reach the target sample duration and FPS is reported.
66  *
67  * Currently, only GPU configs are supported.
68  */
69 
// Command-line flags. Each DEFINE_* declares a FLAGS_<name> variable that the rest of this file
// reads; the string argument is the flag's help text.

// DDL (deferred display list) mode options; consumed by run_ddl_benchmark().
70 static DEFINE_bool(ddl, false, "record the skp into DDLs before rendering");
71 static DEFINE_int(ddlNumRecordingThreads, 0, "number of DDL recording threads (0=num_cores)");
72 static DEFINE_int(ddlTilingWidthHeight, 0, "number of tiles along one edge when in DDL mode");
73 
// Single-threaded variants of the DDL path; ddl_sample() asserts they are not both set.
74 static DEFINE_bool(comparableDDL, false, "render in a way that is comparable to 'comparableSKP'");
75 static DEFINE_bool(comparableSKP, false, "report in a way that is comparable to 'comparableDDL'");
76 
// Timing, input, and reporting options.
77 static DEFINE_int(duration, 5000, "number of milliseconds to run the benchmark");
78 static DEFINE_int(sampleMs, 50, "minimum duration of a sample");
79 static DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
80 static DEFINE_bool(fps, false, "use fps instead of ms");
81 static DEFINE_string(src, "",
82                      "path to a single .skp or .svg file, or 'warmup' for a builtin warmup run");
83 static DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
84 static DEFINE_int(verbosity, 4, "level of verbosity (0=none to 5=debug)");
85 static DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
86 static DEFINE_double(scale, 1, "Scale the size of the canvas and the zoom level by this factor.");
87 static DEFINE_bool(dumpSamples, false, "print the individual samples to stdout");
88 
// Column titles printed by main() unless --suppressHeader is given. The columns line up with
// the fields of resultFormat below.
89 static const char header[] =
90 "   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";
91 
// printf format for the single result row emitted by print_result().
92 static const char resultFormat[] =
93 "%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7zu  %9i  %-5s  %-6s  %-9s %s";
94 
// Number of draw/flush cycles issued before timing starts in run_benchmark() and
// run_gpu_time_benchmark().
95 static constexpr int kNumFlushesToPrimeCache = 3;
96 
97 struct Sample {
98     using duration = std::chrono::nanoseconds;
99 
SampleSample100     Sample() : fFrames(0), fDuration(0) {}
secondsSample101     double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample102     double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample103     double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample104     static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
105 
106     int        fFrames;
107     duration   fDuration;
108 };
109 
110 class GpuSync {
111 public:
GpuSync()112     GpuSync() {}
~GpuSync()113     ~GpuSync() {}
114 
115     void waitIfNeeded();
116 
117     sk_gpu_test::FlushFinishTracker* newFlushTracker(GrDirectContext* context);
118 
119 private:
120     enum { kMaxFrameLag = 3 };
121     sk_sp<sk_gpu_test::FlushFinishTracker> fFinishTrackers[kMaxFrameLag - 1];
122     int fCurrentFlushIdx = 0;
123 };
124 
// Process exit codes passed to exitf(). Listed in ascending numeric order.
// NOTE(review): the values look like the BSD <sysexits.h> EX_* codes - confirm before relying
// on that correspondence.
enum class ExitErr {
    kOk           = 0,   // success
    kUsage        = 64,  // bad command line
    kData         = 65,  // input file could not be parsed
    kUnavailable  = 69,  // requested feature/config not supported on this platform
    kSoftware     = 70,  // internal error; exitf() prefixes these with "INTERNAL ERROR"
    kIO           = 74   // file could not be opened or written
};
133 
// Forward declarations of helpers defined after main().

// Waits on the GpuSync if needed, then flushes and submits the context with a new tracker.
134 static void flush_with_sync(GrDirectContext*, GpuSync&);
// Draws the picture into the surface's canvas, then calls flush_with_sync().
135 static void draw_skp_and_flush_with_sync(GrDirectContext*, SkSurface*, const SkPicture*, GpuSync&);
// Records the builtin 'warmup' picture (a big stroked path plus a turbulence-shaded rect).
136 static sk_sp<SkPicture> create_warmup_skp();
// Renders an SVG stream into a picture; exits if SVG support is compiled out.
137 static sk_sp<SkPicture> create_skp_from_svg(SkStream*, const char* filename);
// Creates the directory and any missing parents; returns false on failure.
138 static bool mkdir_p(const SkString& name);
// Joins the flag values with single spaces (used for error messages).
139 static SkString         join(const CommandLineFlags::StringArray&);
// Prints a formatted error to stderr and exits the process with the given code.
140 static void exitf(ExitErr, const char* format, ...);
141 
142 // An interface used by both static SKPs and animated SKPs
143 class SkpProducer {
144 public:
~SkpProducer()145     virtual ~SkpProducer() {}
146     // Draw an SkPicture to the provided surface, flush the surface, and sync the GPU.
147     // You may use the static draw_skp_and_flush_with_sync declared above.
148     // returned int tells how many draw/flush/sync were done.
149     virtual int drawAndFlushAndSync(GrDirectContext*, SkSurface* surface, GpuSync& gpuSync) = 0;
150 };
151 
152 class StaticSkp : public SkpProducer {
153 public:
StaticSkp(sk_sp<SkPicture> skp)154     StaticSkp(sk_sp<SkPicture> skp) : fSkp(skp) {}
155 
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)156     int drawAndFlushAndSync(GrDirectContext* context,
157                             SkSurface* surface,
158                             GpuSync& gpuSync) override {
159         draw_skp_and_flush_with_sync(context, surface, fSkp.get(), gpuSync);
160         return 1;
161     }
162 
163 private:
164     sk_sp<SkPicture> fSkp;
165 };
166 
167 // A class for playing/benchmarking a multi frame SKP file.
168 // the recorded frames are looped over repeatedly.
169 // This type of benchmark may have a much higher std dev in frame times.
170 class MultiFrameSkp : public SkpProducer {
171 public:
MultiFrameSkp(const std::vector<SkDocumentPage> & frames)172     MultiFrameSkp(const std::vector<SkDocumentPage>& frames) : fFrames(frames){}
173 
MakeFromFile(const SkString & path)174     static std::unique_ptr<MultiFrameSkp> MakeFromFile(const SkString& path) {
175         // Load the multi frame skp at the given filename.
176         std::unique_ptr<SkStreamAsset> stream = SkStream::MakeFromFile(path.c_str());
177         if (!stream) { return nullptr; }
178 
179         // Attempt to deserialize with an image sharing serial proc.
180         auto deserialContext = std::make_unique<SkSharingDeserialContext>();
181         SkDeserialProcs procs;
182         procs.fImageProc = SkSharingDeserialContext::deserializeImage;
183         procs.fImageCtx = deserialContext.get();
184 
185         // The outer format of multi-frame skps is the multi-picture document, which is a
186         // skp file containing subpictures separated by annotations.
187         int page_count = SkMultiPictureDocument::ReadPageCount(stream.get());
188         if (!page_count) {
189             return nullptr;
190         }
191         std::vector<SkDocumentPage> frames(page_count); // can't call reserve, why?
192         if (!SkMultiPictureDocument::Read(stream.get(), frames.data(), page_count, &procs)) {
193             return nullptr;
194         }
195 
196         return std::make_unique<MultiFrameSkp>(frames);
197     }
198 
199     // Draw the whole animation once.
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)200     int drawAndFlushAndSync(GrDirectContext* context,
201                             SkSurface* surface,
202                             GpuSync& gpuSync) override {
203         for (int i=0; i<this->count(); i++){
204             draw_skp_and_flush_with_sync(context, surface, this->frame(i).get(), gpuSync);
205         }
206         return this->count();
207     }
208     // Return the requested frame.
frame(int n) const209     sk_sp<SkPicture> frame(int n) const { return fFrames[n].fPicture; }
210     // Return the number of frames in the recording.
count() const211     int count() const { return fFrames.size(); }
212 private:
213     std::vector<SkDocumentPage> fFrames;
214 };
215 
ddl_sample(GrDirectContext * dContext,DDLTileHelper * tiles,GpuSync & gpuSync,Sample * sample,SkTaskGroup * recordingTaskGroup,SkTaskGroup * gpuTaskGroup,std::chrono::high_resolution_clock::time_point * startStopTime,SkPicture * picture)216 static void ddl_sample(GrDirectContext* dContext, DDLTileHelper* tiles, GpuSync& gpuSync,
217                        Sample* sample, SkTaskGroup* recordingTaskGroup, SkTaskGroup* gpuTaskGroup,
218                        std::chrono::high_resolution_clock::time_point* startStopTime,
219                        SkPicture* picture) {
220     using clock = std::chrono::high_resolution_clock;
221 
222     clock::time_point start = *startStopTime;
223 
224     if (FLAGS_comparableDDL) {
225         SkASSERT(!FLAGS_comparableSKP);
226 
227         // In this mode we simply alternate between creating a DDL and drawing it - all on one
228         // thread. The interleaving is so that we don't starve the GPU.
229         // One unfortunate side effect of this is that we can't delete the DDLs until after
230         // the GPU work is flushed.
231         tiles->interleaveDDLCreationAndDraw(dContext, picture);
232     } else if (FLAGS_comparableSKP) {
233         // In this mode simply draw the re-inflated per-tile SKPs directly to the GPU w/o going
234         // through a DDL.
235         tiles->drawAllTilesDirectly(dContext, picture);
236     } else {
237         tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext, picture);
238         recordingTaskGroup->wait();
239     }
240 
241     if (gpuTaskGroup) {
242         gpuTaskGroup->add([&]{
243             flush_with_sync(dContext, gpuSync);
244         });
245         gpuTaskGroup->wait();
246     } else {
247         flush_with_sync(dContext, gpuSync);
248     }
249 
250     *startStopTime = clock::now();
251 
252     if (sample) {
253         sample->fDuration += *startStopTime - start;
254         sample->fFrames++;
255     }
256 }
257 
run_ddl_benchmark(sk_gpu_test::TestContext * testContext,GrDirectContext * dContext,sk_sp<SkSurface> dstSurface,SkPicture * inputPicture,std::vector<Sample> * samples)258 static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext,
259                               GrDirectContext* dContext,
260                               sk_sp<SkSurface> dstSurface,
261                               SkPicture* inputPicture,
262                               std::vector<Sample>* samples) {
263     using clock = std::chrono::high_resolution_clock;
264     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
265     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
266 
267     GrSurfaceCharacterization dstCharacterization;
268     SkAssertResult(dstSurface->characterize(&dstCharacterization));
269 
270     SkIRect viewport = dstSurface->imageInfo().bounds();
271 
272     auto supportedYUVADataTypes = skgpu::ganesh::SupportedTextureFormats(*dContext);
273     DDLPromiseImageHelper promiseImageHelper(supportedYUVADataTypes);
274     sk_sp<SkPicture> newSKP = promiseImageHelper.recreateSKP(dContext, inputPicture);
275     if (!newSKP) {
276         exitf(ExitErr::kUnavailable, "DDL: conversion of skp failed");
277     }
278 
279     promiseImageHelper.uploadAllToGPU(nullptr, dContext);
280 
281     DDLTileHelper tiles(dContext, dstCharacterization, viewport,
282                         FLAGS_ddlTilingWidthHeight, FLAGS_ddlTilingWidthHeight,
283                         /* addRandomPaddingToDst */ false);
284 
285     tiles.createBackendTextures(nullptr, dContext);
286 
287     // In comparable modes, there is no GPU thread. The following pointers are all null.
288     // Otherwise, we transfer testContext onto the GPU thread until after the bench.
289     std::unique_ptr<SkExecutor> gpuThread;
290     std::unique_ptr<SkTaskGroup> gpuTaskGroup;
291     std::unique_ptr<SkExecutor> recordingThreadPool;
292     std::unique_ptr<SkTaskGroup> recordingTaskGroup;
293     if (!FLAGS_comparableDDL && !FLAGS_comparableSKP) {
294         gpuThread = SkExecutor::MakeFIFOThreadPool(1, false);
295         gpuTaskGroup = std::make_unique<SkTaskGroup>(*gpuThread);
296         recordingThreadPool = SkExecutor::MakeFIFOThreadPool(FLAGS_ddlNumRecordingThreads, false);
297         recordingTaskGroup = std::make_unique<SkTaskGroup>(*recordingThreadPool);
298         testContext->makeNotCurrent();
299         gpuTaskGroup->add([=]{ testContext->makeCurrent(); });
300     }
301 
302     clock::time_point startStopTime = clock::now();
303 
304     GpuSync gpuSync;
305     ddl_sample(dContext, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
306                gpuTaskGroup.get(), &startStopTime, newSKP.get());
307 
308     clock::duration cumulativeDuration = std::chrono::milliseconds(0);
309 
310     do {
311         samples->emplace_back();
312         Sample& sample = samples->back();
313 
314         do {
315             tiles.resetAllTiles();
316             ddl_sample(dContext, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
317                        gpuTaskGroup.get(), &startStopTime, newSKP.get());
318         } while (sample.fDuration < sampleDuration);
319 
320         cumulativeDuration += sample.fDuration;
321     } while (cumulativeDuration < benchDuration || 0 == samples->size() % 2);
322 
323     // Move the context back to this thread now that we're done benching.
324     if (gpuTaskGroup) {
325         gpuTaskGroup->add([=]{
326             testContext->makeNotCurrent();
327         });
328         gpuTaskGroup->wait();
329         testContext->makeCurrent();
330     }
331 
332     if (!FLAGS_png.isEmpty()) {
333         // The user wants to see the final result
334         skgpu::ganesh::DrawDDL(dstSurface, tiles.composeDDL());
335         dContext->flushAndSubmit(dstSurface.get(), GrSyncCpu::kNo);
336     }
337 
338     tiles.resetAllTiles();
339 
340     // Make sure the gpu has finished all its work before we exit this function and delete the
341     // fence.
342     dContext->flush();
343     dContext->submit(GrSyncCpu::kYes);
344 
345     promiseImageHelper.deleteAllFromGPU(nullptr, dContext);
346 
347     tiles.deleteBackendTextures(nullptr, dContext);
348 }
349 
run_benchmark(GrDirectContext * context,sk_sp<SkSurface> surface,SkpProducer * skpp,std::vector<Sample> * samples)350 static void run_benchmark(GrDirectContext* context,
351                           sk_sp<SkSurface> surface,
352                           SkpProducer* skpp,
353                           std::vector<Sample>* samples) {
354     using clock = std::chrono::high_resolution_clock;
355     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
356     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
357 
358     GpuSync gpuSync;
359     int i = 0;
360     do {
361         i += skpp->drawAndFlushAndSync(context, surface.get(), gpuSync);
362     } while(i < kNumFlushesToPrimeCache);
363 
364     clock::time_point now = clock::now();
365     const clock::time_point endTime = now + benchDuration;
366 
367     do {
368         clock::time_point sampleStart = now;
369         samples->emplace_back();
370         Sample& sample = samples->back();
371 
372         do {
373             sample.fFrames += skpp->drawAndFlushAndSync(context, surface.get(), gpuSync);
374             now = clock::now();
375             sample.fDuration = now - sampleStart;
376         } while (sample.fDuration < sampleDuration);
377     } while (now < endTime || 0 == samples->size() % 2);
378 
379     // Make sure the gpu has finished all its work before we exit this function and delete the
380     // fence.
381     context->flush(surface.get());
382     context->submit(GrSyncCpu::kYes);
383 }
384 
run_gpu_time_benchmark(sk_gpu_test::GpuTimer * gpuTimer,GrDirectContext * context,sk_sp<SkSurface> surface,const SkPicture * skp,std::vector<Sample> * samples)385 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
386                                    GrDirectContext* context,
387                                    sk_sp<SkSurface> surface,
388                                    const SkPicture* skp,
389                                    std::vector<Sample>* samples) {
390     using sk_gpu_test::PlatformTimerQuery;
391     using clock = std::chrono::steady_clock;
392     const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
393     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
394 
395     if (!gpuTimer->disjointSupport()) {
396         fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
397                         "results may be unreliable\n");
398     }
399 
400     GpuSync gpuSync;
401     draw_skp_and_flush_with_sync(context, surface.get(), skp, gpuSync);
402 
403     PlatformTimerQuery previousTime = 0;
404     for (int i = 1; i < kNumFlushesToPrimeCache; ++i) {
405         gpuTimer->queueStart();
406         draw_skp_and_flush_with_sync(context, surface.get(), skp, gpuSync);
407         previousTime = gpuTimer->queueStop();
408     }
409 
410     clock::time_point now = clock::now();
411     const clock::time_point endTime = now + benchDuration;
412 
413     do {
414         const clock::time_point sampleEndTime = now + sampleDuration;
415         samples->emplace_back();
416         Sample& sample = samples->back();
417 
418         do {
419             gpuTimer->queueStart();
420             draw_skp_and_flush_with_sync(context, surface.get(), skp, gpuSync);
421             PlatformTimerQuery time = gpuTimer->queueStop();
422 
423             switch (gpuTimer->checkQueryStatus(previousTime)) {
424                 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
425                 case QueryStatus::kInvalid:
426                     exitf(ExitErr::kUnavailable, "GPU timer failed");
427                     break;
428                 case QueryStatus::kPending:
429                     exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
430                     break;
431                 case QueryStatus::kDisjoint:
432                     if (FLAGS_verbosity >= 4) {
433                         fprintf(stderr, "discarding timer query due to disjoint operations.\n");
434                     }
435                     break;
436                 case QueryStatus::kAccurate:
437                     sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
438                     ++sample.fFrames;
439                     break;
440             }
441             gpuTimer->deleteQuery(previousTime);
442             previousTime = time;
443             now = clock::now();
444         } while (now < sampleEndTime || 0 == sample.fFrames);
445     } while (now < endTime || 0 == samples->size() % 2);
446 
447     gpuTimer->deleteQuery(previousTime);
448 
449     // Make sure the gpu has finished all its work before we exit this function and delete the
450     // fence.
451     context->flush(surface.get());
452     context->submit(GrSyncCpu::kYes);
453 }
454 
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)455 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
456     if (0 == (samples.size() % 2)) {
457         exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
458     }
459 
460     if (FLAGS_dumpSamples) {
461         printf("Samples: ");
462         for (const Sample& sample : samples) {
463             printf("%" PRId64 " ", static_cast<int64_t>(sample.fDuration.count()));
464         }
465         printf("%s\n", bench);
466     }
467 
468     Sample accum = Sample();
469     std::vector<double> values;
470     values.reserve(samples.size());
471     for (const Sample& sample : samples) {
472         accum.fFrames += sample.fFrames;
473         accum.fDuration += sample.fDuration;
474         values.push_back(sample.value());
475     }
476     std::sort(values.begin(), values.end());
477 
478     const double accumValue = accum.value();
479     double variance = 0;
480     for (double value : values) {
481         const double delta = value - accumValue;
482         variance += delta * delta;
483     }
484     variance /= values.size();
485     // Technically, this is the relative standard deviation.
486     const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
487 
488     printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
489            stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
490            config, bench);
491     printf("\n");
492     fflush(stdout);
493 }
494 
main(int argc,char ** argv)495 int main(int argc, char** argv) {
496     CommandLineFlags::SetUsage(
497             "Use skpbench.py instead. "
498             "You usually don't want to use this program directly.");
499     CommandLineFlags::Parse(argc, argv);
500 
501     if (!FLAGS_suppressHeader) {
502         printf("%s\n", header);
503     }
504     if (FLAGS_duration <= 0) {
505         exit(0); // This can be used to print the header and quit.
506     }
507 
508     // Parse the config.
509     const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
510     SkCommandLineConfigArray configs;
511     ParseConfigs(FLAGS_config, &configs);
512     if (configs.size() != 1 || !(config = configs[0]->asConfigGpu())) {
513         exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
514                                join(FLAGS_config).c_str());
515     }
516 
517     // Parse the skp.
518     if (FLAGS_src.size() != 1) {
519         exitf(ExitErr::kUsage,
520               "invalid input '%s': must specify a single .skp or .svg file, or 'warmup'",
521               join(FLAGS_src).c_str());
522     }
523 
524     SkGraphics::Init();
525 
526     sk_sp<SkPicture> skp;
527     std::unique_ptr<MultiFrameSkp> mskp; // populated if the file is multi frame.
528     SkString srcname;
529     if (0 == strcmp(FLAGS_src[0], "warmup")) {
530         skp = create_warmup_skp();
531         srcname = "warmup";
532     } else {
533         SkString srcfile(FLAGS_src[0]);
534         std::unique_ptr<SkStream> srcstream(SkStream::MakeFromFile(srcfile.c_str()));
535         if (!srcstream) {
536             exitf(ExitErr::kIO, "failed to open file %s", srcfile.c_str());
537         }
538         if (srcfile.endsWith(".svg")) {
539             skp = create_skp_from_svg(srcstream.get(), srcfile.c_str());
540         } else if (srcfile.endsWith(".mskp")) {
541             mskp = MultiFrameSkp::MakeFromFile(srcfile);
542             // populate skp with it's first frame, for width height determination.
543             skp = mskp->frame(0);
544         } else {
545             skp = SkPicture::MakeFromStream(srcstream.get());
546         }
547         if (!skp) {
548             exitf(ExitErr::kData, "failed to parse file %s", srcfile.c_str());
549         }
550         srcname = SkOSPath::Basename(srcfile.c_str());
551     }
552     int width = std::min(SkScalarCeilToInt(skp->cullRect().width()), 2048),
553         height = std::min(SkScalarCeilToInt(skp->cullRect().height()), 2048);
554     if (FLAGS_verbosity >= 3 &&
555         (width != skp->cullRect().width() || height != skp->cullRect().height())) {
556         fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
557                         srcname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
558                         SkScalarCeilToInt(skp->cullRect().height()), width, height);
559     }
560     if (FLAGS_scale != 1) {
561         width *= FLAGS_scale;
562         height *= FLAGS_scale;
563         if (FLAGS_verbosity >= 3) {
564             fprintf(stderr, "Scale factor of %.2f: scaling to %ix%i.\n",
565                     FLAGS_scale, width, height);
566         }
567     }
568 
569     if (config->getSurfType() != SkCommandLineConfigGpu::SurfType::kDefault) {
570         exitf(ExitErr::kUnavailable, "This tool only supports the default surface type. (%s)",
571               config->getTag().c_str());
572     }
573 
574     // Create a context.
575     GrContextOptions ctxOptions;
576     CommonFlags::SetCtxOptions(&ctxOptions);
577     sk_gpu_test::GrContextFactory factory(ctxOptions);
578     sk_gpu_test::ContextInfo ctxInfo =
579         factory.getContextInfo(config->getContextType(), config->getContextOverrides());
580     auto ctx = ctxInfo.directContext();
581     if (!ctx) {
582         exitf(ExitErr::kUnavailable, "failed to create context for config %s",
583                                      config->getTag().c_str());
584     }
585     if (ctx->maxRenderTargetSize() < std::max(width, height)) {
586         exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
587               width, height, ctx->maxRenderTargetSize());
588     }
589     GrBackendFormat format = ctx->defaultBackendFormat(config->getColorType(), GrRenderable::kYes);
590     if (!format.isValid()) {
591         exitf(ExitErr::kUnavailable, "failed to get GrBackendFormat from SkColorType: %d",
592                                      config->getColorType());
593     }
594     int supportedSampleCount = ctx->priv().caps()->getRenderTargetSampleCount(
595             config->getSamples(), format);
596     if (supportedSampleCount != config->getSamples()) {
597         exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
598                                      config->getSamples());
599     }
600     sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
601     if (!testCtx) {
602         exitf(ExitErr::kSoftware, "testContext is null");
603     }
604     if (!testCtx->fenceSyncSupport()) {
605         exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
606     }
607 
608     // Create a render target.
609     SkImageInfo info = SkImageInfo::Make(
610             width, height, config->getColorType(), config->getAlphaType(), config->refColorSpace());
611     SkSurfaceProps props(config->getSurfaceFlags(), kRGB_H_SkPixelGeometry);
612     sk_sp<SkSurface> surface =
613             SkSurfaces::RenderTarget(ctx, skgpu::Budgeted::kNo, info, config->getSamples(), &props);
614     if (!surface) {
615         exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
616                                      width, height, config->getTag().c_str());
617     }
618 
619     // Run the benchmark.
620     std::vector<Sample> samples;
621     if (FLAGS_sampleMs > 0) {
622         // +1 because we might take one more sample in order to have an odd number.
623         samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
624     } else {
625         samples.reserve(2 * FLAGS_duration);
626     }
627     SkCanvas* canvas = surface->getCanvas();
628     canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
629     if (FLAGS_scale != 1) {
630         canvas->scale(FLAGS_scale, FLAGS_scale);
631     }
632     if (!FLAGS_gpuClock) {
633         if (FLAGS_ddl) {
634             run_ddl_benchmark(testCtx, ctx, surface, skp.get(), &samples);
635         } else if (!mskp) {
636             auto s = std::make_unique<StaticSkp>(skp);
637             run_benchmark(ctx, surface, s.get(), &samples);
638         } else {
639             run_benchmark(ctx, surface, mskp.get(), &samples);
640         }
641     } else {
642         if (FLAGS_ddl) {
643             exitf(ExitErr::kUnavailable, "DDL: GPU-only timing not supported");
644         }
645         if (!testCtx->gpuTimingSupport()) {
646             exitf(ExitErr::kUnavailable, "GPU does not support timing");
647         }
648         run_gpu_time_benchmark(testCtx->gpuTimer(), ctx, surface, skp.get(), &samples);
649     }
650     print_result(samples, config->getTag().c_str(), srcname.c_str());
651 
652     // Save a proof (if one was requested).
653     if (!FLAGS_png.isEmpty()) {
654         SkBitmap bmp;
655         bmp.allocPixels(info);
656         if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
657             exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
658         }
659         if (!mkdir_p(SkOSPath::Dirname(FLAGS_png[0]))) {
660             exitf(ExitErr::kIO, "failed to create directory for png \"%s\"", FLAGS_png[0]);
661         }
662         if (!ToolUtils::EncodeImageToPngFile(FLAGS_png[0], bmp)) {
663             exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
664         }
665     }
666 
667     return(0);
668 }
669 
flush_with_sync(GrDirectContext * context,GpuSync & gpuSync)670 static void flush_with_sync(GrDirectContext* context, GpuSync& gpuSync) {
671     gpuSync.waitIfNeeded();
672 
673     GrFlushInfo flushInfo;
674     flushInfo.fFinishedProc = sk_gpu_test::FlushFinishTracker::FlushFinished;
675     flushInfo.fFinishedContext = gpuSync.newFlushTracker(context);
676 
677     context->flush(flushInfo);
678     context->submit();
679 }
680 
draw_skp_and_flush_with_sync(GrDirectContext * context,SkSurface * surface,const SkPicture * skp,GpuSync & gpuSync)681 static void draw_skp_and_flush_with_sync(GrDirectContext* context, SkSurface* surface,
682                                          const SkPicture* skp, GpuSync& gpuSync) {
683     auto canvas = surface->getCanvas();
684     canvas->drawPicture(skp);
685 
686     flush_with_sync(context, gpuSync);
687 }
688 
create_warmup_skp()689 static sk_sp<SkPicture> create_warmup_skp() {
690     static constexpr SkRect bounds{0, 0, 500, 500};
691     SkPictureRecorder recorder;
692     SkCanvas* recording = recorder.beginRecording(bounds);
693 
694     recording->clear(SK_ColorWHITE);
695 
696     SkPaint stroke;
697     stroke.setStyle(SkPaint::kStroke_Style);
698     stroke.setStrokeWidth(2);
699 
700     // Use a big path to (theoretically) warmup the CPU.
701     SkPath bigPath = BenchUtils::make_big_path();
702     recording->drawPath(bigPath, stroke);
703 
704     // Use a perlin shader to warmup the GPU.
705     SkPaint perlin;
706     perlin.setShader(SkShaders::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
707     recording->drawRect(bounds, perlin);
708 
709     return recorder.finishRecordingAsPicture();
710 }
711 
create_skp_from_svg(SkStream * stream,const char * filename)712 static sk_sp<SkPicture> create_skp_from_svg(SkStream* stream, const char* filename) {
713 #if defined(SK_ENABLE_SVG)
714     sk_sp<SkSVGDOM> svg = SkSVGDOM::Builder()
715                                   .setFontManager(ToolUtils::TestFontMgr())
716                                   .setTextShapingFactory(SkShapers::BestAvailable())
717                                   .make(*stream);
718     if (!svg) {
719         exitf(ExitErr::kData, "failed to build svg dom from file %s", filename);
720     }
721 
722     static constexpr SkRect bounds{0, 0, 1200, 1200};
723     SkPictureRecorder recorder;
724     SkCanvas* recording = recorder.beginRecording(bounds);
725 
726     svg->setContainerSize(SkSize::Make(recording->getBaseLayerSize()));
727     svg->render(recording);
728 
729     return recorder.finishRecordingAsPicture();
730 #endif
731     exitf(ExitErr::kData, "SK_ENABLE_SVG is disabled; cannot open svg file %s", filename);
732     return nullptr;
733 }
734 
mkdir_p(const SkString & dirname)735 bool mkdir_p(const SkString& dirname) {
736     if (dirname.isEmpty() || dirname == SkString("/")) {
737         return true;
738     }
739     return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
740 }
741 
join(const CommandLineFlags::StringArray & stringArray)742 static SkString join(const CommandLineFlags::StringArray& stringArray) {
743     SkString joined;
744     for (int i = 0; i < stringArray.size(); ++i) {
745         joined.appendf(i ? " %s" : "%s", stringArray[i]);
746     }
747     return joined;
748 }
749 
750 static void exitf(ExitErr err, const char* format, ...) SK_PRINTF_LIKE(2, 3);
751 
exitf(ExitErr err,const char * format,...)752 static void exitf(ExitErr err, const char* format, ...) {
753     fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
754     va_list args;
755     va_start(args, format);
756     vfprintf(stderr, format, args);
757     va_end(args);
758     fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
759     exit((int)err);
760 }
761 
waitIfNeeded()762 void GpuSync::waitIfNeeded() {
763     if (fFinishTrackers[fCurrentFlushIdx]) {
764         fFinishTrackers[fCurrentFlushIdx]->waitTillFinished();
765     }
766 }
767 
newFlushTracker(GrDirectContext * context)768 sk_gpu_test::FlushFinishTracker* GpuSync::newFlushTracker(GrDirectContext* context) {
769     fFinishTrackers[fCurrentFlushIdx].reset(new sk_gpu_test::FlushFinishTracker(context));
770 
771     sk_gpu_test::FlushFinishTracker* tracker = fFinishTrackers[fCurrentFlushIdx].get();
772     // We add an additional ref to the current flush tracker here. This ref is owned by the finish
773     // callback on the flush call. The finish callback will unref the tracker when called.
774     tracker->ref();
775 
776     fCurrentFlushIdx = (fCurrentFlushIdx + 1) % std::size(fFinishTrackers);
777     return tracker;
778 }
779