xref: /aosp_15_r20/external/eigen/bench/tensors/benchmark_main.cc (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
1*bf2c3715SXin Li /*
2*bf2c3715SXin Li  * Copyright (C) 2012 The Android Open Source Project
3*bf2c3715SXin Li  *
4*bf2c3715SXin Li  * Licensed under the Apache License, Version 2.0 (the "License");
5*bf2c3715SXin Li  * you may not use this file except in compliance with the License.
6*bf2c3715SXin Li  * You may obtain a copy of the License at
7*bf2c3715SXin Li  *
8*bf2c3715SXin Li  *      http://www.apache.org/licenses/LICENSE-2.0
9*bf2c3715SXin Li  *
10*bf2c3715SXin Li  * Unless required by applicable law or agreed to in writing, software
11*bf2c3715SXin Li  * distributed under the License is distributed on an "AS IS" BASIS,
12*bf2c3715SXin Li  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*bf2c3715SXin Li  * See the License for the specific language governing permissions and
14*bf2c3715SXin Li  * limitations under the License.
15*bf2c3715SXin Li  */
#include "benchmark.h"

#include <inttypes.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <algorithm>
#include <map>
#include <string>
25*bf2c3715SXin Li 
26*bf2c3715SXin Li static int64_t g_flops_processed;
27*bf2c3715SXin Li static int64_t g_benchmark_total_time_ns;
28*bf2c3715SXin Li static int64_t g_benchmark_start_time_ns;
29*bf2c3715SXin Li typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
30*bf2c3715SXin Li typedef BenchmarkMap::iterator BenchmarkMapIt;
31*bf2c3715SXin Li 
gBenchmarks()32*bf2c3715SXin Li BenchmarkMap& gBenchmarks() {
33*bf2c3715SXin Li   static BenchmarkMap g_benchmarks;
34*bf2c3715SXin Li   return g_benchmarks;
35*bf2c3715SXin Li }
36*bf2c3715SXin Li 
// Width of the left-aligned name column in the report; main() widens it to
// fit the longest registered benchmark name.
static int g_name_column_width = 20;
38*bf2c3715SXin Li 
// Rounds an iteration count up to a "nice" value of the form 2, 5 or 10
// times a power of ten.
//
// `base` is the smallest power of ten (at least 1) with 10*base >= n. The
// result is 2*base when n < 2*base, 5*base when n < 5*base, and 10*base
// otherwise, so Round(1) == 2, Round(2) == 5, Round(10) == 10.
//
// All arithmetic is done in 64 bits: for n above 10^9 the expressions
// base*10, 5*base and 10*base do not fit in an int, and signed overflow is
// undefined behaviour. A result that cannot be represented is clamped to
// INT_MAX.
static int Round(int n) {
  int64_t base = 1;
  while (base * 10 < n) {
    base *= 10;
  }

  int64_t rounded;
  if (n < 2 * base) {
    rounded = 2 * base;
  } else if (n < 5 * base) {
    rounded = 5 * base;
  } else {
    rounded = 10 * base;
  }

  if (rounded > INT_MAX) {
    return INT_MAX;
  }
  return static_cast<int>(rounded);
}
52*bf2c3715SXin Li 
#ifdef __APPLE__
  #include <mach/mach_time.h>
  // Numerator/denominator used to convert mach_absolute_time() ticks to
  // nanoseconds. Filled in by init_info(), which is marked as a constructor
  // function.
  static mach_timebase_info_data_t g_time_info;
  static void __attribute__((constructor)) init_info() {
    mach_timebase_info(&g_time_info);
  }
#endif

// Returns a timestamp in nanoseconds.
// On Apple platforms it scales mach_absolute_time() by the cached timebase;
// elsewhere it reads CLOCK_MONOTONIC.
static int64_t NanoTime() {
#if defined(__APPLE__)
  const uint64_t ticks = mach_absolute_time();
  return ticks * g_time_info.numer / g_time_info.denom;
#else
  // Zero the struct first so a failed clock_gettime() yields 0 rather than
  // whatever happened to be on the stack.
  struct timespec now;
  now.tv_nsec = 0;
  now.tv_sec = 0;
  clock_gettime(CLOCK_MONOTONIC, &now);

  const int64_t kNanosPerSecond = 1000000000LL;
  return static_cast<int64_t>(now.tv_sec) * kNanosPerSecond + now.tv_nsec;
#endif
}
72*bf2c3715SXin Li 
73*bf2c3715SXin Li namespace testing {
Arg(int arg)74*bf2c3715SXin Li Benchmark* Benchmark::Arg(int arg) {
75*bf2c3715SXin Li   args_.push_back(arg);
76*bf2c3715SXin Li   return this;
77*bf2c3715SXin Li }
78*bf2c3715SXin Li 
Range(int lo,int hi)79*bf2c3715SXin Li Benchmark* Benchmark::Range(int lo, int hi) {
80*bf2c3715SXin Li   const int kRangeMultiplier = 8;
81*bf2c3715SXin Li   if (hi < lo) {
82*bf2c3715SXin Li     int temp = hi;
83*bf2c3715SXin Li     hi = lo;
84*bf2c3715SXin Li     lo = temp;
85*bf2c3715SXin Li   }
86*bf2c3715SXin Li   while (lo < hi) {
87*bf2c3715SXin Li     args_.push_back(lo);
88*bf2c3715SXin Li     lo *= kRangeMultiplier;
89*bf2c3715SXin Li   }
90*bf2c3715SXin Li   // We always run the hi number.
91*bf2c3715SXin Li   args_.push_back(hi);
92*bf2c3715SXin Li   return this;
93*bf2c3715SXin Li }
94*bf2c3715SXin Li 
Name()95*bf2c3715SXin Li const char* Benchmark::Name() {
96*bf2c3715SXin Li   return name_;
97*bf2c3715SXin Li }
ShouldRun(int argc,char * argv[])98*bf2c3715SXin Li bool Benchmark::ShouldRun(int argc, char* argv[]) {
99*bf2c3715SXin Li   if (argc == 1) {
100*bf2c3715SXin Li     return true;  // With no arguments, we run all benchmarks.
101*bf2c3715SXin Li   }
102*bf2c3715SXin Li   // Otherwise, we interpret each argument as a regular expression and
103*bf2c3715SXin Li   // see if any of our benchmarks match.
104*bf2c3715SXin Li   for (int i = 1; i < argc; i++) {
105*bf2c3715SXin Li     regex_t re;
106*bf2c3715SXin Li     if (regcomp(&re, argv[i], 0) != 0) {
107*bf2c3715SXin Li       fprintf(stderr, "couldn't compile \"%s\" as a regular expression!\n", argv[i]);
108*bf2c3715SXin Li       exit(EXIT_FAILURE);
109*bf2c3715SXin Li     }
110*bf2c3715SXin Li     int match = regexec(&re, name_, 0, NULL, 0);
111*bf2c3715SXin Li     regfree(&re);
112*bf2c3715SXin Li     if (match != REG_NOMATCH) {
113*bf2c3715SXin Li       return true;
114*bf2c3715SXin Li     }
115*bf2c3715SXin Li   }
116*bf2c3715SXin Li   return false;
117*bf2c3715SXin Li }
Register(const char * name,void (* fn)(int),void (* fn_range)(int,int))118*bf2c3715SXin Li void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)) {
119*bf2c3715SXin Li   name_ = name;
120*bf2c3715SXin Li   fn_ = fn;
121*bf2c3715SXin Li   fn_range_ = fn_range;
122*bf2c3715SXin Li   if (fn_ == NULL && fn_range_ == NULL) {
123*bf2c3715SXin Li     fprintf(stderr, "%s: missing function\n", name_);
124*bf2c3715SXin Li     exit(EXIT_FAILURE);
125*bf2c3715SXin Li   }
126*bf2c3715SXin Li   gBenchmarks().insert(std::make_pair(name, this));
127*bf2c3715SXin Li }
Run()128*bf2c3715SXin Li void Benchmark::Run() {
129*bf2c3715SXin Li   if (fn_ != NULL) {
130*bf2c3715SXin Li     RunWithArg(0);
131*bf2c3715SXin Li   } else {
132*bf2c3715SXin Li     if (args_.empty()) {
133*bf2c3715SXin Li       fprintf(stderr, "%s: no args!\n", name_);
134*bf2c3715SXin Li       exit(EXIT_FAILURE);
135*bf2c3715SXin Li     }
136*bf2c3715SXin Li     for (size_t i = 0; i < args_.size(); ++i) {
137*bf2c3715SXin Li       RunWithArg(args_[i]);
138*bf2c3715SXin Li     }
139*bf2c3715SXin Li   }
140*bf2c3715SXin Li }
RunRepeatedlyWithArg(int iterations,int arg)141*bf2c3715SXin Li void Benchmark::RunRepeatedlyWithArg(int iterations, int arg) {
142*bf2c3715SXin Li   g_flops_processed = 0;
143*bf2c3715SXin Li   g_benchmark_total_time_ns = 0;
144*bf2c3715SXin Li   g_benchmark_start_time_ns = NanoTime();
145*bf2c3715SXin Li   if (fn_ != NULL) {
146*bf2c3715SXin Li     fn_(iterations);
147*bf2c3715SXin Li   } else {
148*bf2c3715SXin Li     fn_range_(iterations, arg);
149*bf2c3715SXin Li   }
150*bf2c3715SXin Li   if (g_benchmark_start_time_ns != 0) {
151*bf2c3715SXin Li     g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
152*bf2c3715SXin Li   }
153*bf2c3715SXin Li }
RunWithArg(int arg)154*bf2c3715SXin Li void Benchmark::RunWithArg(int arg) {
155*bf2c3715SXin Li   // run once in case it's expensive
156*bf2c3715SXin Li   int iterations = 1;
157*bf2c3715SXin Li   RunRepeatedlyWithArg(iterations, arg);
158*bf2c3715SXin Li   while (g_benchmark_total_time_ns < 1e9 && iterations < 1e9) {
159*bf2c3715SXin Li     int last = iterations;
160*bf2c3715SXin Li     if (g_benchmark_total_time_ns/iterations == 0) {
161*bf2c3715SXin Li       iterations = 1e9;
162*bf2c3715SXin Li     } else {
163*bf2c3715SXin Li       iterations = 1e9 / (g_benchmark_total_time_ns/iterations);
164*bf2c3715SXin Li     }
165*bf2c3715SXin Li     iterations = std::max(last + 1, std::min(iterations + iterations/2, 100*last));
166*bf2c3715SXin Li     iterations = Round(iterations);
167*bf2c3715SXin Li     RunRepeatedlyWithArg(iterations, arg);
168*bf2c3715SXin Li   }
169*bf2c3715SXin Li   char throughput[100];
170*bf2c3715SXin Li   throughput[0] = '\0';
171*bf2c3715SXin Li   if (g_benchmark_total_time_ns > 0 && g_flops_processed > 0) {
172*bf2c3715SXin Li     double mflops_processed = static_cast<double>(g_flops_processed)/1e6;
173*bf2c3715SXin Li     double seconds = static_cast<double>(g_benchmark_total_time_ns)/1e9;
174*bf2c3715SXin Li     snprintf(throughput, sizeof(throughput), " %8.2f MFlops/s", mflops_processed/seconds);
175*bf2c3715SXin Li   }
176*bf2c3715SXin Li   char full_name[100];
177*bf2c3715SXin Li   if (fn_range_ != NULL) {
178*bf2c3715SXin Li     if (arg >= (1<<20)) {
179*bf2c3715SXin Li       snprintf(full_name, sizeof(full_name), "%s/%dM", name_, arg/(1<<20));
180*bf2c3715SXin Li     } else if (arg >= (1<<10)) {
181*bf2c3715SXin Li       snprintf(full_name, sizeof(full_name), "%s/%dK", name_, arg/(1<<10));
182*bf2c3715SXin Li     } else {
183*bf2c3715SXin Li       snprintf(full_name, sizeof(full_name), "%s/%d", name_, arg);
184*bf2c3715SXin Li     }
185*bf2c3715SXin Li   } else {
186*bf2c3715SXin Li     snprintf(full_name, sizeof(full_name), "%s", name_);
187*bf2c3715SXin Li   }
188*bf2c3715SXin Li   printf("%-*s %10d %10" PRId64 "%s\n", g_name_column_width, full_name,
189*bf2c3715SXin Li          iterations, g_benchmark_total_time_ns/iterations, throughput);
190*bf2c3715SXin Li   fflush(stdout);
191*bf2c3715SXin Li }
192*bf2c3715SXin Li }  // namespace testing
SetBenchmarkFlopsProcessed(int64_t x)193*bf2c3715SXin Li void SetBenchmarkFlopsProcessed(int64_t x) {
194*bf2c3715SXin Li   g_flops_processed = x;
195*bf2c3715SXin Li }
StopBenchmarkTiming()196*bf2c3715SXin Li void StopBenchmarkTiming() {
197*bf2c3715SXin Li   if (g_benchmark_start_time_ns != 0) {
198*bf2c3715SXin Li     g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
199*bf2c3715SXin Li   }
200*bf2c3715SXin Li   g_benchmark_start_time_ns = 0;
201*bf2c3715SXin Li }
StartBenchmarkTiming()202*bf2c3715SXin Li void StartBenchmarkTiming() {
203*bf2c3715SXin Li   if (g_benchmark_start_time_ns == 0) {
204*bf2c3715SXin Li     g_benchmark_start_time_ns = NanoTime();
205*bf2c3715SXin Li   }
206*bf2c3715SXin Li }
main(int argc,char * argv[])207*bf2c3715SXin Li int main(int argc, char* argv[]) {
208*bf2c3715SXin Li   if (gBenchmarks().empty()) {
209*bf2c3715SXin Li     fprintf(stderr, "No benchmarks registered!\n");
210*bf2c3715SXin Li     exit(EXIT_FAILURE);
211*bf2c3715SXin Li   }
212*bf2c3715SXin Li   for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
213*bf2c3715SXin Li     int name_width = static_cast<int>(strlen(it->second->Name()));
214*bf2c3715SXin Li     g_name_column_width = std::max(g_name_column_width, name_width);
215*bf2c3715SXin Li   }
216*bf2c3715SXin Li   bool need_header = true;
217*bf2c3715SXin Li   for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
218*bf2c3715SXin Li     ::testing::Benchmark* b = it->second;
219*bf2c3715SXin Li     if (b->ShouldRun(argc, argv)) {
220*bf2c3715SXin Li       if (need_header) {
221*bf2c3715SXin Li         printf("%-*s %10s %10s\n", g_name_column_width, "", "iterations", "ns/op");
222*bf2c3715SXin Li         fflush(stdout);
223*bf2c3715SXin Li         need_header = false;
224*bf2c3715SXin Li       }
225*bf2c3715SXin Li       b->Run();
226*bf2c3715SXin Li     }
227*bf2c3715SXin Li   }
228*bf2c3715SXin Li   if (need_header) {
229*bf2c3715SXin Li     fprintf(stderr, "No matching benchmarks!\n");
230*bf2c3715SXin Li     fprintf(stderr, "Available benchmarks:\n");
231*bf2c3715SXin Li     for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
232*bf2c3715SXin Li       fprintf(stderr, "  %s\n", it->second->Name());
233*bf2c3715SXin Li     }
234*bf2c3715SXin Li     exit(EXIT_FAILURE);
235*bf2c3715SXin Li   }
236*bf2c3715SXin Li   return 0;
237*bf2c3715SXin Li }
238