xref: /aosp_15_r20/external/XNNPACK/bench/utils.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <cstddef>
9 #include <cstdint>
10 
11 #include <xnnpack.h>
12 #include <xnnpack/allocator.h>
13 
14 #include <benchmark/benchmark.h>
15 
16 namespace benchmark {
17 namespace utils {
18 
// Cache-control helpers used by the benchmarks.
// NOTE(review): the definitions are not visible in this header. Judging by the
// name, WipeCache presumably evicts/overwrites CPU cache contents; the meaning
// of the returned 32-bit value is unverified - confirm against the definition.
uint32_t WipeCache();

// NOTE(review): presumably touches the `size` bytes starting at `ptr` so they
// become resident in the L1 cache; the meaning of the returned 32-bit value is
// unverified - confirm against the definition.
uint32_t PrefetchToL1(const void* ptr, size_t size);
21 
// Turn off support for denormalized numbers in the floating-point units.
void DisableDenormals();

// Clock rate, in Hz, of the logical processor that is currently in use.
uint64_t GetCurrentCpuFrequency();

// Largest last-level cache (LLC) size across all cores/clusters/sockets.
// The reported value may overestimate the LLC size, but does not
// underestimate it.
size_t GetMaxCacheSize();
31 
32 // Set number of elements for a unary elementwise microkernel such that:
33 // - It is divisible by 2, 3, 4, 5, 6.
34 // - It is divisible by AVX512 width.
35 // - Total memory footprint does not exceed the characteristic cache size for
36 //   the architecture.
37 template<class InType, class OutType>
UnaryElementwiseParameters(benchmark::internal::Benchmark * benchmark)38 void UnaryElementwiseParameters(benchmark::internal::Benchmark* benchmark) {
39   benchmark->ArgName("N");
40 
41   size_t characteristic_l1 = 32 * 1024;
42   size_t characteristic_l2 = 256 * 1024;
43 #if XNN_ARCH_ARM
44   characteristic_l1 = 16 * 1024;
45   characteristic_l2 = 128 * 1024;
46 #endif  // XNN_ARCH_ARM
47 
48   const size_t elementwise_size = sizeof(InType) + sizeof(OutType);
49   benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960);
50   benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960);
51 }
52 
53 // Set number of elements for a binary elementwise microkernel such that:
54 // - It is divisible by 2, 3, 4, 5, 6.
55 // - It is divisible by AVX512 width.
56 // - Total memory footprint does not exceed the characteristic cache size for
57 //   the architecture.
58 template<class InType, class OutType>
BinaryElementwiseParameters(benchmark::internal::Benchmark * benchmark)59 void BinaryElementwiseParameters(benchmark::internal::Benchmark* benchmark) {
60   benchmark->ArgName("N");
61 
62   size_t characteristic_l1 = 32 * 1024;
63   size_t characteristic_l2 = 256 * 1024;
64 #if XNN_ARCH_ARM
65   characteristic_l1 = 16 * 1024;
66   characteristic_l2 = 128 * 1024;
67 #endif  // XNN_ARCH_ARM
68 
69   const size_t elementwise_size = 2 * sizeof(InType) + sizeof(OutType);
70   benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960);
71   benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960);
72 }
73 
74 // Set multi-threading parameters appropriate for the processor.
75 void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark);
76 
77 typedef bool (*IsaCheckFunction)(benchmark::State& state);
78 
79 // Check if either ARM VFPv2 or VFPv3 extension is supported.
80 // If VFP is unsupported, report error in benchmark state, and return false.
81 bool CheckVFP(benchmark::State& state);
82 
83 // Check if ARMv6 extensions are supported.
84 // If ARMv6 extensions are unsupported, report error in benchmark state, and return false.
85 bool CheckARMV6(benchmark::State& state);
86 
87 // Check if ARM NEON extension is supported.
88 // If NEON is unsupported, report error in benchmark state, and return false.
89 bool CheckNEON(benchmark::State& state);
90 
91 // Check if ARM NEON-FP16 extension is supported.
92 // If NEON-FP16 is unsupported, report error in benchmark state, and return false.
93 bool CheckNEONFP16(benchmark::State& state);
94 
95 // Check if ARM NEON-FMA extension is supported.
96 // If NEON-FMA is unsupported, report error in benchmark state, and return false.
97 bool CheckNEONFMA(benchmark::State& state);
98 
99 // Check if ARMv8 NEON instructions are supported.
100 // If ARMv8 NEON is unsupported, report error in benchmark state, and return false.
101 bool CheckNEONV8(benchmark::State& state);
102 
103 // Check if ARM NEON-FP16-ARITH extension is supported.
104 // If NEON-FP16-ARITH is unsupported, report error in benchmark state, and return false.
105 bool CheckNEONFP16ARITH(benchmark::State& state);
106 
107 // Check if ARM NEON-BF16 extension is supported.
108 // If NEON-BF16 is unsupported, report error in benchmark state, and return false.
109 bool CheckNEONBF16(benchmark::State& state);
110 
111 // Check if ARM DOT extension is supported.
112 // If DOT is unsupported, report error in benchmark state, and return false.
113 bool CheckNEONDOT(benchmark::State& state);
114 
115 // Check if x86 SSSE3 extension is supported.
116 // If SSSE3 is unsupported, report error in benchmark state, and return false.
117 bool CheckSSSE3(benchmark::State& state);
118 
119 // Check if x86 SSE4.1 extension is supported.
120 // If SSE4.1 is unsupported, report error in benchmark state, and return false.
121 bool CheckSSE41(benchmark::State& state);
122 
123 // Check if x86 AVX extension is supported.
124 // If AVX is unsupported, report error in benchmark state, and return false.
125 bool CheckAVX(benchmark::State& state);
126 
127 // Check if x86 F16C extension is supported.
128 // If F16C is unsupported, report error in benchmark state, and return false.
129 bool CheckF16C(benchmark::State& state);
130 
131 // Check if x86 XOP extension is supported.
132 // If XOP is unsupported, report error in benchmark state, and return false.
133 bool CheckXOP(benchmark::State& state);
134 
135 // Check if x86 FMA3 extension is supported.
136 // If FMA3 is unsupported, report error in benchmark state, and return false.
137 bool CheckFMA3(benchmark::State& state);
138 
139 // Check if x86 AVX2 extension is supported.
140 // If AVX2 is unsupported, report error in benchmark state, and return false.
141 bool CheckAVX2(benchmark::State& state);
142 
143 // Check if x86 AVX512F extension is supported.
144 // If AVX512F is unsupported, report error in benchmark state, and return false.
145 bool CheckAVX512F(benchmark::State& state);
146 
147 // Check if x86 SKX-level AVX512 extensions (AVX512F, AVX512CD, AVX512BW, AVX512DQ, and AVX512VL) are supported.
148 // If SKX-level AVX512 extensions are unsupported, report error in benchmark state, and return false.
149 bool CheckAVX512SKX(benchmark::State& state);
150 
// Divides x by q and rounds the quotient up: the truncated quotient x / q is
// incremented by one whenever the division leaves a non-zero remainder.
// q must be non-zero.
template <class T>
inline T DivideRoundUp(T x, T q) {
  const T quotient = x / q;
  const T remainder = x % q;
  return quotient + T(remainder != 0);
}
155 
// Rounds x up to a multiple of q: the rounded-up quotient from
// DivideRoundUp(x, q) is scaled back by q. q must be non-zero.
template <class T>
inline T RoundUp(T x, T q) {
  const T multiples = DivideRoundUp(x, q);
  return q * multiples;
}
160 
// "Difference or zero": yields a - b when a >= b, and zero otherwise.
template <class T>
inline T Doz(T a, T b) {
  if (a >= b) {
    return a - b;
  }
  return T(0);
}
165 
166 // A struct that uses RAII pattern to allocate and release code memory.
167 struct CodeMemoryHelper {
168   CodeMemoryHelper();
169   ~CodeMemoryHelper();
170 
171   xnn_code_buffer buffer;
172   xnn_status status;
173 };
174 
175 }  // namespace utils
176 }  // namespace benchmark
177