// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6 #pragma once
7
8 #include <cstddef>
9 #include <cstdint>
10
11 #include <xnnpack.h>
12 #include <xnnpack/allocator.h>
13
14 #include <benchmark/benchmark.h>
15
16 namespace benchmark {
17 namespace utils {
18
// NOTE(review): presumably evicts previously touched data from the processor
// caches; the meaning of the returned value is not visible in this header -
// confirm against the definition.
uint32_t WipeCache();

// NOTE(review): presumably touches the `size` bytes starting at `ptr` so that
// they become resident in the L1 data cache; the meaning of the returned value
// is not visible in this header - confirm against the definition.
uint32_t PrefetchToL1(const void* ptr, size_t size);
21
// Disable support for denormalized numbers in floating-point units.
void DisableDenormals();
24
// Return clock rate, in Hz, for the currently used logical processor.
uint64_t GetCurrentCpuFrequency();
27
// Return maximum (across all cores/clusters/sockets) last level cache size.
// Can overestimate, but not underestimate LLC size.
size_t GetMaxCacheSize();
31
32 // Set number of elements for a unary elementwise microkernel such that:
33 // - It is divisible by 2, 3, 4, 5, 6.
34 // - It is divisible by AVX512 width.
35 // - Total memory footprint does not exceed the characteristic cache size for
36 // the architecture.
37 template<class InType, class OutType>
UnaryElementwiseParameters(benchmark::internal::Benchmark * benchmark)38 void UnaryElementwiseParameters(benchmark::internal::Benchmark* benchmark) {
39 benchmark->ArgName("N");
40
41 size_t characteristic_l1 = 32 * 1024;
42 size_t characteristic_l2 = 256 * 1024;
43 #if XNN_ARCH_ARM
44 characteristic_l1 = 16 * 1024;
45 characteristic_l2 = 128 * 1024;
46 #endif // XNN_ARCH_ARM
47
48 const size_t elementwise_size = sizeof(InType) + sizeof(OutType);
49 benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960);
50 benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960);
51 }
52
53 // Set number of elements for a binary elementwise microkernel such that:
54 // - It is divisible by 2, 3, 4, 5, 6.
55 // - It is divisible by AVX512 width.
56 // - Total memory footprint does not exceed the characteristic cache size for
57 // the architecture.
58 template<class InType, class OutType>
BinaryElementwiseParameters(benchmark::internal::Benchmark * benchmark)59 void BinaryElementwiseParameters(benchmark::internal::Benchmark* benchmark) {
60 benchmark->ArgName("N");
61
62 size_t characteristic_l1 = 32 * 1024;
63 size_t characteristic_l2 = 256 * 1024;
64 #if XNN_ARCH_ARM
65 characteristic_l1 = 16 * 1024;
66 characteristic_l2 = 128 * 1024;
67 #endif // XNN_ARCH_ARM
68
69 const size_t elementwise_size = 2 * sizeof(InType) + sizeof(OutType);
70 benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960);
71 benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960);
72 }
73
74 // Set multi-threading parameters appropriate for the processor.
75 void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark);
76
77 typedef bool (*IsaCheckFunction)(benchmark::State& state);
78
79 // Check if either ARM VFPv2 or VFPv3 extension is supported.
80 // If VFP is unsupported, report error in benchmark state, and return false.
81 bool CheckVFP(benchmark::State& state);
82
83 // Check if ARMv6 extensions are supported.
84 // If ARMv6 extensions are unsupported, report error in benchmark state, and return false.
85 bool CheckARMV6(benchmark::State& state);
86
87 // Check if ARM NEON extension is supported.
88 // If NEON is unsupported, report error in benchmark state, and return false.
89 bool CheckNEON(benchmark::State& state);
90
91 // Check if ARM NEON-FP16 extension is supported.
92 // If NEON-FP16 is unsupported, report error in benchmark state, and return false.
93 bool CheckNEONFP16(benchmark::State& state);
94
95 // Check if ARM NEON-FMA extension is supported.
96 // If NEON-FMA is unsupported, report error in benchmark state, and return false.
97 bool CheckNEONFMA(benchmark::State& state);
98
99 // Check if ARMv8 NEON instructions are supported.
100 // If ARMv8 NEON is unsupported, report error in benchmark state, and return false.
101 bool CheckNEONV8(benchmark::State& state);
102
103 // Check if ARM NEON-FP16-ARITH extension is supported.
104 // If NEON-FP16-ARITH is unsupported, report error in benchmark state, and return false.
105 bool CheckNEONFP16ARITH(benchmark::State& state);
106
107 // Check if ARM NEON-BF16 extension is supported.
108 // If NEON-BF16 is unsupported, report error in benchmark state, and return false.
109 bool CheckNEONBF16(benchmark::State& state);
110
111 // Check if ARM DOT extension is supported.
112 // If DOT is unsupported, report error in benchmark state, and return false.
113 bool CheckNEONDOT(benchmark::State& state);
114
115 // Check if x86 SSSE3 extension is supported.
116 // If SSSE3 is unsupported, report error in benchmark state, and return false.
117 bool CheckSSSE3(benchmark::State& state);
118
119 // Check if x86 SSE4.1 extension is supported.
120 // If SSE4.1 is unsupported, report error in benchmark state, and return false.
121 bool CheckSSE41(benchmark::State& state);
122
123 // Check if x86 AVX extension is supported.
124 // If AVX is unsupported, report error in benchmark state, and return false.
125 bool CheckAVX(benchmark::State& state);
126
127 // Check if x86 F16C extension is supported.
128 // If F16C is unsupported, report error in benchmark state, and return false.
129 bool CheckF16C(benchmark::State& state);
130
131 // Check if x86 XOP extension is supported.
132 // If XOP is unsupported, report error in benchmark state, and return false.
133 bool CheckXOP(benchmark::State& state);
134
135 // Check if x86 FMA3 extension is supported.
136 // If FMA3 is unsupported, report error in benchmark state, and return false.
137 bool CheckFMA3(benchmark::State& state);
138
139 // Check if x86 AVX2 extension is supported.
140 // If AVX2 is unsupported, report error in benchmark state, and return false.
141 bool CheckAVX2(benchmark::State& state);
142
143 // Check if x86 AVX512F extension is supported.
144 // If AVX512F is unsupported, report error in benchmark state, and return false.
145 bool CheckAVX512F(benchmark::State& state);
146
147 // Check if x86 SKX-level AVX512 extensions (AVX512F, AVX512CD, AVX512BW, AVX512DQ, and AVX512VL) are supported.
148 // If SKX-level AVX512 extensions are unsupported, report error in benchmark state, and return false.
149 bool CheckAVX512SKX(benchmark::State& state);
150
// Divide x by q, adding one to the quotient when the division leaves a
// non-zero remainder.
//
// q must be non-zero. For non-negative x and positive q the result is the
// quotient rounded up; for a negative x the truncated quotient is still
// incremented, so the result is not a ceiling in that case.
template <class T>
inline T DivideRoundUp(T x, T q) {
  return x / q + T(x % q != 0);
}
155
// Round x up to a multiple of q: returns q times DivideRoundUp(x, q).
//
// q must be non-zero. For non-negative x and positive q this is the smallest
// multiple of q that is not less than x, provided that multiple is
// representable in T.
template <class T>
inline T RoundUp(T x, T q) {
  return q * DivideRoundUp(x, q);
}
160
// Difference or zero: returns a - b when a >= b, and zero otherwise.
//
// For unsigned T this avoids the wrap-around that a plain a - b would produce
// when b is larger than a.
template <class T>
inline T Doz(T a, T b) {
  return a >= b ? a - b : T(0);
}
165
166 // A struct that uses RAII pattern to allocate and release code memory.
167 struct CodeMemoryHelper {
168 CodeMemoryHelper();
169 ~CodeMemoryHelper();
170
171 xnn_code_buffer buffer;
172 xnn_status status;
173 };
174
175 } // namespace utils
176 } // namespace benchmark
177