1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2020 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <algorithm>
10 #include <array>
11 #include <cmath>
12 #include <functional>
13 #include <limits>
14 #include <random>
15 #include <vector>
16
17 #include <fp16.h>
18
19 #include <xnnpack.h>
20
21 #include <benchmark/benchmark.h>
22 #include "bench/utils.h"
23 #ifdef BENCHMARK_TENSORFLOW_LITE
24 #include "flatbuffers/include/flatbuffers/flatbuffers.h"
25 #include "tensorflow/lite/interpreter.h"
26 #include "tensorflow/lite/kernels/register.h"
27 #include "tensorflow/lite/model.h"
28 #include "tensorflow/lite/schema/schema_generated.h"
29 #include "tensorflow/lite/version.h"
30 #endif // BENCHMARK_TENSORFLOW_LITE
31
32
33 #ifndef XNN_NO_F16_OPERATORS
xnnpack_sigmoid_f16(benchmark::State & state)34 static void xnnpack_sigmoid_f16(benchmark::State& state) {
35 const size_t batch_size = state.range(0);
36
37 std::random_device random_device;
38 auto rng = std::mt19937(random_device());
39 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
40 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
41
42 std::vector<uint16_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(uint16_t));
43 std::vector<uint16_t> output(batch_size);
44 std::generate(input.begin(), input.end(), std::ref(f16rng));
45 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
46
47 xnn_status status = xnn_initialize(nullptr /* allocator */);
48 if (status != xnn_status_success) {
49 state.SkipWithError("failed to initialize XNNPACK");
50 return;
51 }
52
53 xnn_operator_t sigmoid_op = nullptr;
54 status = xnn_create_sigmoid_nc_f16(
55 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
56 0 /* flags */, &sigmoid_op);
57 if (status != xnn_status_success || sigmoid_op == nullptr) {
58 state.SkipWithError("failed to create Sigmoid operator");
59 return;
60 }
61
62 status = xnn_setup_sigmoid_nc_f16(
63 sigmoid_op, batch_size,
64 input.data(), output.data(),
65 nullptr /* thread pool */);
66 if (status != xnn_status_success) {
67 state.SkipWithError("failed to setup Sigmoid operator");
68 return;
69 }
70
71 for (auto _ : state) {
72 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
73 if (status != xnn_status_success) {
74 state.SkipWithError("failed to run Sigmoid operator");
75 return;
76 }
77 }
78
79 status = xnn_delete_operator(sigmoid_op);
80 if (status != xnn_status_success) {
81 state.SkipWithError("failed to delete Sigmoid operator");
82 return;
83 }
84
85 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
86 if (cpu_frequency != 0) {
87 state.counters["cpufreq"] = cpu_frequency;
88 }
89
90 state.counters["elements"] =
91 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
92
93 const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint16_t);
94 state.counters["bytes"] =
95 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
96 }
97 #endif // XNN_NO_F16_OPERATORS
98
xnnpack_sigmoid_f32(benchmark::State & state)99 static void xnnpack_sigmoid_f32(benchmark::State& state) {
100 const size_t batch_size = state.range(0);
101
102 std::random_device random_device;
103 auto rng = std::mt19937(random_device());
104 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
105
106 std::vector<float> input(batch_size + XNN_EXTRA_BYTES / sizeof(float));
107 std::vector<float> output(batch_size);
108 std::generate(input.begin(), input.end(), std::ref(f32rng));
109 std::fill(output.begin(), output.end(), std::nanf(""));
110
111 xnn_status status = xnn_initialize(nullptr /* allocator */);
112 if (status != xnn_status_success) {
113 state.SkipWithError("failed to initialize XNNPACK");
114 return;
115 }
116
117 xnn_operator_t sigmoid_op = nullptr;
118 status = xnn_create_sigmoid_nc_f32(
119 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
120 0 /* flags */, &sigmoid_op);
121 if (status != xnn_status_success || sigmoid_op == nullptr) {
122 state.SkipWithError("failed to create Sigmoid operator");
123 return;
124 }
125
126 status = xnn_setup_sigmoid_nc_f32(
127 sigmoid_op, batch_size,
128 input.data(), output.data(),
129 nullptr /* thread pool */);
130 if (status != xnn_status_success) {
131 state.SkipWithError("failed to setup Sigmoid operator");
132 return;
133 }
134
135 for (auto _ : state) {
136 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
137 if (status != xnn_status_success) {
138 state.SkipWithError("failed to run Sigmoid operator");
139 return;
140 }
141 }
142
143 status = xnn_delete_operator(sigmoid_op);
144 if (status != xnn_status_success) {
145 state.SkipWithError("failed to delete Sigmoid operator");
146 return;
147 }
148
149 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
150 if (cpu_frequency != 0) {
151 state.counters["cpufreq"] = cpu_frequency;
152 }
153
154 state.counters["elements"] =
155 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
156
157 const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
158 state.counters["bytes"] =
159 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
160 }
161
162 #ifndef XNN_NO_QS8_OPERATORS
xnnpack_sigmoid_qs8(benchmark::State & state)163 static void xnnpack_sigmoid_qs8(benchmark::State& state) {
164 const size_t batch_size = state.range(0);
165
166 std::random_device random_device;
167 auto rng = std::mt19937(random_device());
168 auto i8rng = std::bind(
169 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
170 std::ref(rng));
171
172 std::vector<int8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(int8_t));
173 std::vector<int8_t> output(batch_size);
174 std::generate(input.begin(), input.end(), std::ref(i8rng));
175 std::fill(output.begin(), output.end(), INT8_C(0xA5));
176
177 xnn_status status = xnn_initialize(nullptr /* allocator */);
178 if (status != xnn_status_success) {
179 state.SkipWithError("failed to initialize XNNPACK");
180 return;
181 }
182
183 xnn_operator_t sigmoid_op = nullptr;
184 status = xnn_create_sigmoid_nc_qs8(
185 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
186 1 /* input zero point */, 1.0f /* input scale */,
187 -128 /* output zero point */, 1.0f / 256.0f /* output scale */,
188 std::numeric_limits<int8_t>::min() /* output min */, std::numeric_limits<int8_t>::max() /* output max */,
189 0 /* flags */, &sigmoid_op);
190 if (status != xnn_status_success || sigmoid_op == nullptr) {
191 state.SkipWithError("failed to create Sigmoid operator");
192 return;
193 }
194
195 status = xnn_setup_sigmoid_nc_qs8(
196 sigmoid_op, batch_size,
197 input.data(), output.data(),
198 nullptr /* thread pool */);
199 if (status != xnn_status_success) {
200 state.SkipWithError("failed to setup Sigmoid operator");
201 return;
202 }
203
204 for (auto _ : state) {
205 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
206 if (status != xnn_status_success) {
207 state.SkipWithError("failed to run Sigmoid operator");
208 return;
209 }
210 }
211
212 status = xnn_delete_operator(sigmoid_op);
213 if (status != xnn_status_success) {
214 state.SkipWithError("failed to delete Sigmoid operator");
215 return;
216 }
217
218 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
219 if (cpu_frequency != 0) {
220 state.counters["cpufreq"] = cpu_frequency;
221 }
222
223 state.counters["elements"] =
224 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
225
226 const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
227 state.counters["bytes"] =
228 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
229 }
230 #endif // XNN_NO_QS8_OPERATORS
231
232 #ifndef XNN_NO_QU8_OPERATORS
xnnpack_sigmoid_qu8(benchmark::State & state)233 static void xnnpack_sigmoid_qu8(benchmark::State& state) {
234 const size_t batch_size = state.range(0);
235
236 std::random_device random_device;
237 auto rng = std::mt19937(random_device());
238 auto u8rng = std::bind(
239 std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
240
241 std::vector<uint8_t> input(batch_size + XNN_EXTRA_BYTES / sizeof(uint8_t));
242 std::vector<uint8_t> output(batch_size);
243 std::generate(input.begin(), input.end(), std::ref(u8rng));
244 std::fill(output.begin(), output.end(), UINT8_C(0xA5));
245
246 xnn_status status = xnn_initialize(nullptr /* allocator */);
247 if (status != xnn_status_success) {
248 state.SkipWithError("failed to initialize XNNPACK");
249 return;
250 }
251
252 xnn_operator_t sigmoid_op = nullptr;
253 status = xnn_create_sigmoid_nc_qu8(
254 1 /* channels */, 1 /* input stride */, 1 /* output stride */,
255 128 /* input zero point */, 1.0f /* input scale */,
256 0 /* output zero point */, 1.0f / 256.0f /* output scale */,
257 std::numeric_limits<uint8_t>::min() /* output min */, std::numeric_limits<uint8_t>::max() /* output max */,
258 0 /* flags */, &sigmoid_op);
259 if (status != xnn_status_success || sigmoid_op == nullptr) {
260 state.SkipWithError("failed to create Sigmoid operator");
261 return;
262 }
263
264 status = xnn_setup_sigmoid_nc_qu8(
265 sigmoid_op, batch_size,
266 input.data(), output.data(),
267 nullptr /* thread pool */);
268 if (status != xnn_status_success) {
269 state.SkipWithError("failed to setup Sigmoid operator");
270 return;
271 }
272
273 for (auto _ : state) {
274 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
275 if (status != xnn_status_success) {
276 state.SkipWithError("failed to run Sigmoid operator");
277 return;
278 }
279 }
280
281 status = xnn_delete_operator(sigmoid_op);
282 if (status != xnn_status_success) {
283 state.SkipWithError("failed to delete Sigmoid operator");
284 return;
285 }
286
287 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
288 if (cpu_frequency != 0) {
289 state.counters["cpufreq"] = cpu_frequency;
290 }
291
292 state.counters["elements"] =
293 benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);
294
295 const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
296 state.counters["bytes"] =
297 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
298 }
299 #endif // XNN_NO_QU8_OPERATORS
300
301 #ifdef BENCHMARK_TENSORFLOW_LITE
// Benchmarks TFLite's LOGISTIC (sigmoid) kernel on a 1-D FP32 tensor of
// state.range(0) elements, using a minimal model serialized in memory.
static void tflite_sigmoid_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

  // Build a flatbuffer model: one subgraph, two tensors (input -> output),
  // and a single LOGISTIC operator. NOTE: FlatBufferBuilder requires every
  // child offset (vector/string/table) to be fully created before the table
  // that references it, so the construction order below matters.
  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  // Single empty buffer: buffer 0 is the conventional "no data" buffer that
  // both tensors reference.
  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  // Both tensors share the same 1-D shape.
  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  // Tensor 0: input, tensor 1: output.
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_FLOAT32),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("Sigmoid model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  // Resolver without default delegates so the built-in CPU kernel (not the
  // XNNPACK delegate) is measured.
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  // Single-threaded for a like-for-like comparison with the XNNPACK benchmarks.
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Fill the input tensor (index 0) with random data once, outside the loop.
  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size,
    std::ref(f32rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  // Throughput counters: elements processed per second, and bytes moved per
  // second (each element is read once and written once, hence the factor 2).
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
397
// Benchmarks TFLite's LOGISTIC (sigmoid) kernel on a 1-D signed-int8 tensor of
// state.range(0) elements, using a minimal model serialized in memory.
static void tflite_sigmoid_qs8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Draw into int32_t: uniform_int_distribution<int8_t> is not required to be
  // supported by the standard.
  auto i8rng = std::bind(
    std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
    std::ref(rng));

  // Build a flatbuffer model: one subgraph, two quantized tensors, and a
  // single LOGISTIC operator. NOTE: FlatBufferBuilder requires every child
  // offset (vector/string/table) to be fully created before the table that
  // references it, so the construction order below matters.
  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  // Tensor 0: input (scale 1.0, zero point 1); tensor 1: output (scale 1/256,
  // zero point -128), matching the quantization used by the QS8 XNNPACK
  // benchmark above.
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f /* scale */}),
                           builder.CreateVector<int64_t>({1 /* zero point */}))),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_INT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
                           builder.CreateVector<int64_t>({-128 /* zero point */}))),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("Sigmoid model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  // Resolver without default delegates so the built-in CPU kernel (not the
  // XNNPACK delegate) is measured.
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  // Single-threaded for a like-for-like comparison with the XNNPACK benchmarks.
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Fill the input tensor (index 0) with random data once, outside the loop.
  std::generate(
    interpreter->typed_tensor<int8_t>(0),
    interpreter->typed_tensor<int8_t>(0) + batch_size,
    std::ref(i8rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  // Throughput counters: elements processed per second, and bytes moved per
  // second (each element is read once and written once, hence the factor 2).
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(int8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
503
// Benchmarks TFLite's LOGISTIC (sigmoid) kernel on a 1-D unsigned-int8 tensor
// of state.range(0) elements, using a minimal model serialized in memory.
static void tflite_sigmoid_qu8(benchmark::State& state) {
  const size_t batch_size = state.range(0);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Draw into uint32_t: uniform_int_distribution<uint8_t> is not required to
  // be supported by the standard.
  auto u8rng = std::bind(
    std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()),
    std::ref(rng));

  // Build a flatbuffer model: one subgraph, two quantized tensors, and a
  // single LOGISTIC operator. NOTE: FlatBufferBuilder requires every child
  // offset (vector/string/table) to be fully created before the table that
  // references it, so the construction order below matters.
  flatbuffers::FlatBufferBuilder builder;
  const flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  const std::array<flatbuffers::Offset<tflite::Buffer>, 1> buffers{{
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  }};

  const std::array<int32_t, 1> shape{{
    static_cast<int32_t>(batch_size)
  }};

  // Tensor 0: input (scale 1.0, zero point 128); tensor 1: output (scale
  // 1/256, zero point 0), matching the quantization used by the QU8 XNNPACK
  // benchmark above.
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f /* scale */}),
                           builder.CreateVector<int64_t>({128 /* zero point */}))),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(shape.data(), shape.size()),
                         tflite::TensorType_UINT8, 0 /* buffer */, 0 /* name */,
                         tflite::CreateQuantizationParameters(builder,
                           0 /*min*/, 0 /*max*/,
                           builder.CreateVector<float>({1.0f / 256.0f /* scale */}),
                           builder.CreateVector<int64_t>({0 /* zero point */}))),
  }};

  const std::array<int32_t, 1> op_inputs{{ 0 }};
  const std::array<int32_t, 1> op_outputs{{ 1 }};
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
    builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> graph_inputs{{ 0 }};
  const std::array<int32_t, 1> graph_outputs{{ 1 }};
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors.data(), tensors.size()),
    builder.CreateVector<int32_t>(graph_inputs.data(), graph_inputs.size()),
    builder.CreateVector<int32_t>(graph_outputs.data(), graph_outputs.size()),
    builder.CreateVector(&op, 1));

  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    builder.CreateString("Sigmoid model"),
    builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  // Resolver without default delegates so the built-in CPU kernel (not the
  // XNNPACK delegate) is measured.
  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk || interpreter == nullptr) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  // Single-threaded for a like-for-like comparison with the XNNPACK benchmarks.
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Fill the input tensor (index 0) with random data once, outside the loop.
  std::generate(
    interpreter->typed_tensor<uint8_t>(0),
    interpreter->typed_tensor<uint8_t>(0) + batch_size,
    std::ref(u8rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  // Throughput counters: elements processed per second, and bytes moved per
  // second (each element is read once and written once, hence the factor 2).
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * batch_size, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * batch_size * sizeof(uint8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
609 #endif // BENCHMARK_TENSORFLOW_LITE
610
// Register each benchmark over the standard unary-elementwise batch-size
// sweep; real (wall-clock) time is reported since the work is memory-bound.
#ifndef XNN_NO_F16_OPERATORS
BENCHMARK(xnnpack_sigmoid_f16)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, uint16_t>)
  ->UseRealTime();
#endif  // XNN_NO_F16_OPERATORS
BENCHMARK(xnnpack_sigmoid_f32)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
#ifndef XNN_NO_QS8_OPERATORS
BENCHMARK(xnnpack_sigmoid_qs8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
  ->UseRealTime();
#endif  // XNN_NO_QS8_OPERATORS
#ifndef XNN_NO_QU8_OPERATORS
BENCHMARK(xnnpack_sigmoid_qu8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
  ->UseRealTime();
#endif  // XNN_NO_QU8_OPERATORS

#ifdef BENCHMARK_TENSORFLOW_LITE
BENCHMARK(tflite_sigmoid_f32)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
  ->UseRealTime();
BENCHMARK(tflite_sigmoid_qs8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<int8_t, int8_t>)
  ->UseRealTime();
BENCHMARK(tflite_sigmoid_qu8)
  ->Apply(benchmark::utils::UnaryElementwiseParameters<uint8_t, uint8_t>)
  ->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE

// Provide main() unless another translation unit supplies it.
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
645