#include <cstdint>
#include <iostream>

#include <ATen/ATen.h>
#include <benchmark/benchmark.h>
5
quantize_per_channel_4d_contiguous(benchmark::State & state)6 static void quantize_per_channel_4d_contiguous(benchmark::State& state) {
7 const size_t batches = static_cast<size_t>(state.range(0));
8 const size_t channels = static_cast<size_t>(state.range(1));
9 const size_t height = static_cast<size_t>(state.range(2));
10 const size_t width = static_cast<size_t>(state.range(3));
11
12 at::Tensor a = at::rand({batches, channels, height, width});
13 at::Tensor scales = at::rand({channels});
14 at::Tensor zero_points = at::randint(
15 0, 10, {channels}, at::TensorOptions().dtype(at::ScalarType::Int));
16
17 at::Tensor qa;
18 for (auto _ : state) {
19 qa = at::native::quantize_per_channel(
20 a, scales, zero_points, 1, at::ScalarType::QUInt8);
21 }
22 }
23
quantize_per_channel_4d_channels_last(benchmark::State & state)24 static void quantize_per_channel_4d_channels_last(benchmark::State& state) {
25 const size_t batches = static_cast<size_t>(state.range(0));
26 const size_t channels = static_cast<size_t>(state.range(1));
27 const size_t height = static_cast<size_t>(state.range(2));
28 const size_t width = static_cast<size_t>(state.range(3));
29
30 at::Tensor a = at::rand(
31 {batches, channels, height, width},
32 at::TensorOptions().memory_format(at::MemoryFormat::ChannelsLast));
33 at::Tensor scales = at::rand({channels});
34 at::Tensor zero_points = at::randint(
35 0, 10, {channels}, at::TensorOptions().dtype(at::ScalarType::Int));
36
37 at::Tensor qa;
38 for (auto _ : state) {
39 qa = at::native::quantize_per_channel(
40 a, scales, zero_points, 1, at::ScalarType::QUInt8);
41 }
42 }
43
quantize_per_channel_2d(benchmark::State & state)44 static void quantize_per_channel_2d(benchmark::State& state) {
45 const size_t channels = static_cast<size_t>(state.range(0));
46 const size_t nelem = static_cast<size_t>(state.range(1));
47
48 at::Tensor a = at::rand({channels, nelem});
49 at::Tensor scales = at::rand({channels});
50 at::Tensor zero_points = at::randint(
51 0, 10, {channels}, at::TensorOptions().dtype(at::ScalarType::Int));
52
53 at::Tensor qa;
54 for (auto _ : state) {
55 qa = at::native::quantize_per_channel(
56 a, scales, zero_points, 0, at::ScalarType::QUInt8);
57 }
58 }
59
GenerateSizes4d(benchmark::internal::Benchmark * b)60 static void GenerateSizes4d(benchmark::internal::Benchmark* b) {
61 b->ArgNames({"N", "C", "H", "W"});
62
63 for (size_t n = 16; n < 256; n *= 2) {
64 for (size_t c = 4; c < 256; c *= 2) {
65 for (size_t hw = 4; hw < 256; hw *= 2) {
66 b->Args({n, c, hw, hw});
67 }
68 }
69 }
70 }
71
GenerateSizes2d(benchmark::internal::Benchmark * b)72 static void GenerateSizes2d(benchmark::internal::Benchmark* b) {
73 b->ArgNames({"C", "N"});
74
75 for (size_t c = 4; c < 512; c *= 2) {
76 for (size_t n = 4; n < 512; n *= 2) {
77 b->Args({c, n});
78 }
79 }
80 }
81
82 BENCHMARK(quantize_per_channel_2d)->Apply(GenerateSizes2d);
83 BENCHMARK(quantize_per_channel_4d_contiguous)->Apply(GenerateSizes4d);
84 BENCHMARK(quantize_per_channel_4d_channels_last)->Apply(GenerateSizes4d);
85 BENCHMARK_MAIN();
86