xref: /aosp_15_r20/external/pytorch/aten/src/ATen/benchmarks/quantize_per_channel.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <ATen/ATen.h>
2 #include <iostream>
3 
4 #include <benchmark/benchmark.h>
5 
quantize_per_channel_4d_contiguous(benchmark::State & state)6 static void quantize_per_channel_4d_contiguous(benchmark::State& state) {
7   const size_t batches = static_cast<size_t>(state.range(0));
8   const size_t channels = static_cast<size_t>(state.range(1));
9   const size_t height = static_cast<size_t>(state.range(2));
10   const size_t width = static_cast<size_t>(state.range(3));
11 
12   at::Tensor a = at::rand({batches, channels, height, width});
13   at::Tensor scales = at::rand({channels});
14   at::Tensor zero_points = at::randint(
15       0, 10, {channels}, at::TensorOptions().dtype(at::ScalarType::Int));
16 
17   at::Tensor qa;
18   for (auto _ : state) {
19     qa = at::native::quantize_per_channel(
20         a, scales, zero_points, 1, at::ScalarType::QUInt8);
21   }
22 }
23 
quantize_per_channel_4d_channels_last(benchmark::State & state)24 static void quantize_per_channel_4d_channels_last(benchmark::State& state) {
25   const size_t batches = static_cast<size_t>(state.range(0));
26   const size_t channels = static_cast<size_t>(state.range(1));
27   const size_t height = static_cast<size_t>(state.range(2));
28   const size_t width = static_cast<size_t>(state.range(3));
29 
30   at::Tensor a = at::rand(
31       {batches, channels, height, width},
32       at::TensorOptions().memory_format(at::MemoryFormat::ChannelsLast));
33   at::Tensor scales = at::rand({channels});
34   at::Tensor zero_points = at::randint(
35       0, 10, {channels}, at::TensorOptions().dtype(at::ScalarType::Int));
36 
37   at::Tensor qa;
38   for (auto _ : state) {
39     qa = at::native::quantize_per_channel(
40         a, scales, zero_points, 1, at::ScalarType::QUInt8);
41   }
42 }
43 
quantize_per_channel_2d(benchmark::State & state)44 static void quantize_per_channel_2d(benchmark::State& state) {
45   const size_t channels = static_cast<size_t>(state.range(0));
46   const size_t nelem = static_cast<size_t>(state.range(1));
47 
48   at::Tensor a = at::rand({channels, nelem});
49   at::Tensor scales = at::rand({channels});
50   at::Tensor zero_points = at::randint(
51       0, 10, {channels}, at::TensorOptions().dtype(at::ScalarType::Int));
52 
53   at::Tensor qa;
54   for (auto _ : state) {
55     qa = at::native::quantize_per_channel(
56         a, scales, zero_points, 0, at::ScalarType::QUInt8);
57   }
58 }
59 
GenerateSizes4d(benchmark::internal::Benchmark * b)60 static void GenerateSizes4d(benchmark::internal::Benchmark* b) {
61   b->ArgNames({"N", "C", "H", "W"});
62 
63   for (size_t n = 16; n < 256; n *= 2) {
64     for (size_t c = 4; c < 256; c *= 2) {
65       for (size_t hw = 4; hw < 256; hw *= 2) {
66         b->Args({n, c, hw, hw});
67       }
68     }
69   }
70 }
71 
GenerateSizes2d(benchmark::internal::Benchmark * b)72 static void GenerateSizes2d(benchmark::internal::Benchmark* b) {
73   b->ArgNames({"C", "N"});
74 
75   for (size_t c = 4; c < 512; c *= 2) {
76     for (size_t n = 4; n < 512; n *= 2) {
77       b->Args({c, n});
78     }
79   }
80 }
81 
// Register each benchmark function and attach the generator that supplies its
// argument sets: the 2-D benchmark uses GenerateSizes2d, both 4-D benchmarks
// share GenerateSizes4d.
82 BENCHMARK(quantize_per_channel_2d)->Apply(GenerateSizes2d);
83 BENCHMARK(quantize_per_channel_4d_contiguous)->Apply(GenerateSizes4d);
84 BENCHMARK(quantize_per_channel_4d_channels_last)->Apply(GenerateSizes4d);
// Google Benchmark macro that provides the program entry point.
85 BENCHMARK_MAIN();
86