1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <algorithm>
10 #include <array>
11 #include <cmath>
12 #include <type_traits>
13 #include <utility>
14 #include <vector>
15 
16 #include <gtest/gtest.h>
17 
18 #include <executorch/extension/llm/custom_ops/spinquant/fast_hadamard_transform.h>
19 #include <executorch/extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_test_impl.h>
20 
21 using executorch::runtime::testing::fast_hadamard_transform_28N_with_transpose;
22 using executorch::runtime::testing::random_floats;
23 using executorch::runtime::testing::reference_fht_impl;
24 
TEST(FastHadamardTransformTest,SingleElement)25 TEST(FastHadamardTransformTest, SingleElement) {
26   // FHT of a single element is a no-op.
27   std::array<float, 1> data = {{42}};
28   executorch::fast_hadamard_transform(data.data(), 0);
29   EXPECT_EQ(data[0], 42);
30 }
31 
TEST(FastHadamardTransformTest,LargerInput)32 TEST(FastHadamardTransformTest, LargerInput) {
33   std::vector<float> data = random_floats(4096);
34 
35   auto expected = data;
36   reference_fht_impl(expected.data(), expected.size());
37 
38   auto actual = data;
39   executorch::fast_hadamard_transform(actual.data(), 12);
40 
41   for (int ii = 0; ii < expected.size(); ++ii) {
42     EXPECT_FLOAT_EQ(actual[ii], expected[ii]);
43   }
44 }
45 
TEST(FastHadamardTransform28NTest,Basic)46 TEST(FastHadamardTransform28NTest, Basic) {
47   std::vector<float> data = random_floats(1024 * 28);
48 
49   auto expected = data;
50   fast_hadamard_transform_28N_with_transpose(expected.data(), 10);
51 
52   auto actual = data;
53   executorch::fast_hadamard_transform_28N(actual.data(), 10);
54 
55   for (int ii = 0; ii < actual.size(); ++ii) {
56     EXPECT_FLOAT_EQ(actual[ii], expected[ii]);
57   }
58 }
59 
60 namespace {
61 constexpr int32_t qmin = -(1 << 15) + 1;
62 constexpr int32_t qmax = -qmin;
63 
quantize(float x,float scale)64 int16_t quantize(float x, float scale) {
65   float scaled = x / scale;
66   // XXX: Supposed to round ties to even, but this is just test code.
67   int32_t scaled_int =
68       std::clamp((int32_t)std::lround<int32_t>(scaled), qmin, qmax);
69   return static_cast<int16_t>(scaled_int);
70 }
71 
72 template <typename T>
quantize(const std::vector<float> & data,float scale)73 std::vector<T> quantize(const std::vector<float>& data, float scale) {
74   std::vector<T> result;
75   result.reserve(data.size());
76   for (const float unquant : data) {
77     result.push_back(quantize(unquant, scale));
78   }
79   return result;
80 }
81 
82 template <typename T>
quantize(const std::vector<float> & data)83 std::pair<std::vector<T>, float> quantize(const std::vector<float>& data) {
84   auto [minIt, maxIt] = std::minmax_element(data.begin(), data.end());
85   float scale = (*maxIt - *minIt) / (qmax - qmin);
86   return {quantize<T>(data, scale), scale};
87 }
88 
89 template <typename T>
dequantize(T x,float scale)90 float dequantize(T x, float scale) {
91   return x * scale;
92 }
93 
94 template <typename T>
dequantize(const std::vector<T> & data,float scale)95 std::vector<float> dequantize(const std::vector<T>& data, float scale) {
96   static_assert(!std::is_same_v<T, float>);
97   std::vector<float> result;
98   result.reserve(data.size());
99   for (const T quant : data) {
100     result.push_back(dequantize(quant, scale));
101   }
102   return result;
103 }
104 
105 #define EXPECT_CLOSE_IMPL(a, b, atol, rtol)             \
106   EXPECT_LE(std::abs(a - b), atol + rtol * std::abs(b)) \
107       << "a: " << a << ", b: " << b
108 #define EXPECT_CLOSE(a, b) EXPECT_CLOSE_IMPL(a, b, 2e-4, 1e-4)
109 
testQuantizedFastHadamardTransform(int logN)110 void testQuantizedFastHadamardTransform(int logN) {
111   std::vector<float> data = random_floats(1 << logN);
112 
113   auto [qdata, scale] = quantize<int16_t>(data);
114 
115   auto expected_unquant = dequantize(qdata, scale);
116   reference_fht_impl(expected_unquant.data(), expected_unquant.size());
117   auto expected = quantize<int16_t>(expected_unquant, scale);
118 
119   auto actual = qdata;
120   executorch::fast_hadamard_transform_symmetric_quantized_s16(
121       actual.data(), logN);
122 
123   for (int ii = 0; ii < expected.size(); ++ii) {
124     EXPECT_CLOSE(
125         dequantize(actual[ii], scale), dequantize(expected[ii], scale));
126   }
127 }
128 
129 } // namespace
130 
TEST(QuantizedFastHadamardTransformTest,Basic)131 TEST(QuantizedFastHadamardTransformTest, Basic) {
132   testQuantizedFastHadamardTransform(12); // 4096
133 }
134 
TEST(QuantizedFastHadamardTransformTest,OddLogN)135 TEST(QuantizedFastHadamardTransformTest, OddLogN) {
136   testQuantizedFastHadamardTransform(11); // 2048
137 }
138 
TEST(QuantizedFastHadamardTransform28NTest,Basic)139 TEST(QuantizedFastHadamardTransform28NTest, Basic) {
140   std::vector<float> data = random_floats(1024 * 28);
141 
142   auto [qdata, scale] = quantize<int16_t>(data);
143 
144   auto expected_unquant = dequantize(qdata, scale);
145   fast_hadamard_transform_28N_with_transpose(expected_unquant.data(), 10);
146   auto expected = quantize<int16_t>(expected_unquant, scale);
147 
148   auto actual = qdata;
149   executorch::fast_hadamard_transform_symmetric_quantized_s16_28N(
150       actual.data(), 10);
151 
152   for (int ii = 0; ii < expected.size(); ++ii) {
153     EXPECT_CLOSE(
154         dequantize(actual[ii], scale), dequantize(expected[ii], scale));
155   }
156 }
157