1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <algorithm>
10 #include <array>
11 #include <cmath>
12 #include <type_traits>
13 #include <utility>
14 #include <vector>
15
16 #include <gtest/gtest.h>
17
18 #include <executorch/extension/llm/custom_ops/spinquant/fast_hadamard_transform.h>
19 #include <executorch/extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_test_impl.h>
20
21 using executorch::runtime::testing::fast_hadamard_transform_28N_with_transpose;
22 using executorch::runtime::testing::random_floats;
23 using executorch::runtime::testing::reference_fht_impl;
24
TEST(FastHadamardTransformTest,SingleElement)25 TEST(FastHadamardTransformTest, SingleElement) {
26 // FHT of a single element is a no-op.
27 std::array<float, 1> data = {{42}};
28 executorch::fast_hadamard_transform(data.data(), 0);
29 EXPECT_EQ(data[0], 42);
30 }
31
TEST(FastHadamardTransformTest,LargerInput)32 TEST(FastHadamardTransformTest, LargerInput) {
33 std::vector<float> data = random_floats(4096);
34
35 auto expected = data;
36 reference_fht_impl(expected.data(), expected.size());
37
38 auto actual = data;
39 executorch::fast_hadamard_transform(actual.data(), 12);
40
41 for (int ii = 0; ii < expected.size(); ++ii) {
42 EXPECT_FLOAT_EQ(actual[ii], expected[ii]);
43 }
44 }
45
TEST(FastHadamardTransform28NTest,Basic)46 TEST(FastHadamardTransform28NTest, Basic) {
47 std::vector<float> data = random_floats(1024 * 28);
48
49 auto expected = data;
50 fast_hadamard_transform_28N_with_transpose(expected.data(), 10);
51
52 auto actual = data;
53 executorch::fast_hadamard_transform_28N(actual.data(), 10);
54
55 for (int ii = 0; ii < actual.size(); ++ii) {
56 EXPECT_FLOAT_EQ(actual[ii], expected[ii]);
57 }
58 }
59
60 namespace {
61 constexpr int32_t qmin = -(1 << 15) + 1;
62 constexpr int32_t qmax = -qmin;
63
quantize(float x,float scale)64 int16_t quantize(float x, float scale) {
65 float scaled = x / scale;
66 // XXX: Supposed to round ties to even, but this is just test code.
67 int32_t scaled_int =
68 std::clamp((int32_t)std::lround<int32_t>(scaled), qmin, qmax);
69 return static_cast<int16_t>(scaled_int);
70 }
71
72 template <typename T>
quantize(const std::vector<float> & data,float scale)73 std::vector<T> quantize(const std::vector<float>& data, float scale) {
74 std::vector<T> result;
75 result.reserve(data.size());
76 for (const float unquant : data) {
77 result.push_back(quantize(unquant, scale));
78 }
79 return result;
80 }
81
82 template <typename T>
quantize(const std::vector<float> & data)83 std::pair<std::vector<T>, float> quantize(const std::vector<float>& data) {
84 auto [minIt, maxIt] = std::minmax_element(data.begin(), data.end());
85 float scale = (*maxIt - *minIt) / (qmax - qmin);
86 return {quantize<T>(data, scale), scale};
87 }
88
89 template <typename T>
dequantize(T x,float scale)90 float dequantize(T x, float scale) {
91 return x * scale;
92 }
93
94 template <typename T>
dequantize(const std::vector<T> & data,float scale)95 std::vector<float> dequantize(const std::vector<T>& data, float scale) {
96 static_assert(!std::is_same_v<T, float>);
97 std::vector<float> result;
98 result.reserve(data.size());
99 for (const T quant : data) {
100 result.push_back(dequantize(quant, scale));
101 }
102 return result;
103 }
104
105 #define EXPECT_CLOSE_IMPL(a, b, atol, rtol) \
106 EXPECT_LE(std::abs(a - b), atol + rtol * std::abs(b)) \
107 << "a: " << a << ", b: " << b
108 #define EXPECT_CLOSE(a, b) EXPECT_CLOSE_IMPL(a, b, 2e-4, 1e-4)
109
testQuantizedFastHadamardTransform(int logN)110 void testQuantizedFastHadamardTransform(int logN) {
111 std::vector<float> data = random_floats(1 << logN);
112
113 auto [qdata, scale] = quantize<int16_t>(data);
114
115 auto expected_unquant = dequantize(qdata, scale);
116 reference_fht_impl(expected_unquant.data(), expected_unquant.size());
117 auto expected = quantize<int16_t>(expected_unquant, scale);
118
119 auto actual = qdata;
120 executorch::fast_hadamard_transform_symmetric_quantized_s16(
121 actual.data(), logN);
122
123 for (int ii = 0; ii < expected.size(); ++ii) {
124 EXPECT_CLOSE(
125 dequantize(actual[ii], scale), dequantize(expected[ii], scale));
126 }
127 }
128
129 } // namespace
130
TEST(QuantizedFastHadamardTransformTest,Basic)131 TEST(QuantizedFastHadamardTransformTest, Basic) {
132 testQuantizedFastHadamardTransform(12); // 4096
133 }
134
TEST(QuantizedFastHadamardTransformTest,OddLogN)135 TEST(QuantizedFastHadamardTransformTest, OddLogN) {
136 testQuantizedFastHadamardTransform(11); // 2048
137 }
138
TEST(QuantizedFastHadamardTransform28NTest,Basic)139 TEST(QuantizedFastHadamardTransform28NTest, Basic) {
140 std::vector<float> data = random_floats(1024 * 28);
141
142 auto [qdata, scale] = quantize<int16_t>(data);
143
144 auto expected_unquant = dequantize(qdata, scale);
145 fast_hadamard_transform_28N_with_transpose(expected_unquant.data(), 10);
146 auto expected = quantize<int16_t>(expected_unquant, scale);
147
148 auto actual = qdata;
149 executorch::fast_hadamard_transform_symmetric_quantized_s16_28N(
150 actual.data(), 10);
151
152 for (int ii = 0; ii < expected.size(); ++ii) {
153 EXPECT_CLOSE(
154 dequantize(actual[ii], scale), dequantize(expected[ii], scale));
155 }
156 }
157