1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #pragma once
10 
11 #include <memory>
12 #include <vector>
13 
14 #include <executorch/extension/llm/custom_ops/spinquant/fast_hadamard_transform.h>
15 #include <executorch/extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h>
16 
17 namespace executorch::runtime::testing {
// Declaration only; the definition is not in this header.
// NOTE(review): the name suggests a reference fast Hadamard transform applied
// to `buf` — confirm against the definition, including whether `n` is an
// element count or a log2 size.
void reference_fht_impl(float* buf, int n);
19 
20 // Alternate implementation of fast_hadamard_transform_28N to mutation
21 // test against. Benchmarking suggests this one is slower, which is
22 // why it's in the test.
23 template <typename T>
fast_hadamard_transform_28N_with_transpose(T * vec,int log2_vec_size)24 void fast_hadamard_transform_28N_with_transpose(T* vec, int log2_vec_size) {
25   const int vec_size = (1 << log2_vec_size);
26   for (int ii = 0; ii < 28; ++ii) {
27     executorch::fast_hadamard_transform(&vec[ii * vec_size], log2_vec_size);
28   }
29   std::unique_ptr<T[]> transposed = std::make_unique<T[]>(28 * vec_size);
30   for (int ii = 0; ii < 28; ++ii) {
31     for (int jj = 0; jj < vec_size; ++jj) {
32       transposed[jj * 28 + ii] = vec[ii * vec_size + jj];
33     }
34   }
35   for (int ii = 0; ii < vec_size; ++ii) {
36     hadamard_mult_28(&transposed[ii * 28]);
37   }
38   for (int jj = 0; jj < vec_size; ++jj) {
39     for (int ii = 0; ii < 28; ++ii) {
40       vec[ii * vec_size + jj] = transposed[jj * 28 + ii];
41     }
42   }
43 }
44 
// Declaration only; the definition is not in this header.
// NOTE(review): presumably returns `howMany` randomly generated floats —
// confirm the value range and seeding against the definition.
std::vector<float> random_floats(int howMany);
46 
47 } // namespace executorch::runtime::testing
48