1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under the BSD-style license found in the 6 * LICENSE file in the root directory of this source tree. 7 */ 8 9 #pragma once 10 11 #include <memory> 12 #include <vector> 13 14 #include <executorch/extension/llm/custom_ops/spinquant/fast_hadamard_transform.h> 15 #include <executorch/extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h> 16 17 namespace executorch::runtime::testing { 18 void reference_fht_impl(float* buf, int n); 19 20 // Alternate implementation of fast_hadamard_transform_28N to mutation 21 // test against. Benchmarking suggests this one is slower, which is 22 // why it's in the test. 23 template <typename T> fast_hadamard_transform_28N_with_transpose(T * vec,int log2_vec_size)24void fast_hadamard_transform_28N_with_transpose(T* vec, int log2_vec_size) { 25 const int vec_size = (1 << log2_vec_size); 26 for (int ii = 0; ii < 28; ++ii) { 27 executorch::fast_hadamard_transform(&vec[ii * vec_size], log2_vec_size); 28 } 29 std::unique_ptr<T[]> transposed = std::make_unique<T[]>(28 * vec_size); 30 for (int ii = 0; ii < 28; ++ii) { 31 for (int jj = 0; jj < vec_size; ++jj) { 32 transposed[jj * 28 + ii] = vec[ii * vec_size + jj]; 33 } 34 } 35 for (int ii = 0; ii < vec_size; ++ii) { 36 hadamard_mult_28(&transposed[ii * 28]); 37 } 38 for (int jj = 0; jj < vec_size; ++jj) { 39 for (int ii = 0; ii < 28; ++ii) { 40 vec[ii * vec_size + jj] = transposed[jj * 28 + ii]; 41 } 42 } 43 } 44 45 std::vector<float> random_floats(int howMany); 46 47 } // namespace executorch::runtime::testing 48