xref: /aosp_15_r20/external/XNNPACK/src/cs16-bfly4/samples1-scalar.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <xnnpack/math.h>
11 #include <xnnpack/fft.h>
12 
13 
// In-place 4-point butterfly over the eight int16 values data[0..7].
//
// The values are handled as four (r, i) pairs stored back to back
// (presumably the real and imaginary parts of four complex samples --
// confirm against the callers).  Each value is first scaled down by roughly
// a factor of four, then the four pairs are combined with additions and
// subtractions only.
//
// samples  must be 1 (checked by assert only).
// data     eight int16 values, read and then overwritten in place.
// stride   must be non-zero (checked by assert only; not otherwise used).
// twiddle  must be non-NULL (checked by assert only; never dereferenced here).
void xnn_cs16_bfly4_samples1_ukernel__scalar(
    size_t samples,
    int16_t* data,
    const size_t stride,
    const int16_t* twiddle)
{
  assert(samples == 1);
  assert(data != NULL);
  assert(stride != 0);
  assert(twiddle != NULL);

  // Widen every input to 32 bits and apply the pre-scale before anything is
  // written back, since the outputs overwrite the inputs.
  int32_t re[4];
  int32_t im[4];
  for (size_t k = 0; k < 4; k++) {
    const int32_t in_re = (int32_t) data[2 * k];
    const int32_t in_im = (int32_t) data[2 * k + 1];
    // Note 32767 / 4 = 8191.  Should be 8192.
    // 16384 is half of 1 << 15, i.e. a rounding term for the shift by 15
    // (math_asr_s32 is presumably an arithmetic shift right -- see math.h).
    re[k] = math_asr_s32(in_re * 8191 + 16384, 15);
    im[k] = math_asr_s32(in_im * 8191 + 16384, 15);
  }

  // Sums and differences of pairs 0/2 and of pairs 1/3.
  const int32_t sum02_re = re[0] + re[2];
  const int32_t sum02_im = im[0] + im[2];
  const int32_t diff02_re = re[0] - re[2];
  const int32_t diff02_im = im[0] - im[2];
  const int32_t sum13_re = re[1] + re[3];
  const int32_t sum13_im = im[1] + im[3];
  const int32_t diff13_re = re[1] - re[3];
  const int32_t diff13_im = im[1] - im[3];

  // Output 0: sum of all four pairs.
  data[0] = (int16_t) (sum02_re + sum13_re);
  data[1] = (int16_t) (sum02_im + sum13_im);
  // Output 1: diff02 combined with diff13 with r/i swapped (+i part, -r part).
  data[2] = (int16_t) (diff02_re + diff13_im);
  data[3] = (int16_t) (diff02_im - diff13_re);
  // Output 2: (pair0 + pair2) - (pair1 + pair3).
  data[4] = (int16_t) (sum02_re - sum13_re);
  data[5] = (int16_t) (sum02_im - sum13_im);
  // Output 3: same as output 1 with the signs of the swapped terms flipped.
  data[6] = (int16_t) (diff02_re - diff13_im);
  data[7] = (int16_t) (diff02_im + diff13_re);
}
72