1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7 #include <stddef.h>
8 #include <stdint.h>
9
10 #include <xnnpack/math.h>
11 #include <xnnpack/fft.h>
12
13
// Scalar 4-point butterfly over `data`, updated in place.
//
// `data` is read and written as 8 consecutive int16 values, treated as four
// interleaved (real, imaginary) pairs x0..x3. Each value is first scaled by
// 8191/32768 with rounding, then the pairs are combined as:
//
//   y0 = (x0 + x2) + (x1 + x3)
//   y1 = (x0 - x2) - j * (x1 - x3)
//   y2 = (x0 + x2) - (x1 + x3)
//   y3 = (x0 - x2) + j * (x1 - x3)
//
// Parameters:
//   samples - must be 1 (checked by assert only).
//   data    - non-NULL pointer to the 8 int16 values described above.
//   stride  - must be non-zero; not otherwise used by this kernel.
//   twiddle - must be non-NULL; not otherwise used by this kernel.
void xnn_cs16_bfly4_samples1_ukernel__scalar(
    size_t samples,
    int16_t* data,
    const size_t stride,
    const int16_t* twiddle)
{
  assert(samples == 1);
  assert(data != NULL);
  assert(stride != 0);
  assert(twiddle != NULL);

  // Widen every input to 32 bits and pre-scale it: multiply by 8191, add the
  // rounding term 16384, and shift right by 15 through math_asr_s32.
  // Note: the multiplier is 32767 / 4 = 8191; an exact quarter would be 8192.
  int32_t vin[8];
  for (size_t i = 0; i < 8; i++) {
    vin[i] = math_asr_s32((int32_t) data[i] * 8191 + 16384, 15);
  }

  // Sums and differences of the even-indexed pairs (x0, x2).
  const int32_t vsum02r = vin[0] + vin[4];
  const int32_t vsum02i = vin[1] + vin[5];
  const int32_t vdif02r = vin[0] - vin[4];
  const int32_t vdif02i = vin[1] - vin[5];

  // Sums and differences of the odd-indexed pairs (x1, x3).
  const int32_t vsum13r = vin[2] + vin[6];
  const int32_t vsum13i = vin[3] + vin[7];
  const int32_t vdif13r = vin[2] - vin[6];
  const int32_t vdif13i = vin[3] - vin[7];

  // y0 = (x0 + x2) + (x1 + x3)
  data[0] = (int16_t) (vsum02r + vsum13r);
  data[1] = (int16_t) (vsum02i + vsum13i);

  // y1 = (x0 - x2) - j * (x1 - x3)
  data[2] = (int16_t) (vdif02r + vdif13i);
  data[3] = (int16_t) (vdif02i - vdif13r);

  // y2 = (x0 + x2) - (x1 + x3)
  data[4] = (int16_t) (vsum02r - vsum13r);
  data[5] = (int16_t) (vsum02i - vsum13i);

  // y3 = (x0 - x2) + j * (x1 - x3)
  data[6] = (int16_t) (vdif02r - vdif13i);
  data[7] = (int16_t) (vdif02i + vdif13r);
}
72