xref: /aosp_15_r20/external/XNNPACK/src/cs16-fftr/scalar.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1// Copyright 2022 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6$assert SAMPLE_TILE >= 1
7#include <assert.h>
8#include <stddef.h>
9#include <stdint.h>
10
11#include <xnnpack/math.h>
12#include <xnnpack/fft.h>
13
14
// Scalar cs16 "fftr" microkernel template. SAMPLE_TILE (a template parameter,
// required to be >= 1 by the template's assert directive) selects how many
// element pairs the unrolled loop handles per iteration.
//
// The routine rewrites `data` in place. `data` is accessed as consecutive
// 2-value pairs; the r/i suffixes on the locals suggest interleaved
// real/imaginary parts (NOTE(review): confirm against the caller/tests).
//
//   samples - must be even and >= 2 (asserted below).
//   data    - read and written in place. The code writes data[samples * 2]
//             and data[samples * 2 + 1], so the buffer must hold at least
//             samples * 2 + 2 int16_t values.
//   twiddle - read-only; two int16_t values are consumed per loop iteration
//             element, i.e. `samples` values in total over samples / 2 pairs.
//
// Every value loaded from `data` is first multiplied by 16383, offset by
// 16384 and passed to math_asr_s32(..., 15). math_asr_s32 is declared outside
// this file; presumably it is an arithmetic shift right, which would make
// this a rounded scale by roughly 1/2 - TODO confirm in xnnpack/math.h.
15void xnn_cs16_fftr_ukernel__scalar_x${SAMPLE_TILE}(
16    size_t samples,
17    int16_t* data,
18    const int16_t* twiddle)
19{
20  assert(samples >= 2);
21  assert(samples % 2 == 0);
22  assert(data != NULL);
  // NOTE(review): the next assertion duplicates the one above; it is
  // redundant as written (possibly a copy/paste leftover).
23  assert(data != NULL);
24  assert(twiddle != NULL);
25
  // dl walks forward from the first pair; dr starts at pair index `samples`
  // (one pair past the `samples` input pairs) and walks backward.
26  int16_t* dl = data;
27  int16_t* dr = data + samples * 2;
  // First pair is handled separately from the loops below.
28  int32_t vdcr = (int32_t) dl[0];
29  int32_t vdci = (int32_t) dl[1];
30
31  vdcr = math_asr_s32(vdcr * 16383 + 16384, 15);
32  vdci = math_asr_s32(vdci * 16383 + 16384, 15);
33
  // Pair 0 receives the sum, pair `samples` receives the difference; the
  // second component of both pairs is set to zero.
34  dl[0] = vdcr + vdci;
35  dl[1] = 0;
36  dl += 2;
37  dr[0] = vdcr - vdci;
38  dr[1] = 0;
39
  // Each loop iteration below consumes one pair from each end, so only
  // samples / 2 iterations remain.
40  samples >>= 1;
41
  // Unrolled main loop, only generated when SAMPLE_TILE > 1.
42  $if SAMPLE_TILE > 1:
43    for (; samples >= ${SAMPLE_TILE}; samples -= ${SAMPLE_TILE}) {
44      dr -= ${SAMPLE_TILE} * 2;
      // Load SAMPLE_TILE pairs from the left side in ascending order.
45      $for C in range(SAMPLE_TILE):
46        int32_t vilr${C} = dl[${C * 2 + 0}];
47        int32_t vili${C} = dl[${C * 2 + 1}];
      // Load the matching pairs from the right side in reverse order, so that
      // lane C pairs the C-th left element with the C-th element counted from
      // the right. The second component is negated on load.
48      $for C in range(SAMPLE_TILE):
49        int32_t virr${C} =  (int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}];
50        int32_t viri${C} = -(int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}];
      // Two twiddle values per lane, read in ascending order.
51      $for C in range(SAMPLE_TILE):
52        const int32_t vtwr${C} = twiddle[${C * 2 + 0}];
53        const int32_t vtwi${C} = twiddle[${C * 2 + 1}];
54      twiddle += ${SAMPLE_TILE} * 2;
55
      // Same multiply-by-16383 / add-16384 / shift-by-15 step as applied to
      // the first pair above.
56      $for C in range(SAMPLE_TILE):
57        vilr${C} = math_asr_s32(vilr${C} * 16383 + 16384, 15);
58        virr${C} = math_asr_s32(virr${C} * 16383 + 16384, 15);
59      $for C in range(SAMPLE_TILE):
60        vili${C} = math_asr_s32(vili${C} * 16383 + 16384, 15);
61        viri${C} = math_asr_s32(viri${C} * 16383 + 16384, 15);
      // vacc1 = left + right, vacc2 = left - right, per component.
62      $for C in range(SAMPLE_TILE):
63        const int32_t vacc1r${C} = vilr${C} + virr${C};
64        const int32_t vacc2r${C} = vilr${C} - virr${C};
65      $for C in range(SAMPLE_TILE):
66        const int32_t vacc1i${C} = vili${C} + viri${C};
67        const int32_t vacc2i${C} = vili${C} - viri${C};
68
      // (vacc2r, vacc2i) combined with (vtwr, vtwi) in the form of a complex
      // product, with 16384 added before the shift by 15.
69      $for C in range(SAMPLE_TILE):
70        const int32_t twr${C} = math_asr_s32(vacc2r${C} * vtwr${C} - vacc2i${C} * vtwi${C} + 16384, 15);
71      $for C in range(SAMPLE_TILE):
72        const int32_t twi${C} = math_asr_s32(vacc2r${C} * vtwi${C} + vacc2i${C} * vtwr${C} + 16384, 15);
73
      // Left outputs: (vacc1 + tw) shifted by 1.
74      $for C in range(SAMPLE_TILE):
75        dl[${C * 2 + 0}] = math_asr_s32(vacc1r${C} + twr${C}, 1);
76        dl[${C * 2 + 1}] = math_asr_s32(vacc1i${C} + twi${C}, 1);
      // Right outputs, stored back in reverse order: first component is
      // (vacc1r - twr), second component uses the opposite sign
      // (twi - vacc1i); both shifted by 1.
77      $for C in range(SAMPLE_TILE):
78        dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}] = math_asr_s32(vacc1r${C} - twr${C}, 1);
79        dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}] = math_asr_s32(twi${C} - vacc1i${C}, 1);
80      dl += ${SAMPLE_TILE} * 2;
81    }
82
  // Remainder loop: one pair from each end per iteration. When SAMPLE_TILE is
  // 1 this is the only loop generated. All loads from dl/dr happen before any
  // store in an iteration; on the last iteration dl and dr address the same
  // pair (dl has advanced to pair samples/2 and dr has retreated to it).
83  if XNN_UNLIKELY(samples != 0) {
84    do {
85      dr -= 2;
86      int32_t vilr = dl[0];
87      int32_t vili = dl[1];
      // Second component of the right-hand pair is negated on load.
88      int32_t virr =  (int32_t) dr[0];
89      int32_t viri = -(int32_t) dr[1];
90      const int32_t vtwr = twiddle[0];
91      const int32_t vtwi = twiddle[1];
92      twiddle += 2;
93
94      vilr =  math_asr_s32(vilr * 16383 + 16384, 15);
95      vili =  math_asr_s32(vili * 16383 + 16384, 15);
96      virr = math_asr_s32(virr * 16383 + 16384, 15);
97      viri = math_asr_s32(viri * 16383 + 16384, 15);
      // Sum and difference of the left and right pairs.
98      const int32_t vacc1r = vilr + virr;
99      const int32_t vacc1i = vili + viri;
100      const int32_t vacc2r = vilr - virr;
101      const int32_t vacc2i = vili - viri;
102
      // Difference term combined with the twiddle pair (complex-product form).
103      const int32_t twr = math_asr_s32(vacc2r * vtwr - vacc2i * vtwi + 16384, 15);
104      const int32_t twi = math_asr_s32(vacc2r * vtwi + vacc2i * vtwr + 16384, 15);
105
      // Stores narrow the int32_t results back to int16_t implicitly.
106      dl[0] = math_asr_s32(vacc1r + twr, 1);
107      dl[1] = math_asr_s32(vacc1i + twi, 1);
108      dr[0] = math_asr_s32(vacc1r - twr, 1);
109      dr[1] = math_asr_s32(twi - vacc1i, 1);
110      dl += 2;
111    } while (--samples != 0);
112  }
113}
114