1 // Auto-generated file. Do not edit!
2 // Template: src/s16-window/scalar.c.in
3 // Generator: tools/xngen
4 //
5 // Copyright 2022 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9
10 #include <assert.h>
11 #include <stddef.h>
12 #include <stdint.h>
13
14 #include <xnnpack/math.h>
15 #include <xnnpack/window.h>
16
17
xnn_s16_window_ukernel__scalar_x4(size_t rows,size_t batch_size,const int16_t * input,const int16_t * weights,int16_t * output,uint32_t shift)18 void xnn_s16_window_ukernel__scalar_x4(
19 size_t rows,
20 size_t batch_size,
21 const int16_t* input,
22 const int16_t* weights,
23 int16_t* output,
24 uint32_t shift)
25 {
26 assert(rows > 0);
27 assert(batch_size != 0);
28 assert(input != NULL);
29 assert(weights != NULL);
30 assert(output != NULL);
31 assert(shift < 32);
32
33 do {
34 size_t n = batch_size;
35 const int16_t* w = weights;
36 for (; n >= 4; n -= 4) {
37 const int16_t vi0 = input[0];
38 const int16_t vi1 = input[1];
39 const int16_t vi2 = input[2];
40 const int16_t vi3 = input[3];
41 input += 4;
42
43 const int16_t w0 = w[0];
44 const int16_t w1 = w[1];
45 const int16_t w2 = w[2];
46 const int16_t w3 = w[3];
47 w += 4;
48
49 int32_t vout0 = (int32_t) vi0 * (int32_t) w0;
50 int32_t vout1 = (int32_t) vi1 * (int32_t) w1;
51 int32_t vout2 = (int32_t) vi2 * (int32_t) w2;
52 int32_t vout3 = (int32_t) vi3 * (int32_t) w3;
53
54 vout0 = math_asr_s32(vout0, shift);
55 vout1 = math_asr_s32(vout1, shift);
56 vout2 = math_asr_s32(vout2, shift);
57 vout3 = math_asr_s32(vout3, shift);
58
59 vout0 = math_max_s32(vout0, INT16_MIN);
60 vout1 = math_max_s32(vout1, INT16_MIN);
61 vout2 = math_max_s32(vout2, INT16_MIN);
62 vout3 = math_max_s32(vout3, INT16_MIN);
63
64 vout0 = math_min_s32(vout0, INT16_MAX);
65 vout1 = math_min_s32(vout1, INT16_MAX);
66 vout2 = math_min_s32(vout2, INT16_MAX);
67 vout3 = math_min_s32(vout3, INT16_MAX);
68
69 output[0] = (int16_t) vout0;
70 output[1] = (int16_t) vout1;
71 output[2] = (int16_t) vout2;
72 output[3] = (int16_t) vout3;
73
74 output += 4;
75 }
76
77 if XNN_UNLIKELY(n != 0) {
78 do {
79 const int32_t vi = (int32_t) *input++;
80 const int32_t vw = (int32_t) *w++;
81 int32_t vout = vi * vw;
82 vout = math_asr_s32(vout, shift);
83 vout = math_max_s32(vout, INT16_MIN);
84 vout = math_min_s32(vout, INT16_MAX);
85 *output++ = (int16_t) vout;
86 } while (--n != 0);
87 }
88 } while (--rows != 0);
89 }
90