xref: /aosp_15_r20/external/XNNPACK/src/f32-spmm/gen/4x1-minmax-scalar.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Auto-generated file. Do not edit!
//   Template: src/f32-spmm/scalar.c.in
//   Generator: tools/xngen
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
9 
10 #include <assert.h>
11 
12 #include <xnnpack/math.h>
13 #include <xnnpack/spmm.h>
14 
15 
xnn_f32_spmm_minmax_ukernel_4x1__scalar(size_t mc,size_t nc,const float * restrict input,const float * restrict weights,const int32_t * restrict widx_dmap,const uint32_t * restrict nidx_nnzmap,float * restrict output,size_t output_stride,const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS (1)])16 void xnn_f32_spmm_minmax_ukernel_4x1__scalar(
17     size_t mc,
18     size_t nc,
19     const float*restrict input,
20     const float*restrict weights,
21     const int32_t*restrict widx_dmap,
22     const uint32_t*restrict nidx_nnzmap,
23     float*restrict output,
24     size_t output_stride,
25     const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
26 {
27   assert(mc != 0);
28   assert(mc % sizeof(float) == 0);
29   assert(nc != 0);
30 
31   const float vmin = params->scalar.min;
32   const float vmax = params->scalar.max;
33   size_t output_decrement = output_stride * nc - 4 * sizeof(float);
34   while (mc >= 4 * sizeof(float)) {
35     const float*restrict w = weights;
36     const int32_t* dmap = widx_dmap;
37     const uint32_t* nnzmap = nidx_nnzmap;
38     size_t n = nc;
39     while (n >= 1) {
40       uint32_t nnz = *nnzmap++;
41       float vacc0x0 = *w++;
42       float vacc1x0 = vacc0x0;
43       float vacc2x0 = vacc0x0;
44       float vacc3x0 = vacc0x0;
45       if XNN_LIKELY(nnz != 0) {
46         do {
47           const intptr_t diff = *dmap++;
48           const float vi0 = input[0];
49           const float vi1 = input[1];
50           const float vi2 = input[2];
51           const float vi3 = input[3];
52           input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
53           const float vw0 = *w++;
54           vacc0x0 += vi0 * vw0;
55           vacc1x0 += vi1 * vw0;
56           vacc2x0 += vi2 * vw0;
57           vacc3x0 += vi3 * vw0;
58         } while (--nnz != 0);
59       }
60       float vout0x0 = math_min_f32(vacc0x0, vmax);
61       float vout1x0 = math_min_f32(vacc1x0, vmax);
62       float vout2x0 = math_min_f32(vacc2x0, vmax);
63       float vout3x0 = math_min_f32(vacc3x0, vmax);
64       vout0x0 = math_max_f32(vout0x0, vmin);
65       vout1x0 = math_max_f32(vout1x0, vmin);
66       vout2x0 = math_max_f32(vout2x0, vmin);
67       vout3x0 = math_max_f32(vout3x0, vmin);
68       output[0] = vout0x0;
69       output[1] = vout1x0;
70       output[2] = vout2x0;
71       output[3] = vout3x0;
72       output[0] = vout0x0;
73       output[1] = vout1x0;
74       output[2] = vout2x0;
75       output[3] = vout3x0;
76       output = (float*restrict) ((uintptr_t) output + output_stride);
77       n -= 1;
78     }
79     if XNN_UNLIKELY(n != 0) {
80       do {
81         uint32_t nnz = *nnzmap++;
82         float vacc0 = *w++;
83         float vacc1 = vacc0;
84         float vacc2 = vacc0;
85         float vacc3 = vacc0;
86         if XNN_LIKELY(nnz != 0) {
87           do {
88             const intptr_t diff = *dmap++;
89             const float vi0 = input[0];
90             const float vi1 = input[1];
91             const float vi2 = input[2];
92             const float vi3 = input[3];
93             input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
94             const float vw = *w++;
95             vacc0 += vi0 * vw;
96             vacc1 += vi1 * vw;
97             vacc2 += vi2 * vw;
98             vacc3 += vi3 * vw;
99           } while (--nnz != 0);
100         }
101         float vout0 = math_min_f32(vacc0, vmax);
102         float vout1 = math_min_f32(vacc1, vmax);
103         float vout2 = math_min_f32(vacc2, vmax);
104         float vout3 = math_min_f32(vacc3, vmax);
105         vout0 = math_max_f32(vout0, vmin);
106         vout1 = math_max_f32(vout1, vmin);
107         vout2 = math_max_f32(vout2, vmin);
108         vout3 = math_max_f32(vout3, vmin);
109         output[0] = vout0;
110         output[1] = vout1;
111         output[2] = vout2;
112         output[3] = vout3;
113         output = (float*restrict) ((uintptr_t) output + output_stride);
114         n -= 1;
115       } while (n != 0);
116     }
117     output = (float*restrict) ((uintptr_t) output - output_decrement);
118     input += 4;
119     mc -= 4 * sizeof(float);
120   }
121   if XNN_UNLIKELY(mc != 0) {
122     output_decrement += 2 * sizeof(float);
123     if (mc & (2 * sizeof(float))) {
124       const float*restrict w = weights;
125       const int32_t* dmap = widx_dmap;
126       const uint32_t* nnzmap = nidx_nnzmap;
127       size_t n = nc;
128       while (n >= 1) {
129         uint32_t nnz = *nnzmap++;
130         float vacc0x0 = *w++;
131         float vacc1x0 = vacc0x0;
132         if XNN_LIKELY(nnz != 0) {
133           do {
134             const intptr_t diff = *dmap++;
135             const float vi0 = input[0];
136             const float vi1 = input[1];
137             input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
138             const float vw0 = *w++;
139             vacc0x0 += vi0 * vw0;
140             vacc1x0 += vi1 * vw0;
141           } while (--nnz != 0);
142         }
143         float vout0x0 = math_min_f32(vacc0x0, vmax);
144         float vout1x0 = math_min_f32(vacc1x0, vmax);
145         vout0x0 = math_max_f32(vout0x0, vmin);
146         vout1x0 = math_max_f32(vout1x0, vmin);
147         output[0] = vout0x0;
148         output[1] = vout1x0;
149         output = (float*restrict) ((uintptr_t) output + output_stride);
150         n -= 1;
151       }
152       if XNN_UNLIKELY(n != 0) {
153         do {
154           uint32_t nnz = *nnzmap++;
155           float vacc0 = *w++;
156           float vacc1 = vacc0;
157           if XNN_LIKELY(nnz != 0) {
158             do {
159               const intptr_t diff = *dmap++;
160               const float vi0 = input[0];
161               const float vi1 = input[1];
162               input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
163               const float vw = *w++;
164               vacc0 += vi0 * vw;
165               vacc1 += vi1 * vw;
166             } while (--nnz != 0);
167           }
168           float vout0 = math_min_f32(vacc0, vmax);
169           float vout1 = math_min_f32(vacc1, vmax);
170           vout0 = math_max_f32(vout0, vmin);
171           vout1 = math_max_f32(vout1, vmin);
172           output[0] = vout0;
173           output[1] = vout1;
174           output = (float*restrict) ((uintptr_t) output + output_stride);
175           n -= 1;
176         } while (n != 0);
177       }
178       output = (float*restrict) ((uintptr_t) output - output_decrement);
179       input += 2;
180     }
181     output_decrement += 1 * sizeof(float);
182     if (mc & (1 * sizeof(float))) {
183       const float*restrict w = weights;
184       const int32_t* dmap = widx_dmap;
185       const uint32_t* nnzmap = nidx_nnzmap;
186       size_t n = nc;
187       while (n >= 1) {
188         uint32_t nnz = *nnzmap++;
189         float vacc0x0 = *w++;
190         if XNN_LIKELY(nnz != 0) {
191           do {
192             const intptr_t diff = *dmap++;
193             const float vi0 = input[0];
194             input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
195             const float vw0 = *w++;
196             vacc0x0 += vi0 * vw0;
197           } while (--nnz != 0);
198         }
199         float vout0x0 = math_min_f32(vacc0x0, vmax);
200         vout0x0 = math_max_f32(vout0x0, vmin);
201         output[0] = vout0x0;
202         output = (float*restrict) ((uintptr_t) output + output_stride);
203         n -= 1;
204       }
205       if XNN_UNLIKELY(n != 0) {
206         do {
207           uint32_t nnz = *nnzmap++;
208           float vacc0 = *w++;
209           if XNN_LIKELY(nnz != 0) {
210             do {
211               const intptr_t diff = *dmap++;
212               const float vi0 = input[0];
213               input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
214               const float vw = *w++;
215               vacc0 += vi0 * vw;
216             } while (--nnz != 0);
217           }
218           float vout0 = math_min_f32(vacc0, vmax);
219           vout0 = math_max_f32(vout0, vmin);
220           output[0] = vout0;
221           output = (float*restrict) ((uintptr_t) output + output_stride);
222           n -= 1;
223         } while (n != 0);
224       }
225       output = (float*restrict) ((uintptr_t) output - output_decrement);
226       input += 1;
227     }
228   }
229 }
230