1 // Auto-generated file. Do not edit!
2 // Template: src/f32-spmm/scalar.c.in
3 // Generator: tools/xngen
4 //
5 // Copyright 2019 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9
10 #include <assert.h>
11
12 #include <xnnpack/math.h>
13 #include <xnnpack/spmm.h>
14
15
xnn_f32_spmm_minmax_ukernel_8x1__scalar(size_t mc,size_t nc,const float * restrict input,const float * restrict weights,const int32_t * restrict widx_dmap,const uint32_t * restrict nidx_nnzmap,float * restrict output,size_t output_stride,const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS (1)])16 void xnn_f32_spmm_minmax_ukernel_8x1__scalar(
17 size_t mc,
18 size_t nc,
19 const float*restrict input,
20 const float*restrict weights,
21 const int32_t*restrict widx_dmap,
22 const uint32_t*restrict nidx_nnzmap,
23 float*restrict output,
24 size_t output_stride,
25 const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
26 {
27 assert(mc != 0);
28 assert(mc % sizeof(float) == 0);
29 assert(nc != 0);
30
31 const float vmin = params->scalar.min;
32 const float vmax = params->scalar.max;
33 size_t output_decrement = output_stride * nc - 8 * sizeof(float);
34 while (mc >= 8 * sizeof(float)) {
35 const float*restrict w = weights;
36 const int32_t* dmap = widx_dmap;
37 const uint32_t* nnzmap = nidx_nnzmap;
38 size_t n = nc;
39 while (n >= 1) {
40 uint32_t nnz = *nnzmap++;
41 float vacc0x0 = *w++;
42 float vacc1x0 = vacc0x0;
43 float vacc2x0 = vacc0x0;
44 float vacc3x0 = vacc0x0;
45 float vacc4x0 = vacc0x0;
46 float vacc5x0 = vacc0x0;
47 float vacc6x0 = vacc0x0;
48 float vacc7x0 = vacc0x0;
49 if XNN_LIKELY(nnz != 0) {
50 do {
51 const intptr_t diff = *dmap++;
52 const float vi0 = input[0];
53 const float vi1 = input[1];
54 const float vi2 = input[2];
55 const float vi3 = input[3];
56 const float vi4 = input[4];
57 const float vi5 = input[5];
58 const float vi6 = input[6];
59 const float vi7 = input[7];
60 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
61 const float vw0 = *w++;
62 vacc0x0 += vi0 * vw0;
63 vacc1x0 += vi1 * vw0;
64 vacc2x0 += vi2 * vw0;
65 vacc3x0 += vi3 * vw0;
66 vacc4x0 += vi4 * vw0;
67 vacc5x0 += vi5 * vw0;
68 vacc6x0 += vi6 * vw0;
69 vacc7x0 += vi7 * vw0;
70 } while (--nnz != 0);
71 }
72 float vout0x0 = math_min_f32(vacc0x0, vmax);
73 float vout1x0 = math_min_f32(vacc1x0, vmax);
74 float vout2x0 = math_min_f32(vacc2x0, vmax);
75 float vout3x0 = math_min_f32(vacc3x0, vmax);
76 float vout4x0 = math_min_f32(vacc4x0, vmax);
77 float vout5x0 = math_min_f32(vacc5x0, vmax);
78 float vout6x0 = math_min_f32(vacc6x0, vmax);
79 float vout7x0 = math_min_f32(vacc7x0, vmax);
80 vout0x0 = math_max_f32(vout0x0, vmin);
81 vout1x0 = math_max_f32(vout1x0, vmin);
82 vout2x0 = math_max_f32(vout2x0, vmin);
83 vout3x0 = math_max_f32(vout3x0, vmin);
84 vout4x0 = math_max_f32(vout4x0, vmin);
85 vout5x0 = math_max_f32(vout5x0, vmin);
86 vout6x0 = math_max_f32(vout6x0, vmin);
87 vout7x0 = math_max_f32(vout7x0, vmin);
88 output[0] = vout0x0;
89 output[1] = vout1x0;
90 output[2] = vout2x0;
91 output[3] = vout3x0;
92 output[4] = vout4x0;
93 output[5] = vout5x0;
94 output[6] = vout6x0;
95 output[7] = vout7x0;
96 output[0] = vout0x0;
97 output[1] = vout1x0;
98 output[2] = vout2x0;
99 output[3] = vout3x0;
100 output[4] = vout4x0;
101 output[5] = vout5x0;
102 output[6] = vout6x0;
103 output[7] = vout7x0;
104 output = (float*restrict) ((uintptr_t) output + output_stride);
105 n -= 1;
106 }
107 if XNN_UNLIKELY(n != 0) {
108 do {
109 uint32_t nnz = *nnzmap++;
110 float vacc0 = *w++;
111 float vacc1 = vacc0;
112 float vacc2 = vacc0;
113 float vacc3 = vacc0;
114 float vacc4 = vacc0;
115 float vacc5 = vacc0;
116 float vacc6 = vacc0;
117 float vacc7 = vacc0;
118 if XNN_LIKELY(nnz != 0) {
119 do {
120 const intptr_t diff = *dmap++;
121 const float vi0 = input[0];
122 const float vi1 = input[1];
123 const float vi2 = input[2];
124 const float vi3 = input[3];
125 const float vi4 = input[4];
126 const float vi5 = input[5];
127 const float vi6 = input[6];
128 const float vi7 = input[7];
129 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
130 const float vw = *w++;
131 vacc0 += vi0 * vw;
132 vacc1 += vi1 * vw;
133 vacc2 += vi2 * vw;
134 vacc3 += vi3 * vw;
135 vacc4 += vi4 * vw;
136 vacc5 += vi5 * vw;
137 vacc6 += vi6 * vw;
138 vacc7 += vi7 * vw;
139 } while (--nnz != 0);
140 }
141 float vout0 = math_min_f32(vacc0, vmax);
142 float vout1 = math_min_f32(vacc1, vmax);
143 float vout2 = math_min_f32(vacc2, vmax);
144 float vout3 = math_min_f32(vacc3, vmax);
145 float vout4 = math_min_f32(vacc4, vmax);
146 float vout5 = math_min_f32(vacc5, vmax);
147 float vout6 = math_min_f32(vacc6, vmax);
148 float vout7 = math_min_f32(vacc7, vmax);
149 vout0 = math_max_f32(vout0, vmin);
150 vout1 = math_max_f32(vout1, vmin);
151 vout2 = math_max_f32(vout2, vmin);
152 vout3 = math_max_f32(vout3, vmin);
153 vout4 = math_max_f32(vout4, vmin);
154 vout5 = math_max_f32(vout5, vmin);
155 vout6 = math_max_f32(vout6, vmin);
156 vout7 = math_max_f32(vout7, vmin);
157 output[0] = vout0;
158 output[1] = vout1;
159 output[2] = vout2;
160 output[3] = vout3;
161 output[4] = vout4;
162 output[5] = vout5;
163 output[6] = vout6;
164 output[7] = vout7;
165 output = (float*restrict) ((uintptr_t) output + output_stride);
166 n -= 1;
167 } while (n != 0);
168 }
169 output = (float*restrict) ((uintptr_t) output - output_decrement);
170 input += 8;
171 mc -= 8 * sizeof(float);
172 }
173 if XNN_UNLIKELY(mc != 0) {
174 output_decrement += 4 * sizeof(float);
175 if (mc & (4 * sizeof(float))) {
176 const float*restrict w = weights;
177 const int32_t* dmap = widx_dmap;
178 const uint32_t* nnzmap = nidx_nnzmap;
179 size_t n = nc;
180 while (n >= 1) {
181 uint32_t nnz = *nnzmap++;
182 float vacc0x0 = *w++;
183 float vacc1x0 = vacc0x0;
184 float vacc2x0 = vacc0x0;
185 float vacc3x0 = vacc0x0;
186 if XNN_LIKELY(nnz != 0) {
187 do {
188 const intptr_t diff = *dmap++;
189 const float vi0 = input[0];
190 const float vi1 = input[1];
191 const float vi2 = input[2];
192 const float vi3 = input[3];
193 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
194 const float vw0 = *w++;
195 vacc0x0 += vi0 * vw0;
196 vacc1x0 += vi1 * vw0;
197 vacc2x0 += vi2 * vw0;
198 vacc3x0 += vi3 * vw0;
199 } while (--nnz != 0);
200 }
201 float vout0x0 = math_min_f32(vacc0x0, vmax);
202 float vout1x0 = math_min_f32(vacc1x0, vmax);
203 float vout2x0 = math_min_f32(vacc2x0, vmax);
204 float vout3x0 = math_min_f32(vacc3x0, vmax);
205 vout0x0 = math_max_f32(vout0x0, vmin);
206 vout1x0 = math_max_f32(vout1x0, vmin);
207 vout2x0 = math_max_f32(vout2x0, vmin);
208 vout3x0 = math_max_f32(vout3x0, vmin);
209 output[0] = vout0x0;
210 output[1] = vout1x0;
211 output[2] = vout2x0;
212 output[3] = vout3x0;
213 output = (float*restrict) ((uintptr_t) output + output_stride);
214 n -= 1;
215 }
216 if XNN_UNLIKELY(n != 0) {
217 do {
218 uint32_t nnz = *nnzmap++;
219 float vacc0 = *w++;
220 float vacc1 = vacc0;
221 float vacc2 = vacc0;
222 float vacc3 = vacc0;
223 if XNN_LIKELY(nnz != 0) {
224 do {
225 const intptr_t diff = *dmap++;
226 const float vi0 = input[0];
227 const float vi1 = input[1];
228 const float vi2 = input[2];
229 const float vi3 = input[3];
230 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
231 const float vw = *w++;
232 vacc0 += vi0 * vw;
233 vacc1 += vi1 * vw;
234 vacc2 += vi2 * vw;
235 vacc3 += vi3 * vw;
236 } while (--nnz != 0);
237 }
238 float vout0 = math_min_f32(vacc0, vmax);
239 float vout1 = math_min_f32(vacc1, vmax);
240 float vout2 = math_min_f32(vacc2, vmax);
241 float vout3 = math_min_f32(vacc3, vmax);
242 vout0 = math_max_f32(vout0, vmin);
243 vout1 = math_max_f32(vout1, vmin);
244 vout2 = math_max_f32(vout2, vmin);
245 vout3 = math_max_f32(vout3, vmin);
246 output[0] = vout0;
247 output[1] = vout1;
248 output[2] = vout2;
249 output[3] = vout3;
250 output = (float*restrict) ((uintptr_t) output + output_stride);
251 n -= 1;
252 } while (n != 0);
253 }
254 output = (float*restrict) ((uintptr_t) output - output_decrement);
255 input += 4;
256 }
257 output_decrement += 2 * sizeof(float);
258 if (mc & (2 * sizeof(float))) {
259 const float*restrict w = weights;
260 const int32_t* dmap = widx_dmap;
261 const uint32_t* nnzmap = nidx_nnzmap;
262 size_t n = nc;
263 while (n >= 1) {
264 uint32_t nnz = *nnzmap++;
265 float vacc0x0 = *w++;
266 float vacc1x0 = vacc0x0;
267 if XNN_LIKELY(nnz != 0) {
268 do {
269 const intptr_t diff = *dmap++;
270 const float vi0 = input[0];
271 const float vi1 = input[1];
272 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
273 const float vw0 = *w++;
274 vacc0x0 += vi0 * vw0;
275 vacc1x0 += vi1 * vw0;
276 } while (--nnz != 0);
277 }
278 float vout0x0 = math_min_f32(vacc0x0, vmax);
279 float vout1x0 = math_min_f32(vacc1x0, vmax);
280 vout0x0 = math_max_f32(vout0x0, vmin);
281 vout1x0 = math_max_f32(vout1x0, vmin);
282 output[0] = vout0x0;
283 output[1] = vout1x0;
284 output = (float*restrict) ((uintptr_t) output + output_stride);
285 n -= 1;
286 }
287 if XNN_UNLIKELY(n != 0) {
288 do {
289 uint32_t nnz = *nnzmap++;
290 float vacc0 = *w++;
291 float vacc1 = vacc0;
292 if XNN_LIKELY(nnz != 0) {
293 do {
294 const intptr_t diff = *dmap++;
295 const float vi0 = input[0];
296 const float vi1 = input[1];
297 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
298 const float vw = *w++;
299 vacc0 += vi0 * vw;
300 vacc1 += vi1 * vw;
301 } while (--nnz != 0);
302 }
303 float vout0 = math_min_f32(vacc0, vmax);
304 float vout1 = math_min_f32(vacc1, vmax);
305 vout0 = math_max_f32(vout0, vmin);
306 vout1 = math_max_f32(vout1, vmin);
307 output[0] = vout0;
308 output[1] = vout1;
309 output = (float*restrict) ((uintptr_t) output + output_stride);
310 n -= 1;
311 } while (n != 0);
312 }
313 output = (float*restrict) ((uintptr_t) output - output_decrement);
314 input += 2;
315 }
316 output_decrement += 1 * sizeof(float);
317 if (mc & (1 * sizeof(float))) {
318 const float*restrict w = weights;
319 const int32_t* dmap = widx_dmap;
320 const uint32_t* nnzmap = nidx_nnzmap;
321 size_t n = nc;
322 while (n >= 1) {
323 uint32_t nnz = *nnzmap++;
324 float vacc0x0 = *w++;
325 if XNN_LIKELY(nnz != 0) {
326 do {
327 const intptr_t diff = *dmap++;
328 const float vi0 = input[0];
329 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
330 const float vw0 = *w++;
331 vacc0x0 += vi0 * vw0;
332 } while (--nnz != 0);
333 }
334 float vout0x0 = math_min_f32(vacc0x0, vmax);
335 vout0x0 = math_max_f32(vout0x0, vmin);
336 output[0] = vout0x0;
337 output = (float*restrict) ((uintptr_t) output + output_stride);
338 n -= 1;
339 }
340 if XNN_UNLIKELY(n != 0) {
341 do {
342 uint32_t nnz = *nnzmap++;
343 float vacc0 = *w++;
344 if XNN_LIKELY(nnz != 0) {
345 do {
346 const intptr_t diff = *dmap++;
347 const float vi0 = input[0];
348 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
349 const float vw = *w++;
350 vacc0 += vi0 * vw;
351 } while (--nnz != 0);
352 }
353 float vout0 = math_min_f32(vacc0, vmax);
354 vout0 = math_max_f32(vout0, vmin);
355 output[0] = vout0;
356 output = (float*restrict) ((uintptr_t) output + output_stride);
357 n -= 1;
358 } while (n != 0);
359 }
360 output = (float*restrict) ((uintptr_t) output - output_decrement);
361 input += 1;
362 }
363 }
364 }
365