// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#include <assert.h>

#include <xnnpack/math.h>
#include <xnnpack/spmm.h>


// f32 SpMM micro-kernel with min/max output clamping, written in scalar code
// with every load issued one step ahead of its use.
//
// This file is a template: MR and NR are the two numbers that appear in the
// generated function name. MR is the tile width in floats; NR is not used
// anywhere else in this template.
//
// For every tile of MR consecutive floats of `input`, and for each of the
// `nc` iterations, the kernel:
//   1. initialises MR accumulators with the next value from `weights`;
//   2. runs `nnz` multiply-accumulate steps (the count is read from
//      `nidx_nnzmap`); each step multiplies the MR current input values by
//      the next value from `weights`, then moves `input` by the next byte
//      offset from `widx_dmap` and reloads the MR input values;
//   3. clamps each accumulator (min against params->scalar.max first, then
//      max against params->scalar.min), stores the MR results to `output`
//      and advances `output` by `output_stride` bytes.
//
// Parameters:
//   mc            - size in bytes of the run of floats to process along the
//                   tile dimension; asserted non-zero and a multiple of
//                   sizeof(float).
//   nc            - number of accumulate/clamp/store iterations per tile;
//                   asserted non-zero.
//   input         - floats to read; repositioned by the offsets in
//                   `widx_dmap`.
//   weights       - float stream; per `nc` iteration it holds one initial
//                   accumulator value followed by that iteration's `nnz`
//                   multipliers.
//   widx_dmap     - signed byte offsets added to `input`, one per
//                   multiply-accumulate step.
//   nidx_nnzmap   - per `nc` iteration, the number of multiply-accumulate
//                   steps to run (may be zero).
//   output        - destination for the clamped results.
//   output_stride - distance in bytes between the stores of consecutive `nc`
//                   iterations.
//   params        - supplies the scalar.min and scalar.max clamp bounds.
//
// NOTE(review): because of the look-ahead loads, each pass reads one element
// beyond the last consumed entry of both `weights` and `widx_dmap`. Callers
// presumably pad these buffers accordingly -- confirm against the packing
// code.
//
// NOTE(review): `input` is advanced by a fixed tile width after each pass
// without being rewound, so the offsets in `widx_dmap` presumably sum to zero
// over a full pass -- confirm.
void xnn_f32_spmm_minmax_ukernel_${MR}x${NR}__scalar_pipelined(
    size_t mc,
    size_t nc,
    const float*restrict input,
    const float*restrict weights,
    const int32_t*restrict widx_dmap,
    const uint32_t*restrict nidx_nnzmap,
    float*restrict output,
    size_t output_stride,
    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(mc != 0);
  assert(mc % sizeof(float) == 0);
  assert(nc != 0);

  const float vmin = params->scalar.min;
  const float vmax = params->scalar.max;
  // After nc stores `output` has moved nc * output_stride bytes. Subtracting
  // this amount rewinds it to the first row and steps one full tile forward.
  size_t output_decrement = output_stride * nc - ${MR} * sizeof(float);
  // Main path: full tiles.
  while XNN_LIKELY(mc >= ${MR} * sizeof(float)) {
    // Every tile walks the weight, offset and count streams from their start.
    const float*restrict w = weights;
    const int32_t* dmap = widx_dmap;
    const uint32_t* nnzmap = nidx_nnzmap;
    // Prime the pipeline: first weight, first input offset, first input tile.
    float vw = *w++;
    intptr_t diff = *dmap++;
    $for M in range(MR):
      float vi${ABC[M]} = input[${M}];
    size_t n = nc;
    do {
      uint32_t nnz = *nnzmap++;
      // vw currently holds this iteration's initial accumulator value; the
      // next weight is fetched immediately after it has been consumed.
      $for M in range(MR):
        float vacc${ABC[M]} = vw;
      vw = *w++;
      if XNN_LIKELY(nnz != 0) {
        do {
          // Consume the values loaded during the previous step...
          $for M in range(MR):
            vacc${ABC[M]} += vi${ABC[M]} * vw;
          input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);

          // ...then load the offset, weight and inputs for the next step.
          diff = *dmap++;
          vw = *w++;
          $for M in range(MR):
            vi${ABC[M]} = input[${M}];
        } while (--nnz != 0);
      }
      // Clamp from above first, then from below, and store the tile.
      $for M in range(MR):
        float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
      $for M in range(MR):
        vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
      $for M in range(MR):
        output[${M}] = vout${ABC[M]};
      output = (float*restrict) ((uintptr_t) output + output_stride);
    } while (--n != 0);
    // Rewind `output` to the first row, one tile further along.
    output = (float*restrict) ((uintptr_t) output - output_decrement);
    input += ${MR};
    mc -= ${MR} * sizeof(float);
  }
  if XNN_UNLIKELY(mc != 0) {
    // Remainder of fewer than a full tile: processed as power-of-two sized
    // sub-tiles, largest first, selected by the bits set in mc. Before each
    // size, output_decrement is adjusted so that the rewind nets an advance of
    // exactly that sub-tile's width. The adjustment is made whether or not the
    // size is taken, which keeps it correct for the sizes that follow.
    $for LOG2M in reversed(range((MR - 1).bit_length())):
      $SUBMR = 1 << LOG2M
      $if SUBMR * 2 >= MR:
        output_decrement += ${MR - SUBMR} * sizeof(float);
      $else:
        output_decrement += ${SUBMR} * sizeof(float);
      if (mc & (${SUBMR} * sizeof(float))) {
        // Same look-ahead loop as the main path, on a narrower tile.
        const float*restrict w = weights;
        const int32_t* dmap = widx_dmap;
        const uint32_t* nnzmap = nidx_nnzmap;
        float vw = *w++;
        intptr_t diff = *dmap++;
        $for M in range(SUBMR):
          float vi${ABC[M]} = input[${M}];
        size_t n = nc;
        do {
          uint32_t nnz = *nnzmap++;
          $for M in range(SUBMR):
            float vacc${ABC[M]} = vw;
          vw = *w++;
          if XNN_LIKELY(nnz != 0) {
            do {
              $for M in range(SUBMR):
                vacc${ABC[M]} += vi${ABC[M]} * vw;
              input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);

              diff = *dmap++;
              vw = *w++;
              $for M in range(SUBMR):
                vi${ABC[M]} = input[${M}];
            } while (--nnz != 0);
          }
          $for M in range(SUBMR):
            float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
          $for M in range(SUBMR):
            vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
          $for M in range(SUBMR):
            output[${M}] = vout${ABC[M]};
          output = (float*restrict) ((uintptr_t) output + output_stride);
        } while (--n != 0);
        output = (float*restrict) ((uintptr_t) output - output_decrement);
        input += ${SUBMR};
      }
  }
}