1// Copyright 2022 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6$assert DATATYPE in ["QS8", "QU8"] 7$assert BATCH_TILE >= 1 8$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 9#include <assert.h> 10 11#include <xnnpack/math.h> 12#include <xnnpack/vcvt.h> 13 14 15$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE] 16$OUTPUT_MIN = {"QS8": -128, "QU8": 0}[DATATYPE] 17$OUTPUT_MAX = {"QS8": 127, "QU8": 255}[DATATYPE] 18void xnn_${DATATYPE.lower()}_vcvt_ukernel__scalar_x${BATCH_TILE}( 19 size_t n, 20 const ${XINT8_T}* x, 21 ${XINT8_T}* y, 22 const union xnn_${DATATYPE.lower()}_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 23{ 24 const int32_t vbias = params->scalar.bias; 25 const int32_t vmultiplier = params->scalar.multiplier; 26 $if BATCH_TILE == 1: 27 do { 28 int32_t vacc = *x++; 29 vacc = vbias + vacc * vmultiplier; 30 31 int32_t vout = math_asr_s32(vacc, 8); 32 vout = math_max_s32(vout, ${OUTPUT_MIN}); 33 vout = math_min_s32(vout, ${OUTPUT_MAX}); 34 *y++ = (${XINT8_T}) vout; 35 36 n -= sizeof(${XINT8_T}); 37 } while (n != 0); 38 $else: 39 for (; n >= ${BATCH_TILE} * sizeof(${XINT8_T}); n -= ${BATCH_TILE} * sizeof(${XINT8_T})) { 40 $for N in range(BATCH_TILE): 41 int32_t vacc${ABC[N]} = x[${N}]; 42 x += ${BATCH_TILE}; 43 44 $for N in range(BATCH_TILE): 45 vacc${ABC[N]} = vbias + vacc${ABC[N]} * vmultiplier; 46 47 $for N in range(BATCH_TILE): 48 int32_t vout${ABC[N]} = math_asr_s32(vacc${ABC[N]}, 8); 49 50 $for N in range(BATCH_TILE): 51 vout${ABC[N]} = math_max_s32(vout${ABC[N]}, ${OUTPUT_MIN}); 52 53 $for N in range(BATCH_TILE): 54 vout${ABC[N]} = math_min_s32(vout${ABC[N]}, ${OUTPUT_MAX}); 55 56 $for N in range(BATCH_TILE): 57 y[${N}] = (${XINT8_T}) vout${ABC[N]}; 58 y += ${BATCH_TILE}; 59 } 60 if XNN_UNLIKELY(n != 0) { 61 $if BATCH_TILE == 2: 62 int32_t vacc = *x; 63 vacc = vbias + vacc * vmultiplier; 64 65 int32_t vout = math_asr_s32(vacc, 8); 66 vout = math_max_s32(vout, ${OUTPUT_MIN}); 67 vout = math_min_s32(vout, ${OUTPUT_MAX}); 68 *y = (${XINT8_T}) vout; 69 $else: 70 do { 71 int32_t vacc = *x++; 72 vacc = vbias + vacc * vmultiplier; 73 74 int32_t vout = math_asr_s32(vacc, 8); 75 vout = math_max_s32(vout, ${OUTPUT_MIN}); 76 vout = math_min_s32(vout, ${OUTPUT_MAX}); 77 *y++ = (${XINT8_T}) vout; 78 79 n -= sizeof(${XINT8_T}); 80 } while (n != 0); 81 } 82} 83