1// Copyright 2022 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6$assert BATCH_TILE >= 8 7$assert BATCH_TILE == 8 or BATCH_TILE % 16 == 0 8$SIMD_TILE = BATCH_TILE // 16 9$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 10#include <assert.h> 11 12#include <wasm_simd128.h> 13 14#include <xnnpack/common.h> 15#include <xnnpack/vcvt.h> 16 17 18$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE] 19$WASM_X16X8_LOAD8X8 = {"QS8": "wasm_i16x8_load8x8", "QU8": "wasm_u16x8_load8x8"}[DATATYPE] 20$WASM_I16X8_Q15MULR = "__builtin_wasm_relaxed_q15mulr_s_i16x8" if RELAXED else "wasm_i16x8_q15mulr_sat" 21$WASM_X8X16_NARROW_I16X8 = {"QS8": "wasm_i8x16_narrow_i16x8", "QU8": "wasm_u8x16_narrow_i16x8"}[DATATYPE] 22$ISA = "wasmrelaxedsimd" if RELAXED else "wasmsimd" 23void xnn_${DATATYPE.lower()}_vcvt_ukernel__${ISA}_x${BATCH_TILE}( 24 size_t n, 25 const ${XINT8_T}* x, 26 ${XINT8_T}* y, 27 const union xnn_${DATATYPE.lower()}_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS 28{ 29 assert(n != 0); 30 assert(n % sizeof(${XINT8_T}) == 0); 31 assert(x != NULL); 32 assert(y != NULL); 33 34 const v128_t vinput_zero_point = wasm_v128_load64_splat(params->wasmsimd.input_zero_point); 35 const v128_t vmultiplier = wasm_v128_load64_splat(params->wasmsimd.multiplier); 36 const v128_t voutput_zero_point = wasm_v128_load64_splat(params->wasmsimd.output_zero_point); 37 $if BATCH_TILE > 8: 38 for (; n >= ${BATCH_TILE} * sizeof(${XINT8_T}); n -= ${BATCH_TILE} * sizeof(${XINT8_T})) { 39 v128_t vacc${ABC[0]} = ${WASM_X16X8_LOAD8X8}(x); 40 $for N in range(1, 2*SIMD_TILE): 41 v128_t vacc${ABC[N]} = ${WASM_X16X8_LOAD8X8}(x + ${N * 8}); 42 x += ${BATCH_TILE}; 43 44 $for N in range(2*SIMD_TILE): 45 vacc${ABC[N]} = wasm_i16x8_sub(vinput_zero_point, vacc${ABC[N]}); 46 47 $for N in range(2*SIMD_TILE): 48 vacc${ABC[N]} = wasm_i16x8_shl(vacc${ABC[N]}, 7); 49 50 $for N in range(2*SIMD_TILE): 51 vacc${ABC[N]} = ${WASM_I16X8_Q15MULR}(vacc${ABC[N]}, vmultiplier); 52 53 $for N in range(2*SIMD_TILE): 54 vacc${ABC[N]} = wasm_i16x8_add_sat(vacc${ABC[N]}, voutput_zero_point); 55 56 $for N in range(SIMD_TILE): 57 const v128_t vy${ABC[N]} = ${WASM_X8X16_NARROW_I16X8}(vacc${ABC[2*N]}, vacc${ABC[2*N+1]}); 58 59 wasm_v128_store(y, vy${ABC[0]}); 60 $for N in range(1, SIMD_TILE): 61 wasm_v128_store((y + ${N * 16}), vy${ABC[N]}); 62 y += ${BATCH_TILE}; 63 } 64 for (; n >= 8 * sizeof(${XINT8_T}); n -= 8 * sizeof(${XINT8_T})) { 65 v128_t vacc = ${WASM_X16X8_LOAD8X8}(x); 66 vacc = wasm_i16x8_sub(vinput_zero_point, vacc); 67 vacc = wasm_i16x8_shl(vacc, 7); 68 vacc = ${WASM_I16X8_Q15MULR}(vacc, vmultiplier); 69 vacc = wasm_i16x8_add_sat(vacc, voutput_zero_point); 70 x += 8; 71 72 const v128_t vy = ${WASM_X8X16_NARROW_I16X8}(vacc, vacc); 73 wasm_v128_store64_lane(y, vy, 0); 74 y += 8; 75 } 76 if XNN_UNLIKELY(n != 0) { 77 assert(n >= 1 * sizeof(${XINT8_T})); 78 assert(n <= 7 * sizeof(${XINT8_T})); 79 80 v128_t vacc = ${WASM_X16X8_LOAD8X8}(x); 81 vacc = wasm_i16x8_sub(vinput_zero_point, vacc); 82 vacc = wasm_i16x8_shl(vacc, 7); 83 vacc = ${WASM_I16X8_Q15MULR}(vacc, vmultiplier); 84 vacc = wasm_i16x8_add_sat(vacc, voutput_zero_point); 85 86 v128_t vy = ${WASM_X8X16_NARROW_I16X8}(vacc, vacc); 87 if (n & (4 * sizeof(${XINT8_T}))) { 88 wasm_v128_store32_lane(y, vy, 0); 89 vy = wasm_u64x2_shr(vy, 32); 90 y += 4; 91 } 92 if (n & (2 * sizeof(${XINT8_T}))) { 93 wasm_v128_store16_lane(y, vy, 0); 94 vy = wasm_u32x4_shr(vy, 16); 95 y += 2; 96 } 97 if (n & (1 * sizeof(${XINT8_T}))) { 98 wasm_v128_store8_lane(y, vy, 0); 99 } 100 } 101} 102