1// Copyright 2021 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6$assert BATCH_TILE >= 1 7$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 8#include <assert.h> 9 10#include <xnnpack/common.h> 11#include <xnnpack/math.h> 12#include <xnnpack/vcvt.h> 13 14 15void xnn_f16_f32_vcvt_ukernel__scalar_x${BATCH_TILE}( 16 size_t n, 17 const void* input, 18 float* output, 19 const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 20{ 21 assert(n != 0); 22 assert(n % sizeof(uint16_t) == 0); 23 assert(input != NULL); 24 assert(output != NULL); 25 26 const uint32_t vsign_mask = params->scalar.sign_mask; 27 const uint32_t vexp_offset = params->scalar.exp_offset; 28 const float vexp_scale = params->scalar.exp_scale; 29 const uint32_t vmagic_mask = params->scalar.magic_mask; 30 const float vmagic_bias = params->scalar.magic_bias; 31 const uint32_t vdenorm_cutoff = params->scalar.denorm_cutoff; 32 33 const uint16_t* i = (const uint16_t*) input; 34 uint32_t* o = (uint32_t*) output; 35 $if BATCH_TILE > 1: 36 for (; n >= ${BATCH_TILE} * sizeof(uint16_t); n -= ${BATCH_TILE} * sizeof(uint16_t)) { 37 $for N in range(BATCH_TILE): 38 const uint16_t vh${N} = i[${N}]; 39 i += ${BATCH_TILE}; 40 41 $for N in range(BATCH_TILE): 42 const uint32_t vw${N} = (uint32_t) vh${N} << 16; 43 44 $for N in range(BATCH_TILE): 45 const uint32_t vsign${N} = vw${N} & vsign_mask; 46 47 $for N in range(BATCH_TILE): 48 const uint32_t v2w${N} = vw${N} + vw${N}; 49 50 $for N in range(BATCH_TILE): 51 const uint32_t vnorm${N} = float_as_uint32(uint32_as_float((v2w${N} >> 4) + vexp_offset) * vexp_scale); 52 53 $for N in range(BATCH_TILE): 54 const uint32_t vdenorm${N} = float_as_uint32(uint32_as_float((v2w${N} >> 17) | vmagic_mask) - vmagic_bias); 55 56 $for N in range(BATCH_TILE): 57 const uint32_t vf${N} = vsign${N} | (XNN_UNPREDICTABLE(v2w${N} < vdenorm_cutoff) ? vdenorm${N} : vnorm${N}); 58 59 $for N in range(BATCH_TILE): 60 o[${N}] = vf${N}; 61 o += ${BATCH_TILE}; 62 } 63 $if BATCH_TILE == 1: 64 do { 65 const uint16_t vh = *i++; 66 67 const uint32_t vw = (uint32_t) vh << 16; 68 const uint32_t vsign = vw & vsign_mask; 69 const uint32_t v2w = vw + vw; 70 const uint32_t vnorm = float_as_uint32(uint32_as_float((v2w >> 4) + vexp_offset) * vexp_scale); 71 const uint32_t vdenorm = float_as_uint32(uint32_as_float((v2w >> 17) | vmagic_mask) - vmagic_bias); 72 const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm); 73 74 *o++ = vf; 75 76 n -= sizeof(uint16_t); 77 } while (n != 0); 78 $elif BATCH_TILE == 2: 79 if XNN_UNLIKELY(n != 0) { 80 const uint16_t vh = *i; 81 82 const uint32_t vw = (uint32_t) vh << 16; 83 const uint32_t vsign = vw & vsign_mask; 84 const uint32_t v2w = vw + vw; 85 const uint32_t vnorm = float_as_uint32(uint32_as_float((v2w >> 4) + vexp_offset) * vexp_scale); 86 const uint32_t vdenorm = float_as_uint32(uint32_as_float((v2w >> 17) | vmagic_mask) - vmagic_bias); 87 const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm); 88 89 *o = vf; 90 } 91 $else: 92 if XNN_UNLIKELY(n != 0) { 93 do { 94 const uint16_t vh = *i++; 95 96 const uint32_t vw = (uint32_t) vh << 16; 97 const uint32_t vsign = vw & vsign_mask; 98 const uint32_t v2w = vw + vw; 99 const uint32_t vnorm = float_as_uint32(uint32_as_float((v2w >> 4) + vexp_offset) * vexp_scale); 100 const uint32_t vdenorm = float_as_uint32(uint32_as_float((v2w >> 17) | vmagic_mask) - vmagic_bias); 101 const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm); 102 103 *o++ = vf; 104 105 n -= sizeof(uint16_t); 106 } while (n != 0); 107 } 108} 109