# Copyright 2020 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#include <xnnpack/assembly.h>

# void xnn_f32_vrelu_ukernel__wasm32_shr_x4(
#     size_t n,                 0
#     const float* x,           1
#     float* y,                 2
#     const union params)       3 unused
#
# ReLU kernel implemented with integer operations on the raw float bits:
#     mask = (bits >> 31) - 1     all ones when the sign bit is clear, else zero
#     out  = bits & mask
# Every input whose sign bit is set (this includes -0.0) is written as +0.0;
# every other bit pattern is copied to the output unchanged.
#
# The count n is in bytes. Groups of 16 bytes go through the unrolled loop,
# the rest is handled one float at a time. A tail shorter than one float
# is neither read nor written.
#
# Count comparisons are unsigned because n is a size_t; a signed compare
# would see counts of 2 GiB or more as negative and skip all of the work.

# locals (declared as i32, each one holds the raw bits of a float or a mask)
#     float value0              4
#     float value1              5
#     float value2              6
#     float value3              7
#     float mask0               8
#     float mask1               9
#     float mask2               10
#     float mask3               11

BEGIN_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x4
    .functype xnn_f32_vrelu_ukernel__wasm32_shr_x4 (i32, i32, i32, i32) -> ()
    .local    i32, i32, i32, i32, i32, i32, i32, i32

    local.get    0
    i32.const    16        # count >= 16 (unsigned)
    i32.ge_u
    if
      loop
        local.get    1
        i32.load     0     # load 4 floats from src
        local.set    4
        local.get    1
        i32.load     4
        local.set    5
        local.get    1
        i32.load     8
        local.set    6
        local.get    1
        i32.load     12
        local.set    7

        local.get    4     # ((v >> 31) - 1) & v
        i32.const    31
        i32.shr_u          # sign bit moved to bit 0: result is 0 or 1
        local.set    8
        local.get    5
        i32.const    31
        i32.shr_u
        local.set    9
        local.get    6
        i32.const    31
        i32.shr_u
        local.set    10
        local.get    7
        i32.const    31
        i32.shr_u
        local.set    11

        local.get    8
        i32.const    -1
        i32.add            # 0 -> all ones (keep), 1 -> zero (clear)
        local.set    8
        local.get    9
        i32.const    -1
        i32.add
        local.set    9
        local.get    10
        i32.const    -1
        i32.add
        local.set    10
        local.get    11
        i32.const    -1
        i32.add
        local.set    11

        local.get    4
        local.get    8
        i32.and
        local.set    4
        local.get    5
        local.get    9
        i32.and
        local.set    5
        local.get    6
        local.get    10
        i32.and
        local.set    6
        local.get    7
        local.get    11
        i32.and
        local.set    7

        local.get    2
        local.get    4
        i32.store    0     # store 4 floats
        local.get    2
        local.get    5
        i32.store    4
        local.get    2
        local.get    6
        i32.store    8
        local.get    2
        local.get    7
        i32.store    12

        local.get    2     # dst += 16
        i32.const    16
        i32.add
        local.set    2

        local.get    1     # src += 16
        i32.const    16
        i32.add
        local.set    1

        local.get    0
        i32.const    -16
        i32.add            # count -= 16
        local.tee    0     # keep the new count on the stack for the test

        i32.const    16    # count >= 16 (unsigned)
        i32.ge_u
        br_if        0     # loop
      end_loop
    end_if

    local.get    0
    i32.const    4         # count >= 4 (unsigned)
    i32.ge_u
    if
      loop
        local.get    1     # src
        i32.load     0     # load float from src
        local.set    4

        local.get    1     # src += 4
        i32.const    4
        i32.add
        local.set    1

        local.get    4     # ((v >> 31) - 1) & v
        i32.const    31
        i32.shr_u
        local.set    5

        local.get    5
        i32.const    -1
        i32.add
        local.set    5

        local.get    4
        local.get    5
        i32.and
        local.set    4

        local.get    2     # dst
        local.get    4
        i32.store    0     # store float

        local.get    2     # dst += 4
        i32.const    4
        i32.add
        local.set    2

        local.get    0
        i32.const    -4
        i32.add            # count -= 4
        local.tee    0     # keep the new count on the stack for the test

        i32.const    4     # count >= 4 (unsigned)
        i32.ge_u
        br_if        0     # loop
      end_loop
    end_if
END_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x4