/aosp_15_r20/external/XNNPACK/src/f32-velu/gen/

velu-wasmsimd-x86-rr2-p6-x24.c  (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24):
   72  v128_t vsKLMN = wasm_i32x4_shl(vnKLMN, 23);  [local]
  105  vsKLMN = wasm_v128_andnot(vsKLMN, vsatmKLMN);
  158  vtKLMN = wasm_f32x4_mul(vtKLMN, vsKLMN);
  159  vsKLMN = wasm_f32x4_sub(vsKLMN, vone);
  173  const v128_t veKLMN = wasm_f32x4_mul(wasm_f32x4_add(vpKLMN, vsKLMN), valpha);

velu-wasmsimd-x86-rr2-lut16-p3-x24.c  (in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24):
  134  v128_t vsKLMN = wasm_i32x4_add(vlKLMN, venKLMN);  [local]
  160  vsKLMN = wasm_v128_andnot(vsKLMN, vsatmKLMN);
  192  vtKLMN = wasm_f32x4_mul(vtKLMN, vsKLMN);
  193  vsKLMN = wasm_f32x4_sub(vsKLMN, vone);
  207  const v128_t veKLMN = wasm_f32x4_mul(wasm_f32x4_add(vpKLMN, vsKLMN), valpha);

velu-neonfma-rr1-p6-x24.c  (in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24):
   75  float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  130  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  131  vsKLMN = vsubq_f32(vsKLMN, vone);
  145  const float32x4_t veKLMN = vmulq_f32(vaddq_f32(vpKLMN, vsKLMN), valpha);

velu-wasmsimd-arm-rr2-p6-x24.c  (in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24):
   72  v128_t vsKLMN = wasm_i32x4_shl(vnKLMN, 23);  [local]
  140  vtKLMN = wasm_f32x4_mul(vtKLMN, vsKLMN);
  141  vsKLMN = wasm_f32x4_sub(vsKLMN, vone);
  155  const v128_t veKLMN = wasm_f32x4_mul(wasm_f32x4_add(vpKLMN, vsKLMN), valpha);

velu-sse41-rr2-p6-x24.c  (in xnn_f32_velu_ukernel__sse41_rr2_p6_x24):
   72  __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));  [local]
  140  vtKLMN = _mm_mul_ps(vtKLMN, vsKLMN);
  141  vsKLMN = _mm_sub_ps(vsKLMN, vone);
  155  const __m128 veKLMN = _mm_mul_ps(_mm_add_ps(vpKLMN, vsKLMN), valpha);

velu-neon-rr2-p6-x24.c  (in xnn_f32_velu_ukernel__neon_rr2_p6_x24):
   76  float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  138  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  139  vsKLMN = vsubq_f32(vsKLMN, vone);
  153  const float32x4_t veKLMN = vmulq_f32(vaddq_f32(vpKLMN, vsKLMN), valpha);

velu-sse2-rr2-p6-x24.c  (in xnn_f32_velu_ukernel__sse2_rr2_p6_x24):
   72  __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));  [local]
  140  vtKLMN = _mm_mul_ps(vtKLMN, vsKLMN);
  141  vsKLMN = _mm_sub_ps(vsKLMN, vone);
  155  const __m128 veKLMN = _mm_mul_ps(_mm_add_ps(vpKLMN, vsKLMN), valpha);

velu-neonfma-rr1-lut16-p3-x24.c  (in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24):
  132  float32x4_t vsKLMN = vreinterpretq_f32_s32(vaddq_s32(vlKLMN, venKLMN));  [local]
  165  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  166  vsKLMN = vsubq_f32(vsKLMN, vone);
  180  const float32x4_t veKLMN = vmulq_f32(vaddq_f32(vpKLMN, vsKLMN), valpha);

velu-neon-rr2-lut16-p3-x24.c  (in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24):
  133  float32x4_t vsKLMN = vreinterpretq_f32_s32(vaddq_s32(vlKLMN, venKLMN));  [local]
  173  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  174  vsKLMN = vsubq_f32(vsKLMN, vone);
  188  const float32x4_t veKLMN = vmulq_f32(vaddq_f32(vpKLMN, vsKLMN), valpha);

velu-wasmsimd-arm-rr2-lut16-p3-x24.c  (in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24):
  134  v128_t vsKLMN = wasm_i32x4_add(vlKLMN, venKLMN);  [local]
  174  vtKLMN = wasm_f32x4_mul(vtKLMN, vsKLMN);
  175  vsKLMN = wasm_f32x4_sub(vsKLMN, vone);
  189  const v128_t veKLMN = wasm_f32x4_mul(wasm_f32x4_add(vpKLMN, vsKLMN), valpha);

velu-sse41-rr2-lut16-p3-x24.c  (in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24):
  191  __m128 vsKLMN = _mm_castsi128_ps(_mm_add_epi32(vlKLMN, venKLMN));  [local]
  231  vtKLMN = _mm_mul_ps(vtKLMN, vsKLMN);
  232  vsKLMN = _mm_sub_ps(vsKLMN, vone);
  246  const __m128 veKLMN = _mm_mul_ps(_mm_add_ps(vpKLMN, vsKLMN), valpha);

velu-sse2-rr2-lut16-p3-x24.c  (in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24):
  215  __m128 vsKLMN = _mm_castsi128_ps(_mm_add_epi32(vlKLMN, venKLMN));  [local]
  255  vtKLMN = _mm_mul_ps(vtKLMN, vsKLMN);
  256  vsKLMN = _mm_sub_ps(vsKLMN, vone);
  270  const __m128 veKLMN = _mm_mul_ps(_mm_add_ps(vpKLMN, vsKLMN), valpha);

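All of the f32-velu hits above share one expm1 reconstruction: the rr2-p6 kernels build s = 2^n by shifting the rounded exponent into the float exponent field (the "<< 23" / _mm_slli_epi32 / vshlq_n_s32 lines), the lut16-p3 kernels assemble s from a small table entry plus the remaining exponent bits (vlKLMN + venKLMN), and both then fold s into the result via vt *= vs, vs -= vone, ve = (vp + vs) * valpha. Below is a minimal scalar C sketch of that scheme, not XNNPACK code: the function name, the input clamp (which stands in for the vsatmKLMN/andnot saturation masking), and the polynomial coefficients are illustrative assumptions rather than the generated kernels' tuned constants.

/* Hedged scalar sketch of the rr2-p6 ELU scheme (illustrative only). */
#include <math.h>
#include <stdint.h>
#include <string.h>

static float elu_rr_p6_sketch(float x, float alpha) {
  if (x > 0.0f) return x;

  /* Crude stand-in for the kernels' saturation masking (vsatmKLMN / andnot). */
  const float z = x < -87.0f ? -87.0f : x;

  /* Range reduction: z = n*ln(2) + t with integer n and |t| <= ln(2)/2. */
  const float log2e  = 0x1.715476p+0f;
  const float ln2_hi = 0x1.62E400p-1f;
  const float ln2_lo = 0x1.7F7D1Cp-20f;
  const float n = nearbyintf(z * log2e);
  const float t = (z - n * ln2_hi) - n * ln2_lo;

  /* s = 2^n, built by shifting the exponent into place: the same trick as
   * the "<< 23" / _mm_slli_epi32 / vshlq_n_s32 lines in the hits above. */
  const int32_t sbits = ((int32_t) n + 127) << 23;
  float s;
  memcpy(&s, &sbits, sizeof(s));

  /* Degree-6 Taylor-style polynomial for expm1(t) (placeholder coefficients,
   * not the tuned minimax values used by the generated kernels). */
  float p = 1.0f / 720.0f;
  p = p * t + 1.0f / 120.0f;
  p = p * t + 1.0f / 24.0f;
  p = p * t + 1.0f / 6.0f;
  p = p * t + 0.5f;
  p = p * t * t + t;                     /* p ~= expm1(t) */

  /* exp(z) - 1 = s*expm1(t) + (s - 1): the kernels express this as
   * vt *= vs; vs -= vone; ve = (vp + vs) * valpha. */
  return (p * s + (s - 1.0f)) * alpha;
}
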
/aosp_15_r20/external/XNNPACK/src/f32-vsigmoid/gen/

vsigmoid-neonfma-rr1-p5-div-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24):
   64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  113  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  120  const float32x4_t veKLMN = vfmaq_f32(vsKLMN, vpKLMN, vtKLMN);

vsigmoid-sse41-rr2-p5-div-x24.c  (in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24):
   66  const __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));  [local]
  122  vtKLMN = _mm_mul_ps(vtKLMN, vsKLMN);
  129  __m128 veKLMN = _mm_add_ps(_mm_mul_ps(vtKLMN, vpKLMN), vsKLMN);

vsigmoid-wasmsimd-rr2-p5-div-x24.c  (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24):
   66  const v128_t vsKLMN = wasm_i32x4_shl(vnKLMN, 23);  [local]
  122  vtKLMN = wasm_f32x4_mul(vtKLMN, vsKLMN);
  129  const v128_t veKLMN = wasm_f32x4_add(vsKLMN, wasm_f32x4_mul(vtKLMN, vpKLMN));

vsigmoid-neonfma-rr1-p5-nr2recps-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24):
   64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  113  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  120  const float32x4_t veKLMN = vfmaq_f32(vsKLMN, vpKLMN, vtKLMN);

vsigmoid-neon-rr2-p5-nr2recps-x24.c  (in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24):
   65  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  121  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  128  const float32x4_t veKLMN = vmlaq_f32(vsKLMN, vpKLMN, vtKLMN);

vsigmoid-neonfma-rr1-p5-nr2fma-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24):
   64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  113  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  120  const float32x4_t veKLMN = vfmaq_f32(vsKLMN, vpKLMN, vtKLMN);

vsigmoid-neonfma-rr1-p5-nr1recps1fma-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24):
   64  … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23));  [local]
  113  vtKLMN = vmulq_f32(vtKLMN, vsKLMN);
  120  const float32x4_t veKLMN = vfmaq_f32(vsKLMN, vpKLMN, vtKLMN);

vsigmoid-sse2-rr2-p5-div-x24.c  (in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24):
   66  const __m128 vsKLMN = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnKLMN), 23));  [local]
  122  vtKLMN = _mm_mul_ps(vtKLMN, vsKLMN);
  129  __m128 veKLMN = _mm_add_ps(_mm_mul_ps(vtKLMN, vpKLMN), vsKLMN);

vsigmoid-neonfma-rr1-lut64-p2-div-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24):
  122  …const float32x4_t vsKLMN = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(vlKLMN), veKLMN));  [local]
  157  const float32x4_t vyKLMN = vfmsq_f32(vsKLMN, vsKLMN, vpKLMN);

vsigmoid-neonfma-rr1-lut2048-p1-div-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24):
  121  …const float32x4_t vsKLMN = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(vlKLMN), veKLMN));  [local]
  149  const float32x4_t vyKLMN = vfmaq_f32(vsKLMN, vsKLMN, vpKLMN);

vsigmoid-wasmsimd-rr2-lut64-p2-div-x24.c  (in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24):
  122  const v128_t vsKLMN = wasm_i32x4_add(vlKLMN, veKLMN);  [local]
  164  const v128_t vyKLMN = wasm_f32x4_sub(vsKLMN, wasm_f32x4_mul(vsKLMN, vpKLMN));

vsigmoid-neon-rr2-lut64-p2-nr2recps-x24.c  (in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24):
  123  …const float32x4_t vsKLMN = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(vlKLMN), veKLMN));  [local]
  165  const float32x4_t vyKLMN = vmlsq_f32(vsKLMN, vsKLMN, vpKLMN);

vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-x24.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24):
  122  …const float32x4_t vsKLMN = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(vlKLMN), veKLMN));  [local]
  157  const float32x4_t vyKLMN = vfmsq_f32(vsKLMN, vsKLMN, vpKLMN);

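The f32-vsigmoid hits reuse the same 2^n reconstruction for exp(z) with z = -|x|: the p5-div and p5-nr* variants via the "<< 23" exponent shift, the lut64-p2 and lut2048-p1 variants via a table lookup plus an integer add (vlKLMN + veKLMN), and all of them finish with either a division or a Newton-Raphson reciprocal refinement. Below is a minimal scalar C sketch of the p5/div flavour, not XNNPACK code: the function name, the input clamp, and the coefficients are illustrative assumptions rather than the generated kernels' constants, and the nr2recps/nr2fma reciprocal iterations are replaced by a plain division.

/* Hedged scalar sketch of the p5/div sigmoid scheme (illustrative only). */
#include <math.h>
#include <stdint.h>
#include <string.h>

static float sigmoid_rr_p5_div_sketch(float x) {
  /* Work on z = -|x| so that exp(z) <= 1; the sign is restored at the end. */
  float z = -fabsf(x);
  if (z < -87.0f) z = -87.0f;            /* crude stand-in for saturation handling */

  /* Range reduction z = n*ln(2) + t, as in the rr1/rr2 variants above. */
  const float log2e  = 0x1.715476p+0f;
  const float ln2_hi = 0x1.62E400p-1f;
  const float ln2_lo = 0x1.7F7D1Cp-20f;
  const float n = nearbyintf(z * log2e);
  float t = (z - n * ln2_hi) - n * ln2_lo;

  /* s = 2^n via the exponent shift (the "<< 23" lines in the hits above). */
  const int32_t sbits = ((int32_t) n + 127) << 23;
  float s;
  memcpy(&s, &sbits, sizeof(s));

  /* Degree-5 polynomial for (exp(t) - 1)/t (placeholder Taylor coefficients). */
  float p = 1.0f / 120.0f;
  p = p * t + 1.0f / 24.0f;
  p = p * t + 1.0f / 6.0f;
  p = p * t + 0.5f;
  p = p * t + 1.0f;

  /* e ~= exp(z): matches "vt = vt * vs" followed by "ve = vs + vt*vp". */
  t *= s;
  const float e = t * p + s;

  /* sigmoid(z) = e / (e + 1) for z <= 0; mirror for positive x. The div
   * kernels divide directly, the nr* kernels refine a reciprocal instead. */
  const float f = e / (e + 1.0f);
  return x > 0.0f ? 1.0f - f : f;
}
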