Home
last modified time | relevance | path

Searched defs:vsum (Results 1 – 25 of 87) sorted by relevance

1234

/aosp_15_r20/external/libvpx/vpx_dsp/x86/
H A Dvariance_sse2.c26 __m128i vsum = _mm_setzero_si128(); in vpx_get_mb_ss_sse2() local
56 static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_128_pel_sse2()
68 static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_256_pel_sse2()
80 static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_512_pel_sse2()
220 __m128i vsse, vsum; in vpx_get8x8var_sse2() local
228 __m128i vsse, vsum; in vpx_get16x16var_sse2() local
236 __m128i vsse, vsum; in vpx_variance4x4_sse2() local
246 __m128i vsse, vsum; in vpx_variance4x8_sse2() local
256 __m128i vsse, vsum; in vpx_variance8x4_sse2() local
266 __m128i vsse, vsum; in vpx_variance8x8_sse2() local
[all …]
H A Dvariance_avx2.c62 __m128i vsum, in variance_final_from_32bit_sum_avx2()
81 __m256i vsum, in variance_final_from_16bit_sum_avx2()
160 __m256i *const vsum) { in variance8_avx2()
175 __m256i *const vsum) { in variance16_avx2()
190 __m256i *const vsum) { in variance32_avx2()
205 __m256i *const vsum) { in variance64_avx2()
220 __m256i vsse, vsum; in vpx_get16x16var_avx2() local
668 __m256i vsse, vsum; in vpx_variance8x4_avx2() local
678 __m256i vsse, vsum; in vpx_variance8x8_avx2() local
688 __m256i vsse, vsum; in vpx_variance8x16_avx2() local
[all …]
/aosp_15_r20/external/libaom/av1/encoder/x86/
H A Dhighbd_temporal_filter_avx2.c125 static AOM_FORCE_INLINE int32_t xx_mask_and_hadd(__m256i vsum, int i) { in xx_mask_and_hadd()
185 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
203 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
231 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
250 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
279 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
297 __m256i vsum = _mm256_add_epi32(vsum3, vsrc[4]); in highbd_apply_temporal_filter() local
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/
H A Dmp8x9p8q-neon.c88 const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
141 const uint16x8_t vsum = vaddq_u16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
194 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
244 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
318 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
394 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
H A Dmp8x9p8q-sse2.c87 const __m128i vsum = _mm_add_epi16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
149 const __m128i vsum = _mm_add_epi16(vsum2345, vsum01678); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
210 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
268 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
349 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
427 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
/aosp_15_r20/external/XNNPACK/src/f32-gavgpool/
H A D7p7x-minmax-neon-c4.c54 const float32x4_t vsum = vaddq_f32(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
87 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
141 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
169 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
H A D7p7x-minmax-wasmsimd-x86-c4.c61 const v128_t vsum = wasm_f32x4_add(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
H A D7p7x-minmax-wasmsimd-arm-c4.c61 const v128_t vsum = wasm_f32x4_add(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
H A D7p7x-minmax-sse-c4.c61 const __m128 vsum = _mm_add_ps(vsum016, vsum2345); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
101 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
163 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
192 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
/aosp_15_r20/external/libaom/aom_dsp/x86/
H A Dvariance_sse2.c25 __m128i vsum = _mm_setzero_si128(); in aom_get_mb_ss_sse2() local
82 static inline void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_128_pel_sse2()
94 static inline void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_256_pel_sse2()
106 static inline void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_512_pel_sse2()
118 static inline void variance_final_1024_pel_sse2(__m128i vsse, __m128i vsum, in variance_final_1024_pel_sse2()
252 __m128i vsum = _mm_setzero_si128(); in aom_get_var_sse_sum_8x8_quad_sse2() local
284 __m128i vsum = _mm_setzero_si128(); in aom_get_var_sse_sum_16x16_dual_sse2() local
H A Dvariance_avx2.c49 static inline int variance_final_from_32bit_sum_avx2(__m256i vsse, __m128i vsum, in variance_final_from_32bit_sum_avx2()
66 static inline int variance_final_512_avx2(__m256i vsse, __m256i vsum, in variance_final_512_avx2()
76 static inline int variance_final_1024_avx2(__m256i vsse, __m256i vsum, in variance_final_1024_avx2()
94 static inline int variance_final_2048_avx2(__m256i vsse, __m256i vsum, in variance_final_2048_avx2()
125 __m256i *const vsum) { in variance16_avx2()
138 __m256i *const vsum) { in variance32_avx2()
151 __m256i *const vsum) { in variance64_avx2()
165 __m256i *const vsum) { in variance128_avx2()
/aosp_15_r20/external/XNNPACK/src/f32-gavgpool-cw/
H A Dneon-x4.c76 const float32x4_t vsum = vpaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
82 const float32x4_t vsum = vcombine_f32(vpadd_f32(vget_low_f32(vsum01), vget_high_f32(vsum01)), in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
114 float32x2_t vsum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local
/aosp_15_r20/external/XNNPACK/src/f32-avgpool/
H A D9p8x-minmax-wasmsimd-arm-c4.c110 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum01678); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
287 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
316 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
H A D9p8x-minmax-wasmsimd-x86-c4.c110 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum01678); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
287 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
316 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
H A D9p8x-minmax-sse-c4.c110 const __m128 vsum = _mm_add_ps(vsum2345, vsum01678); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local
186 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local
285 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local
314 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local
H A D9p8x-minmax-neon-c4.c101 const float32x4_t vsum = vaddq_f32(vsum2345, vsum01678); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local
169 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local
260 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local
288 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local
/aosp_15_r20/external/XNNPACK/src/f16-avgpool/
H A D9p8x-minmax-neonfp16arith-c8.c101 const float16x8_t vsum = vaddq_f16(vsum2345, vsum01678); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
169 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
260 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
288 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
H A D9p8x-minmax-f16c-c8.c112 const __m128i vsum = _mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum01678), _MM_FROUND_NO_EXC); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local
189 const __m128i vsum = _mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND_NO_EXC); in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local
290 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local
319 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8() local
/aosp_15_r20/external/XNNPACK/src/f32-pavgpool/
H A D9p8x-minmax-wasmsimd-x86-c4.c110 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum01678); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
290 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
319 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local
H A D9p8x-minmax-wasmsimd-arm-c4.c110 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum01678); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
187 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
290 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
319 const v128_t vsum = wasm_f32x4_add(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local
H A D9p8x-minmax-sse-c4.c110 const __m128 vsum = _mm_add_ps(vsum2345, vsum01678); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local
186 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local
288 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local
317 const __m128 vsum = _mm_add_ps(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local
H A D9p8x-minmax-neon-c4.c101 const float32x4_t vsum = vaddq_f32(vsum2345, vsum01678); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local
169 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local
261 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local
289 const float32x4_t vsum = vaddq_f32(vsum2345, vsum0167a); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local
/aosp_15_r20/external/XNNPACK/src/f16-pavgpool/
H A D9p8x-minmax-neonfp16arith-c8.c101 const float16x8_t vsum = vaddq_f16(vsum2345, vsum01678); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
169 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
261 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
289 const float16x8_t vsum = vaddq_f16(vsum2345, vsum0167a); in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8() local
H A D9p8x-minmax-avx2-c8.c112 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum01678), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local
189 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local
292 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local
321 …const __m256 vsum = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum2345, vsum0167a), _MM_FROUND… in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8() local
/aosp_15_r20/external/XNNPACK/src/f16-gavgpool-cw/
H A Dneonfp16arith-x8.c79 const float16x4_t vsum = vpadd_f16(vget_low_f16(vsum0123), vget_high_f16(vsum0123)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local
87 const float16x4_t vsum = vpadd_f16(vsum01_lo, vsum23_lo); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local
122 float16x4_t vsum = vadd_f16(vget_low_f16(vsum0), vget_high_f16(vsum0)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local

1234