xref: /aosp_15_r20/external/libvpx/vpx_dsp/mips/variance_msa.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
12*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/mips/macros_msa.h"
13*fb1b10abSAndroid Build Coastguard Worker 
/* Folds one vector pair (src, ref) into the 32-bit accumulator `var`.
 *
 * The pair is interleaved (ILVRL_B2_UB), reduced with HSUB_UB2_SH, and the
 * two resulting 16-bit vectors are dot-producted with themselves into `var`
 * (DPADD_SH2_SW).  Presumably this adds the squared byte differences of the
 * pair to `var`; the exact lane semantics live in macros_msa.h.
 *
 * Expands to a brace block, so it must be used as a statement. */
#define CALC_MSE_B(src, ref, var)                                \
  {                                                              \
    v16u8 ilv0_m, ilv1_m;                                        \
    v8i16 hsub0_m, hsub1_m;                                      \
                                                                 \
    ILVRL_B2_UB(src, ref, ilv0_m, ilv1_m);                       \
    HSUB_UB2_SH(ilv0_m, ilv1_m, hsub0_m, hsub1_m);               \
    DPADD_SH2_SW(hsub0_m, hsub1_m, hsub0_m, hsub1_m, var, var);  \
  }
23*fb1b10abSAndroid Build Coastguard Worker 
/* Same accumulation into `var` as CALC_MSE_B, and additionally adds the two
 * intermediate 16-bit vectors (the HSUB_UB2_SH outputs) into `sub`.
 *
 * `sub` is a v8i16 at every call site in this file, so it accumulates in
 * 16-bit lanes.  Presumably `var` ends up holding squared differences and
 * `sub` the plain differences; see macros_msa.h for the helper semantics.
 *
 * Expands to a brace block, so it must be used as a statement. */
#define CALC_MSE_AVG_B(src, ref, var, sub)                       \
  {                                                              \
    v16u8 ilv0_m, ilv1_m;                                        \
    v8i16 hsub0_m, hsub1_m;                                      \
                                                                 \
    ILVRL_B2_UB(src, ref, ilv0_m, ilv1_m);                       \
    HSUB_UB2_SH(ilv0_m, ilv1_m, hsub0_m, hsub1_m);               \
    DPADD_SH2_SW(hsub0_m, hsub1_m, hsub0_m, hsub1_m, var, var);  \
                                                                 \
    sub += hsub0_m + hsub1_m;                                    \
  }
35*fb1b10abSAndroid Build Coastguard Worker 
/* Variance from a sum of squared errors and a sum of differences:
 *   sse - ((diff * diff) >> shift)
 *
 * The product is formed in uint32_t arithmetic, so it is only exact while
 * diff * diff fits in 32 bits; use VARIANCE_LARGE_WxH otherwise.
 *
 * The whole expansion is parenthesized so the macro can be embedded in a
 * larger expression without operator-precedence surprises.
 * NOTE: `diff` is evaluated twice; pass an expression without side effects. */
#define VARIANCE_WxH(sse, diff, shift) \
  ((sse) - (((uint32_t)(diff) * (diff)) >> (shift)))
38*fb1b10abSAndroid Build Coastguard Worker 
/* Variance from a sum of squared errors and a sum of differences, with the
 * diff * diff product formed in int64_t arithmetic:
 *   sse - ((diff * diff) >> shift)
 *
 * The whole expansion is parenthesized so the macro can be embedded in a
 * larger expression without operator-precedence surprises.
 * NOTE: `diff` is evaluated twice; pass an expression without side effects. */
#define VARIANCE_LARGE_WxH(sse, diff, shift) \
  ((sse) - (((int64_t)(diff) * (diff)) >> (shift)))
41*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_4width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height,int32_t * diff)42*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
43*fb1b10abSAndroid Build Coastguard Worker                                     const uint8_t *ref_ptr, int32_t ref_stride,
44*fb1b10abSAndroid Build Coastguard Worker                                     int32_t height, int32_t *diff) {
45*fb1b10abSAndroid Build Coastguard Worker   uint32_t src0, src1, src2, src3;
46*fb1b10abSAndroid Build Coastguard Worker   uint32_t ref0, ref1, ref2, ref3;
47*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
48*fb1b10abSAndroid Build Coastguard Worker   v16u8 src = { 0 };
49*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref = { 0 };
50*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg = { 0 };
51*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
52*fb1b10abSAndroid Build Coastguard Worker 
53*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
54*fb1b10abSAndroid Build Coastguard Worker     LW4(src_ptr, src_stride, src0, src1, src2, src3);
55*fb1b10abSAndroid Build Coastguard Worker     src_ptr += (4 * src_stride);
56*fb1b10abSAndroid Build Coastguard Worker     LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
57*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += (4 * ref_stride);
58*fb1b10abSAndroid Build Coastguard Worker 
59*fb1b10abSAndroid Build Coastguard Worker     INSERT_W4_UB(src0, src1, src2, src3, src);
60*fb1b10abSAndroid Build Coastguard Worker     INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
61*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src, ref, var, avg);
62*fb1b10abSAndroid Build Coastguard Worker   }
63*fb1b10abSAndroid Build Coastguard Worker 
64*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg, avg);
65*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
66*fb1b10abSAndroid Build Coastguard Worker 
67*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
68*fb1b10abSAndroid Build Coastguard Worker }
69*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_8width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height,int32_t * diff)70*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
71*fb1b10abSAndroid Build Coastguard Worker                                     const uint8_t *ref_ptr, int32_t ref_stride,
72*fb1b10abSAndroid Build Coastguard Worker                                     int32_t height, int32_t *diff) {
73*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
74*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, src2, src3;
75*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3;
76*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg = { 0 };
77*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
78*fb1b10abSAndroid Build Coastguard Worker 
79*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
80*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
81*fb1b10abSAndroid Build Coastguard Worker     src_ptr += (4 * src_stride);
82*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
83*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += (4 * ref_stride);
84*fb1b10abSAndroid Build Coastguard Worker 
85*fb1b10abSAndroid Build Coastguard Worker     PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
86*fb1b10abSAndroid Build Coastguard Worker                 ref0, ref1);
87*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg);
88*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg);
89*fb1b10abSAndroid Build Coastguard Worker   }
90*fb1b10abSAndroid Build Coastguard Worker 
91*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg, avg);
92*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
93*fb1b10abSAndroid Build Coastguard Worker 
94*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
95*fb1b10abSAndroid Build Coastguard Worker }
96*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_16width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height,int32_t * diff)97*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
98*fb1b10abSAndroid Build Coastguard Worker                                      const uint8_t *ref_ptr, int32_t ref_stride,
99*fb1b10abSAndroid Build Coastguard Worker                                      int32_t height, int32_t *diff) {
100*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
101*fb1b10abSAndroid Build Coastguard Worker   v16u8 src, ref;
102*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg = { 0 };
103*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
104*fb1b10abSAndroid Build Coastguard Worker 
105*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
106*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
107*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
108*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
109*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
110*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src, ref, var, avg);
111*fb1b10abSAndroid Build Coastguard Worker 
112*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
113*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
114*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
115*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
116*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src, ref, var, avg);
117*fb1b10abSAndroid Build Coastguard Worker 
118*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
119*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
120*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
121*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
122*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src, ref, var, avg);
123*fb1b10abSAndroid Build Coastguard Worker 
124*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
125*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
126*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
127*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
128*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src, ref, var, avg);
129*fb1b10abSAndroid Build Coastguard Worker   }
130*fb1b10abSAndroid Build Coastguard Worker 
131*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg, avg);
132*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
133*fb1b10abSAndroid Build Coastguard Worker 
134*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
135*fb1b10abSAndroid Build Coastguard Worker }
136*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_32width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height,int32_t * diff)137*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
138*fb1b10abSAndroid Build Coastguard Worker                                      const uint8_t *ref_ptr, int32_t ref_stride,
139*fb1b10abSAndroid Build Coastguard Worker                                      int32_t height, int32_t *diff) {
140*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
141*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, ref0, ref1;
142*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg = { 0 };
143*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
144*fb1b10abSAndroid Build Coastguard Worker 
145*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
146*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
147*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
148*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
149*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
150*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg);
151*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg);
152*fb1b10abSAndroid Build Coastguard Worker 
153*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
154*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
155*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
156*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
157*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg);
158*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg);
159*fb1b10abSAndroid Build Coastguard Worker 
160*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
161*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
162*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
163*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
164*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg);
165*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg);
166*fb1b10abSAndroid Build Coastguard Worker 
167*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
168*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
169*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
170*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
171*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg);
172*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg);
173*fb1b10abSAndroid Build Coastguard Worker   }
174*fb1b10abSAndroid Build Coastguard Worker 
175*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg, avg);
176*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
177*fb1b10abSAndroid Build Coastguard Worker 
178*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
179*fb1b10abSAndroid Build Coastguard Worker }
180*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_32x64_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t * diff)181*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
182*fb1b10abSAndroid Build Coastguard Worker                                    const uint8_t *ref_ptr, int32_t ref_stride,
183*fb1b10abSAndroid Build Coastguard Worker                                    int32_t *diff) {
184*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
185*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, ref0, ref1;
186*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg0 = { 0 };
187*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg1 = { 0 };
188*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
189*fb1b10abSAndroid Build Coastguard Worker 
190*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = 16; ht_cnt--;) {
191*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
192*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
193*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
194*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
195*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
196*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
197*fb1b10abSAndroid Build Coastguard Worker 
198*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
199*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
200*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
201*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
202*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
203*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
204*fb1b10abSAndroid Build Coastguard Worker 
205*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
206*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
207*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
208*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
209*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
210*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
211*fb1b10abSAndroid Build Coastguard Worker 
212*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
213*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
214*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
215*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
216*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
217*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
218*fb1b10abSAndroid Build Coastguard Worker   }
219*fb1b10abSAndroid Build Coastguard Worker 
220*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg0, avg0);
221*fb1b10abSAndroid Build Coastguard Worker   vec += __msa_hadd_s_w(avg1, avg1);
222*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
223*fb1b10abSAndroid Build Coastguard Worker 
224*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
225*fb1b10abSAndroid Build Coastguard Worker }
226*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_64x32_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t * diff)227*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
228*fb1b10abSAndroid Build Coastguard Worker                                    const uint8_t *ref_ptr, int32_t ref_stride,
229*fb1b10abSAndroid Build Coastguard Worker                                    int32_t *diff) {
230*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
231*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, src2, src3;
232*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3;
233*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg0 = { 0 };
234*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg1 = { 0 };
235*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
236*fb1b10abSAndroid Build Coastguard Worker 
237*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = 16; ht_cnt--;) {
238*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, 16, src0, src1, src2, src3);
239*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
240*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
241*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
242*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
243*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src2, ref2, var, avg0);
244*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
245*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src3, ref3, var, avg1);
246*fb1b10abSAndroid Build Coastguard Worker 
247*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, 16, src0, src1, src2, src3);
248*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
249*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
250*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
251*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
252*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src2, ref2, var, avg0);
253*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
254*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src3, ref3, var, avg1);
255*fb1b10abSAndroid Build Coastguard Worker   }
256*fb1b10abSAndroid Build Coastguard Worker 
257*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg0, avg0);
258*fb1b10abSAndroid Build Coastguard Worker   vec += __msa_hadd_s_w(avg1, avg1);
259*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
260*fb1b10abSAndroid Build Coastguard Worker 
261*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
262*fb1b10abSAndroid Build Coastguard Worker }
263*fb1b10abSAndroid Build Coastguard Worker 
sse_diff_64x64_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t * diff)264*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
265*fb1b10abSAndroid Build Coastguard Worker                                    const uint8_t *ref_ptr, int32_t ref_stride,
266*fb1b10abSAndroid Build Coastguard Worker                                    int32_t *diff) {
267*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
268*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, src2, src3;
269*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3;
270*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg0 = { 0 };
271*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg1 = { 0 };
272*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg2 = { 0 };
273*fb1b10abSAndroid Build Coastguard Worker   v8i16 avg3 = { 0 };
274*fb1b10abSAndroid Build Coastguard Worker   v4i32 vec, var = { 0 };
275*fb1b10abSAndroid Build Coastguard Worker 
276*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = 32; ht_cnt--;) {
277*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, 16, src0, src1, src2, src3);
278*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
279*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
280*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
281*fb1b10abSAndroid Build Coastguard Worker 
282*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
283*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
284*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src2, ref2, var, avg2);
285*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src3, ref3, var, avg3);
286*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, 16, src0, src1, src2, src3);
287*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
288*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
289*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
290*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src0, ref0, var, avg0);
291*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src1, ref1, var, avg1);
292*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src2, ref2, var, avg2);
293*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_AVG_B(src3, ref3, var, avg3);
294*fb1b10abSAndroid Build Coastguard Worker   }
295*fb1b10abSAndroid Build Coastguard Worker 
296*fb1b10abSAndroid Build Coastguard Worker   vec = __msa_hadd_s_w(avg0, avg0);
297*fb1b10abSAndroid Build Coastguard Worker   vec += __msa_hadd_s_w(avg1, avg1);
298*fb1b10abSAndroid Build Coastguard Worker   vec += __msa_hadd_s_w(avg2, avg2);
299*fb1b10abSAndroid Build Coastguard Worker   vec += __msa_hadd_s_w(avg3, avg3);
300*fb1b10abSAndroid Build Coastguard Worker   *diff = HADD_SW_S32(vec);
301*fb1b10abSAndroid Build Coastguard Worker 
302*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
303*fb1b10abSAndroid Build Coastguard Worker }
304*fb1b10abSAndroid Build Coastguard Worker 
get_mb_ss_msa(const int16_t * src)305*fb1b10abSAndroid Build Coastguard Worker static uint32_t get_mb_ss_msa(const int16_t *src) {
306*fb1b10abSAndroid Build Coastguard Worker   uint32_t sum, cnt;
307*fb1b10abSAndroid Build Coastguard Worker   v8i16 src0, src1, src2, src3;
308*fb1b10abSAndroid Build Coastguard Worker   v4i32 src0_l, src1_l, src2_l, src3_l;
309*fb1b10abSAndroid Build Coastguard Worker   v4i32 src0_r, src1_r, src2_r, src3_r;
310*fb1b10abSAndroid Build Coastguard Worker   v2i64 sq_src_l = { 0 };
311*fb1b10abSAndroid Build Coastguard Worker   v2i64 sq_src_r = { 0 };
312*fb1b10abSAndroid Build Coastguard Worker 
313*fb1b10abSAndroid Build Coastguard Worker   for (cnt = 8; cnt--;) {
314*fb1b10abSAndroid Build Coastguard Worker     LD_SH4(src, 8, src0, src1, src2, src3);
315*fb1b10abSAndroid Build Coastguard Worker     src += 4 * 8;
316*fb1b10abSAndroid Build Coastguard Worker 
317*fb1b10abSAndroid Build Coastguard Worker     UNPCK_SH_SW(src0, src0_l, src0_r);
318*fb1b10abSAndroid Build Coastguard Worker     UNPCK_SH_SW(src1, src1_l, src1_r);
319*fb1b10abSAndroid Build Coastguard Worker     UNPCK_SH_SW(src2, src2_l, src2_r);
320*fb1b10abSAndroid Build Coastguard Worker     UNPCK_SH_SW(src3, src3_l, src3_r);
321*fb1b10abSAndroid Build Coastguard Worker 
322*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
323*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
324*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
325*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
326*fb1b10abSAndroid Build Coastguard Worker   }
327*fb1b10abSAndroid Build Coastguard Worker 
328*fb1b10abSAndroid Build Coastguard Worker   sq_src_l += __msa_splati_d(sq_src_l, 1);
329*fb1b10abSAndroid Build Coastguard Worker   sq_src_r += __msa_splati_d(sq_src_r, 1);
330*fb1b10abSAndroid Build Coastguard Worker 
331*fb1b10abSAndroid Build Coastguard Worker   sum = __msa_copy_s_d(sq_src_l, 0);
332*fb1b10abSAndroid Build Coastguard Worker   sum += __msa_copy_s_d(sq_src_r, 0);
333*fb1b10abSAndroid Build Coastguard Worker 
334*fb1b10abSAndroid Build Coastguard Worker   return sum;
335*fb1b10abSAndroid Build Coastguard Worker }
336*fb1b10abSAndroid Build Coastguard Worker 
sse_4width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height)337*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
338*fb1b10abSAndroid Build Coastguard Worker                                const uint8_t *ref_ptr, int32_t ref_stride,
339*fb1b10abSAndroid Build Coastguard Worker                                int32_t height) {
340*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
341*fb1b10abSAndroid Build Coastguard Worker   uint32_t src0, src1, src2, src3;
342*fb1b10abSAndroid Build Coastguard Worker   uint32_t ref0, ref1, ref2, ref3;
343*fb1b10abSAndroid Build Coastguard Worker   v16u8 src = { 0 };
344*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref = { 0 };
345*fb1b10abSAndroid Build Coastguard Worker   v4i32 var = { 0 };
346*fb1b10abSAndroid Build Coastguard Worker 
347*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
348*fb1b10abSAndroid Build Coastguard Worker     LW4(src_ptr, src_stride, src0, src1, src2, src3);
349*fb1b10abSAndroid Build Coastguard Worker     src_ptr += (4 * src_stride);
350*fb1b10abSAndroid Build Coastguard Worker     LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
351*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += (4 * ref_stride);
352*fb1b10abSAndroid Build Coastguard Worker 
353*fb1b10abSAndroid Build Coastguard Worker     INSERT_W4_UB(src0, src1, src2, src3, src);
354*fb1b10abSAndroid Build Coastguard Worker     INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
355*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src, ref, var);
356*fb1b10abSAndroid Build Coastguard Worker   }
357*fb1b10abSAndroid Build Coastguard Worker 
358*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
359*fb1b10abSAndroid Build Coastguard Worker }
360*fb1b10abSAndroid Build Coastguard Worker 
sse_8width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height)361*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
362*fb1b10abSAndroid Build Coastguard Worker                                const uint8_t *ref_ptr, int32_t ref_stride,
363*fb1b10abSAndroid Build Coastguard Worker                                int32_t height) {
364*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
365*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, src2, src3;
366*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3;
367*fb1b10abSAndroid Build Coastguard Worker   v4i32 var = { 0 };
368*fb1b10abSAndroid Build Coastguard Worker 
369*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
370*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
371*fb1b10abSAndroid Build Coastguard Worker     src_ptr += (4 * src_stride);
372*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
373*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += (4 * ref_stride);
374*fb1b10abSAndroid Build Coastguard Worker 
375*fb1b10abSAndroid Build Coastguard Worker     PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
376*fb1b10abSAndroid Build Coastguard Worker                 ref0, ref1);
377*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
378*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
379*fb1b10abSAndroid Build Coastguard Worker   }
380*fb1b10abSAndroid Build Coastguard Worker 
381*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
382*fb1b10abSAndroid Build Coastguard Worker }
383*fb1b10abSAndroid Build Coastguard Worker 
sse_16width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height)384*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
385*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *ref_ptr, int32_t ref_stride,
386*fb1b10abSAndroid Build Coastguard Worker                                 int32_t height) {
387*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
388*fb1b10abSAndroid Build Coastguard Worker   v16u8 src, ref;
389*fb1b10abSAndroid Build Coastguard Worker   v4i32 var = { 0 };
390*fb1b10abSAndroid Build Coastguard Worker 
391*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
392*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
393*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
394*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
395*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
396*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src, ref, var);
397*fb1b10abSAndroid Build Coastguard Worker 
398*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
399*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
400*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
401*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
402*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src, ref, var);
403*fb1b10abSAndroid Build Coastguard Worker 
404*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
405*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
406*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
407*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
408*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src, ref, var);
409*fb1b10abSAndroid Build Coastguard Worker 
410*fb1b10abSAndroid Build Coastguard Worker     src = LD_UB(src_ptr);
411*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
412*fb1b10abSAndroid Build Coastguard Worker     ref = LD_UB(ref_ptr);
413*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
414*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src, ref, var);
415*fb1b10abSAndroid Build Coastguard Worker   }
416*fb1b10abSAndroid Build Coastguard Worker 
417*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
418*fb1b10abSAndroid Build Coastguard Worker }
419*fb1b10abSAndroid Build Coastguard Worker 
sse_32width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height)420*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
421*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *ref_ptr, int32_t ref_stride,
422*fb1b10abSAndroid Build Coastguard Worker                                 int32_t height) {
423*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
424*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, ref0, ref1;
425*fb1b10abSAndroid Build Coastguard Worker   v4i32 var = { 0 };
426*fb1b10abSAndroid Build Coastguard Worker 
427*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
428*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
429*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
430*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
431*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
432*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
433*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
434*fb1b10abSAndroid Build Coastguard Worker 
435*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
436*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
437*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
438*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
439*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
440*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
441*fb1b10abSAndroid Build Coastguard Worker 
442*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
443*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
444*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
445*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
446*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
447*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
448*fb1b10abSAndroid Build Coastguard Worker 
449*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(src_ptr, 16, src0, src1);
450*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
451*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref_ptr, 16, ref0, ref1);
452*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
453*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
454*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
455*fb1b10abSAndroid Build Coastguard Worker   }
456*fb1b10abSAndroid Build Coastguard Worker 
457*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
458*fb1b10abSAndroid Build Coastguard Worker }
459*fb1b10abSAndroid Build Coastguard Worker 
sse_64width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height)460*fb1b10abSAndroid Build Coastguard Worker static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
461*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *ref_ptr, int32_t ref_stride,
462*fb1b10abSAndroid Build Coastguard Worker                                 int32_t height) {
463*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
464*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, src2, src3;
465*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3;
466*fb1b10abSAndroid Build Coastguard Worker   v4i32 var = { 0 };
467*fb1b10abSAndroid Build Coastguard Worker 
468*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = height >> 1; ht_cnt--;) {
469*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, 16, src0, src1, src2, src3);
470*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
471*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
472*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
473*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
474*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src2, ref2, var);
475*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
476*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src3, ref3, var);
477*fb1b10abSAndroid Build Coastguard Worker 
478*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(src_ptr, 16, src0, src1, src2, src3);
479*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
480*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
481*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += ref_stride;
482*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src0, ref0, var);
483*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src2, ref2, var);
484*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src1, ref1, var);
485*fb1b10abSAndroid Build Coastguard Worker     CALC_MSE_B(src3, ref3, var);
486*fb1b10abSAndroid Build Coastguard Worker   }
487*fb1b10abSAndroid Build Coastguard Worker 
488*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(var);
489*fb1b10abSAndroid Build Coastguard Worker }
490*fb1b10abSAndroid Build Coastguard Worker 
vpx_get4x4sse_cs_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride)491*fb1b10abSAndroid Build Coastguard Worker uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
492*fb1b10abSAndroid Build Coastguard Worker                               const uint8_t *ref_ptr, int32_t ref_stride) {
493*fb1b10abSAndroid Build Coastguard Worker   uint32_t src0, src1, src2, src3;
494*fb1b10abSAndroid Build Coastguard Worker   uint32_t ref0, ref1, ref2, ref3;
495*fb1b10abSAndroid Build Coastguard Worker   v16i8 src = { 0 };
496*fb1b10abSAndroid Build Coastguard Worker   v16i8 ref = { 0 };
497*fb1b10abSAndroid Build Coastguard Worker   v4i32 err0 = { 0 };
498*fb1b10abSAndroid Build Coastguard Worker 
499*fb1b10abSAndroid Build Coastguard Worker   LW4(src_ptr, src_stride, src0, src1, src2, src3);
500*fb1b10abSAndroid Build Coastguard Worker   LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
501*fb1b10abSAndroid Build Coastguard Worker   INSERT_W4_SB(src0, src1, src2, src3, src);
502*fb1b10abSAndroid Build Coastguard Worker   INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
503*fb1b10abSAndroid Build Coastguard Worker   CALC_MSE_B(src, ref, err0);
504*fb1b10abSAndroid Build Coastguard Worker 
505*fb1b10abSAndroid Build Coastguard Worker   return HADD_SW_S32(err0);
506*fb1b10abSAndroid Build Coastguard Worker }
507*fb1b10abSAndroid Build Coastguard Worker 
/*
 * Per-block-size variance helpers. The third argument forwarded to the base
 * macro is log2(width * height) for that block size (e.g. 4x4 -> 4,
 * 64x64 -> 12); presumably it is used as a shift that divides by the pixel
 * count -- see VARIANCE_WxH / VARIANCE_LARGE_WxH.
 *
 * These expand to expressions, so they deliberately carry no trailing ';':
 * the caller supplies it (e.g. `return VARIANCE_8Wx8H(*sse, diff);`).
 */
#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4)
#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5)
#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5)
#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6)
#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7)
#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7)
#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8)

/* Block sizes of 512 pixels and above go through VARIANCE_LARGE_WxH. */
#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9)
#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9)
#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10)
#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11)
#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11)
#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12)
522*fb1b10abSAndroid Build Coastguard Worker 
/*
 * Defines vpx_variance<wd>x<ht>_msa(). The generated function calls
 * sse_diff_<wd>width_msa() for ht rows, stores the returned value through
 * *sse, and returns VARIANCE_<wd>Wx<ht>H() of that value and the sum that
 * the helper wrote through its last argument.
 */
#define VPX_VARIANCE_WDXHT_MSA(wd, ht)                                         \
  uint32_t vpx_variance##wd##x##ht##_msa(                                      \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,              \
      int32_t ref_stride, uint32_t *sse) {                                     \
    int32_t sum;                                                               \
    const uint32_t sq_err =                                                    \
        sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &sum);  \
                                                                               \
    *sse = sq_err;                                                             \
                                                                               \
    return VARIANCE_##wd##Wx##ht##H(sq_err, sum);                              \
  }
534*fb1b10abSAndroid Build Coastguard Worker 
535*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(4, 4);
536*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(4, 8);
537*fb1b10abSAndroid Build Coastguard Worker 
538*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(8, 4)
539*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(8, 8)
540*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(8, 16)
541*fb1b10abSAndroid Build Coastguard Worker 
542*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(16, 8)
543*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(16, 16)
544*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(16, 32)
545*fb1b10abSAndroid Build Coastguard Worker 
546*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(32, 16)
547*fb1b10abSAndroid Build Coastguard Worker VPX_VARIANCE_WDXHT_MSA(32, 32)
548*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 32x64 variance. Uses the dedicated sse_diff_32x64_msa() helper rather than
 * the width-generic one; the helper's return value is stored through *sse
 * and combined with the sum it reports via VARIANCE_32Wx64H().
 */
uint32_t vpx_variance32x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t sum;
  const uint32_t sq_err =
      sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &sum);

  *sse = sq_err;

  return VARIANCE_32Wx64H(sq_err, sum);
}
558*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 64x32 variance. Uses the dedicated sse_diff_64x32_msa() helper; its return
 * value is stored through *sse and combined with the sum it reports via
 * VARIANCE_64Wx32H().
 */
uint32_t vpx_variance64x32_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t sum;
  const uint32_t sq_err =
      sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &sum);

  *sse = sq_err;

  return VARIANCE_64Wx32H(sq_err, sum);
}
568*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 64x64 variance. Uses the dedicated sse_diff_64x64_msa() helper; its return
 * value is stored through *sse and combined with the sum it reports via
 * VARIANCE_64Wx64H().
 */
uint32_t vpx_variance64x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t sum;
  const uint32_t sq_err =
      sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &sum);

  *sse = sq_err;

  return VARIANCE_64Wx64H(sq_err, sum);
}
578*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 8x8 entry point: runs sse_8width_msa() over 8 rows, stores the result
 * through *sse and also returns it. No division by the pixel count is
 * applied here despite the "mse" name.
 */
uint32_t vpx_mse8x8_msa(const uint8_t *src, int32_t src_stride,
                        const uint8_t *ref, int32_t ref_stride, uint32_t *sse) {
  const uint32_t sq_err = sse_8width_msa(src, src_stride, ref, ref_stride, 8);

  *sse = sq_err;

  return sq_err;
}
585*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 8x16 entry point: runs sse_8width_msa() over 16 rows, stores the result
 * through *sse and also returns it. No division by the pixel count is
 * applied here despite the "mse" name.
 */
uint32_t vpx_mse8x16_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  const uint32_t sq_err = sse_8width_msa(src, src_stride, ref, ref_stride, 16);

  *sse = sq_err;

  return sq_err;
}
593*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 16x8 entry point: runs sse_16width_msa() over 8 rows, stores the result
 * through *sse and also returns it. No division by the pixel count is
 * applied here despite the "mse" name.
 */
uint32_t vpx_mse16x8_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  const uint32_t sq_err = sse_16width_msa(src, src_stride, ref, ref_stride, 8);

  *sse = sq_err;

  return sq_err;
}
601*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 16x16 entry point: runs sse_16width_msa() over 16 rows, stores the result
 * through *sse and also returns it. No division by the pixel count is
 * applied here despite the "mse" name.
 */
uint32_t vpx_mse16x16_msa(const uint8_t *src, int32_t src_stride,
                          const uint8_t *ref, int32_t ref_stride,
                          uint32_t *sse) {
  const uint32_t sq_err =
      sse_16width_msa(src, src_stride, ref, ref_stride, 16);

  *sse = sq_err;

  return sq_err;
}
609*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 8x8 block statistics: forwards to sse_diff_8width_msa() for 8 rows. The
 * helper's return value is written to *sse; `sum` is handed to the helper
 * unchanged, which fills it in.
 */
void vpx_get8x8var_msa(const uint8_t *src, int32_t src_stride,
                       const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                       int32_t *sum) {
  const uint32_t sq_err =
      sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);

  *sse = sq_err;
}
615*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 16x16 block statistics: forwards to sse_diff_16width_msa() for 16 rows.
 * The helper's return value is written to *sse; `sum` is handed to the
 * helper unchanged, which fills it in.
 */
void vpx_get16x16var_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                         int32_t *sum) {
  const uint32_t sq_err =
      sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);

  *sse = sq_err;
}
621*fb1b10abSAndroid Build Coastguard Worker 
/*
 * Public wrapper around the file-local get_mb_ss_msa(); the argument and the
 * result are passed straight through.
 */
uint32_t vpx_get_mb_ss_msa(const int16_t *src) {
  return get_mb_ss_msa(src);
}
623