1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker *
4*fb1b10abSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker * that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker * tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker * in the file PATENTS. All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker * be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker */
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
12*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/mips/macros_msa.h"
13*fb1b10abSAndroid Build Coastguard Worker
/* Builds one vector from four others: element 0 of in0, in1, in2 and in3 is
 * copied into 32-bit lanes 0, 1, 2 and 3 of `out` respectively
 * (__msa_insve_w(dst, n, src) writes src[0] into dst[n]). `out` is both read
 * and written, so it must be an lvalue of type RTYPE.
 *
 * Every parameter is parenthesized in the expansion so that an expression
 * argument is cast as a whole rather than only its first operand.
 *
 * The expansion is kept as a plain brace block (not do { } while (0)) so that
 * existing call sites keep compiling unchanged.
 */
#define SAD_INSVE_W4(RTYPE, in0, in1, in2, in3, out)             \
  {                                                              \
    (out) = (RTYPE)__msa_insve_w((v4i32)(out), 0, (v4i32)(in0)); \
    (out) = (RTYPE)__msa_insve_w((v4i32)(out), 1, (v4i32)(in1)); \
    (out) = (RTYPE)__msa_insve_w((v4i32)(out), 2, (v4i32)(in2)); \
    (out) = (RTYPE)__msa_insve_w((v4i32)(out), 3, (v4i32)(in3)); \
  }
/* Unsigned-byte-vector (v16u8) flavour of SAD_INSVE_W4. */
#define SAD_INSVE_W4_UB(...) SAD_INSVE_W4(v16u8, __VA_ARGS__)
22*fb1b10abSAndroid Build Coastguard Worker
sad_4width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height)23*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
24*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref_ptr, int32_t ref_stride,
25*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
26*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
27*fb1b10abSAndroid Build Coastguard Worker uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
28*fb1b10abSAndroid Build Coastguard Worker v16u8 src = { 0 };
29*fb1b10abSAndroid Build Coastguard Worker v16u8 ref = { 0 };
30*fb1b10abSAndroid Build Coastguard Worker v16u8 diff;
31*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
32*fb1b10abSAndroid Build Coastguard Worker
33*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
34*fb1b10abSAndroid Build Coastguard Worker LW4(src_ptr, src_stride, src0, src1, src2, src3);
35*fb1b10abSAndroid Build Coastguard Worker src_ptr += (4 * src_stride);
36*fb1b10abSAndroid Build Coastguard Worker LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
37*fb1b10abSAndroid Build Coastguard Worker ref_ptr += (4 * ref_stride);
38*fb1b10abSAndroid Build Coastguard Worker
39*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(src0, src1, src2, src3, src);
40*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
41*fb1b10abSAndroid Build Coastguard Worker
42*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref);
43*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_h(diff, diff);
44*fb1b10abSAndroid Build Coastguard Worker }
45*fb1b10abSAndroid Build Coastguard Worker
46*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
47*fb1b10abSAndroid Build Coastguard Worker }
48*fb1b10abSAndroid Build Coastguard Worker
sad_8width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)49*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_8width_msa(const uint8_t *src, int32_t src_stride,
50*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
51*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
52*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
53*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
54*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
55*fb1b10abSAndroid Build Coastguard Worker
56*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
57*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, src_stride, src0, src1, src2, src3);
58*fb1b10abSAndroid Build Coastguard Worker src += (4 * src_stride);
59*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
60*fb1b10abSAndroid Build Coastguard Worker ref += (4 * ref_stride);
61*fb1b10abSAndroid Build Coastguard Worker
62*fb1b10abSAndroid Build Coastguard Worker PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
63*fb1b10abSAndroid Build Coastguard Worker ref0, ref1);
64*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
65*fb1b10abSAndroid Build Coastguard Worker }
66*fb1b10abSAndroid Build Coastguard Worker
67*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
68*fb1b10abSAndroid Build Coastguard Worker }
69*fb1b10abSAndroid Build Coastguard Worker
sad_16width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)70*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_16width_msa(const uint8_t *src, int32_t src_stride,
71*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
72*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
73*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
74*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, ref0, ref1;
75*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
76*fb1b10abSAndroid Build Coastguard Worker
77*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
78*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, src_stride, src0, src1);
79*fb1b10abSAndroid Build Coastguard Worker src += (2 * src_stride);
80*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref, ref_stride, ref0, ref1);
81*fb1b10abSAndroid Build Coastguard Worker ref += (2 * ref_stride);
82*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
83*fb1b10abSAndroid Build Coastguard Worker
84*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, src_stride, src0, src1);
85*fb1b10abSAndroid Build Coastguard Worker src += (2 * src_stride);
86*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref, ref_stride, ref0, ref1);
87*fb1b10abSAndroid Build Coastguard Worker ref += (2 * ref_stride);
88*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
89*fb1b10abSAndroid Build Coastguard Worker }
90*fb1b10abSAndroid Build Coastguard Worker
91*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
92*fb1b10abSAndroid Build Coastguard Worker }
93*fb1b10abSAndroid Build Coastguard Worker
sad_32width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)94*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_32width_msa(const uint8_t *src, int32_t src_stride,
95*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
96*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
97*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
98*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, ref0, ref1;
99*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
100*fb1b10abSAndroid Build Coastguard Worker
101*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
102*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, 16, src0, src1);
103*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
104*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref, 16, ref0, ref1);
105*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
106*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
107*fb1b10abSAndroid Build Coastguard Worker
108*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, 16, src0, src1);
109*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
110*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref, 16, ref0, ref1);
111*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
112*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
113*fb1b10abSAndroid Build Coastguard Worker
114*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, 16, src0, src1);
115*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
116*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref, 16, ref0, ref1);
117*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
118*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
119*fb1b10abSAndroid Build Coastguard Worker
120*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, 16, src0, src1);
121*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
122*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref, 16, ref0, ref1);
123*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
124*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, ref0, ref1);
125*fb1b10abSAndroid Build Coastguard Worker }
126*fb1b10abSAndroid Build Coastguard Worker
127*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
128*fb1b10abSAndroid Build Coastguard Worker }
129*fb1b10abSAndroid Build Coastguard Worker
sad_64width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)130*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_64width_msa(const uint8_t *src, int32_t src_stride,
131*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
132*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
133*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
134*fb1b10abSAndroid Build Coastguard Worker uint32_t sad = 0;
135*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3;
136*fb1b10abSAndroid Build Coastguard Worker v16u8 ref0, ref1, ref2, ref3;
137*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0 = { 0 };
138*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1 = { 0 };
139*fb1b10abSAndroid Build Coastguard Worker
140*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 1); ht_cnt--;) {
141*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
142*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
143*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
144*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
145*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
146*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src2, src3, ref2, ref3);
147*fb1b10abSAndroid Build Coastguard Worker
148*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
149*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
150*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
151*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
152*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
153*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src2, src3, ref2, ref3);
154*fb1b10abSAndroid Build Coastguard Worker }
155*fb1b10abSAndroid Build Coastguard Worker
156*fb1b10abSAndroid Build Coastguard Worker sad = HADD_UH_U32(sad0);
157*fb1b10abSAndroid Build Coastguard Worker sad += HADD_UH_U32(sad1);
158*fb1b10abSAndroid Build Coastguard Worker
159*fb1b10abSAndroid Build Coastguard Worker return sad;
160*fb1b10abSAndroid Build Coastguard Worker }
161*fb1b10abSAndroid Build Coastguard Worker
sad_4width_x4d_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)162*fb1b10abSAndroid Build Coastguard Worker static void sad_4width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
163*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
164*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
165*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
166*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
167*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
168*fb1b10abSAndroid Build Coastguard Worker uint32_t src0, src1, src2, src3;
169*fb1b10abSAndroid Build Coastguard Worker uint32_t ref0, ref1, ref2, ref3;
170*fb1b10abSAndroid Build Coastguard Worker v16u8 src = { 0 };
171*fb1b10abSAndroid Build Coastguard Worker v16u8 ref = { 0 };
172*fb1b10abSAndroid Build Coastguard Worker v16u8 diff;
173*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0 = { 0 };
174*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1 = { 0 };
175*fb1b10abSAndroid Build Coastguard Worker v8u16 sad2 = { 0 };
176*fb1b10abSAndroid Build Coastguard Worker v8u16 sad3 = { 0 };
177*fb1b10abSAndroid Build Coastguard Worker
178*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
179*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
180*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
181*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
182*fb1b10abSAndroid Build Coastguard Worker
183*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
184*fb1b10abSAndroid Build Coastguard Worker LW4(src_ptr, src_stride, src0, src1, src2, src3);
185*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(src0, src1, src2, src3, src);
186*fb1b10abSAndroid Build Coastguard Worker src_ptr += (4 * src_stride);
187*fb1b10abSAndroid Build Coastguard Worker
188*fb1b10abSAndroid Build Coastguard Worker LW4(ref0_ptr, ref_stride, ref0, ref1, ref2, ref3);
189*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
190*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += (4 * ref_stride);
191*fb1b10abSAndroid Build Coastguard Worker
192*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref);
193*fb1b10abSAndroid Build Coastguard Worker sad0 += __msa_hadd_u_h(diff, diff);
194*fb1b10abSAndroid Build Coastguard Worker
195*fb1b10abSAndroid Build Coastguard Worker LW4(ref1_ptr, ref_stride, ref0, ref1, ref2, ref3);
196*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
197*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += (4 * ref_stride);
198*fb1b10abSAndroid Build Coastguard Worker
199*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref);
200*fb1b10abSAndroid Build Coastguard Worker sad1 += __msa_hadd_u_h(diff, diff);
201*fb1b10abSAndroid Build Coastguard Worker
202*fb1b10abSAndroid Build Coastguard Worker LW4(ref2_ptr, ref_stride, ref0, ref1, ref2, ref3);
203*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
204*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += (4 * ref_stride);
205*fb1b10abSAndroid Build Coastguard Worker
206*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref);
207*fb1b10abSAndroid Build Coastguard Worker sad2 += __msa_hadd_u_h(diff, diff);
208*fb1b10abSAndroid Build Coastguard Worker
209*fb1b10abSAndroid Build Coastguard Worker LW4(ref3_ptr, ref_stride, ref0, ref1, ref2, ref3);
210*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
211*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += (4 * ref_stride);
212*fb1b10abSAndroid Build Coastguard Worker
213*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref);
214*fb1b10abSAndroid Build Coastguard Worker sad3 += __msa_hadd_u_h(diff, diff);
215*fb1b10abSAndroid Build Coastguard Worker }
216*fb1b10abSAndroid Build Coastguard Worker
217*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = HADD_UH_U32(sad0);
218*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = HADD_UH_U32(sad1);
219*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = HADD_UH_U32(sad2);
220*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = HADD_UH_U32(sad3);
221*fb1b10abSAndroid Build Coastguard Worker }
222*fb1b10abSAndroid Build Coastguard Worker
sad_8width_x4d_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)223*fb1b10abSAndroid Build Coastguard Worker static void sad_8width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
224*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
225*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
226*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
227*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
228*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
229*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3;
230*fb1b10abSAndroid Build Coastguard Worker v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
231*fb1b10abSAndroid Build Coastguard Worker v16u8 ref8, ref9, ref10, ref11, ref12, ref13, ref14, ref15;
232*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0 = { 0 };
233*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1 = { 0 };
234*fb1b10abSAndroid Build Coastguard Worker v8u16 sad2 = { 0 };
235*fb1b10abSAndroid Build Coastguard Worker v8u16 sad3 = { 0 };
236*fb1b10abSAndroid Build Coastguard Worker
237*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
238*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
239*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
240*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
241*fb1b10abSAndroid Build Coastguard Worker
242*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
243*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
244*fb1b10abSAndroid Build Coastguard Worker src_ptr += (4 * src_stride);
245*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref0_ptr, ref_stride, ref0, ref1, ref2, ref3);
246*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += (4 * ref_stride);
247*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref1_ptr, ref_stride, ref4, ref5, ref6, ref7);
248*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += (4 * ref_stride);
249*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref2_ptr, ref_stride, ref8, ref9, ref10, ref11);
250*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += (4 * ref_stride);
251*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref3_ptr, ref_stride, ref12, ref13, ref14, ref15);
252*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += (4 * ref_stride);
253*fb1b10abSAndroid Build Coastguard Worker
254*fb1b10abSAndroid Build Coastguard Worker PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
255*fb1b10abSAndroid Build Coastguard Worker PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
256*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
257*fb1b10abSAndroid Build Coastguard Worker
258*fb1b10abSAndroid Build Coastguard Worker PCKEV_D2_UB(ref5, ref4, ref7, ref6, ref0, ref1);
259*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src0, src1, ref0, ref1);
260*fb1b10abSAndroid Build Coastguard Worker
261*fb1b10abSAndroid Build Coastguard Worker PCKEV_D2_UB(ref9, ref8, ref11, ref10, ref0, ref1);
262*fb1b10abSAndroid Build Coastguard Worker sad2 += SAD_UB2_UH(src0, src1, ref0, ref1);
263*fb1b10abSAndroid Build Coastguard Worker
264*fb1b10abSAndroid Build Coastguard Worker PCKEV_D2_UB(ref13, ref12, ref15, ref14, ref0, ref1);
265*fb1b10abSAndroid Build Coastguard Worker sad3 += SAD_UB2_UH(src0, src1, ref0, ref1);
266*fb1b10abSAndroid Build Coastguard Worker }
267*fb1b10abSAndroid Build Coastguard Worker
268*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = HADD_UH_U32(sad0);
269*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = HADD_UH_U32(sad1);
270*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = HADD_UH_U32(sad2);
271*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = HADD_UH_U32(sad3);
272*fb1b10abSAndroid Build Coastguard Worker }
273*fb1b10abSAndroid Build Coastguard Worker
sad_16width_x4d_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)274*fb1b10abSAndroid Build Coastguard Worker static void sad_16width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
275*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
276*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
277*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
278*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
279*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
280*fb1b10abSAndroid Build Coastguard Worker v16u8 src, ref0, ref1, ref2, ref3, diff;
281*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0 = { 0 };
282*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1 = { 0 };
283*fb1b10abSAndroid Build Coastguard Worker v8u16 sad2 = { 0 };
284*fb1b10abSAndroid Build Coastguard Worker v8u16 sad3 = { 0 };
285*fb1b10abSAndroid Build Coastguard Worker
286*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
287*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
288*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
289*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
290*fb1b10abSAndroid Build Coastguard Worker
291*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 1); ht_cnt--;) {
292*fb1b10abSAndroid Build Coastguard Worker src = LD_UB(src_ptr);
293*fb1b10abSAndroid Build Coastguard Worker src_ptr += src_stride;
294*fb1b10abSAndroid Build Coastguard Worker ref0 = LD_UB(ref0_ptr);
295*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
296*fb1b10abSAndroid Build Coastguard Worker ref1 = LD_UB(ref1_ptr);
297*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
298*fb1b10abSAndroid Build Coastguard Worker ref2 = LD_UB(ref2_ptr);
299*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
300*fb1b10abSAndroid Build Coastguard Worker ref3 = LD_UB(ref3_ptr);
301*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
302*fb1b10abSAndroid Build Coastguard Worker
303*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref0);
304*fb1b10abSAndroid Build Coastguard Worker sad0 += __msa_hadd_u_h(diff, diff);
305*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref1);
306*fb1b10abSAndroid Build Coastguard Worker sad1 += __msa_hadd_u_h(diff, diff);
307*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref2);
308*fb1b10abSAndroid Build Coastguard Worker sad2 += __msa_hadd_u_h(diff, diff);
309*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref3);
310*fb1b10abSAndroid Build Coastguard Worker sad3 += __msa_hadd_u_h(diff, diff);
311*fb1b10abSAndroid Build Coastguard Worker
312*fb1b10abSAndroid Build Coastguard Worker src = LD_UB(src_ptr);
313*fb1b10abSAndroid Build Coastguard Worker src_ptr += src_stride;
314*fb1b10abSAndroid Build Coastguard Worker ref0 = LD_UB(ref0_ptr);
315*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
316*fb1b10abSAndroid Build Coastguard Worker ref1 = LD_UB(ref1_ptr);
317*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
318*fb1b10abSAndroid Build Coastguard Worker ref2 = LD_UB(ref2_ptr);
319*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
320*fb1b10abSAndroid Build Coastguard Worker ref3 = LD_UB(ref3_ptr);
321*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
322*fb1b10abSAndroid Build Coastguard Worker
323*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref0);
324*fb1b10abSAndroid Build Coastguard Worker sad0 += __msa_hadd_u_h(diff, diff);
325*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref1);
326*fb1b10abSAndroid Build Coastguard Worker sad1 += __msa_hadd_u_h(diff, diff);
327*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref2);
328*fb1b10abSAndroid Build Coastguard Worker sad2 += __msa_hadd_u_h(diff, diff);
329*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, ref3);
330*fb1b10abSAndroid Build Coastguard Worker sad3 += __msa_hadd_u_h(diff, diff);
331*fb1b10abSAndroid Build Coastguard Worker }
332*fb1b10abSAndroid Build Coastguard Worker
333*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = HADD_UH_U32(sad0);
334*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = HADD_UH_U32(sad1);
335*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = HADD_UH_U32(sad2);
336*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = HADD_UH_U32(sad3);
337*fb1b10abSAndroid Build Coastguard Worker }
338*fb1b10abSAndroid Build Coastguard Worker
sad_32width_x4d_msa(const uint8_t * src,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)339*fb1b10abSAndroid Build Coastguard Worker static void sad_32width_x4d_msa(const uint8_t *src, int32_t src_stride,
340*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
341*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
342*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
343*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
344*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
345*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, ref0, ref1;
346*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0 = { 0 };
347*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1 = { 0 };
348*fb1b10abSAndroid Build Coastguard Worker v8u16 sad2 = { 0 };
349*fb1b10abSAndroid Build Coastguard Worker v8u16 sad3 = { 0 };
350*fb1b10abSAndroid Build Coastguard Worker
351*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
352*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
353*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
354*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
355*fb1b10abSAndroid Build Coastguard Worker
356*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = height; ht_cnt--;) {
357*fb1b10abSAndroid Build Coastguard Worker LD_UB2(src, 16, src0, src1);
358*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
359*fb1b10abSAndroid Build Coastguard Worker
360*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref0_ptr, 16, ref0, ref1);
361*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
362*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
363*fb1b10abSAndroid Build Coastguard Worker
364*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref1_ptr, 16, ref0, ref1);
365*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
366*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src0, src1, ref0, ref1);
367*fb1b10abSAndroid Build Coastguard Worker
368*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref2_ptr, 16, ref0, ref1);
369*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
370*fb1b10abSAndroid Build Coastguard Worker sad2 += SAD_UB2_UH(src0, src1, ref0, ref1);
371*fb1b10abSAndroid Build Coastguard Worker
372*fb1b10abSAndroid Build Coastguard Worker LD_UB2(ref3_ptr, 16, ref0, ref1);
373*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
374*fb1b10abSAndroid Build Coastguard Worker sad3 += SAD_UB2_UH(src0, src1, ref0, ref1);
375*fb1b10abSAndroid Build Coastguard Worker }
376*fb1b10abSAndroid Build Coastguard Worker
377*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = HADD_UH_U32(sad0);
378*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = HADD_UH_U32(sad1);
379*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = HADD_UH_U32(sad2);
380*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = HADD_UH_U32(sad3);
381*fb1b10abSAndroid Build Coastguard Worker }
382*fb1b10abSAndroid Build Coastguard Worker
sad_64width_x4d_msa(const uint8_t * src,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)383*fb1b10abSAndroid Build Coastguard Worker static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride,
384*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
385*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
386*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
387*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
388*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
389*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3;
390*fb1b10abSAndroid Build Coastguard Worker v16u8 ref0, ref1, ref2, ref3;
391*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0_0 = { 0 };
392*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0_1 = { 0 };
393*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1_0 = { 0 };
394*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1_1 = { 0 };
395*fb1b10abSAndroid Build Coastguard Worker v8u16 sad2_0 = { 0 };
396*fb1b10abSAndroid Build Coastguard Worker v8u16 sad2_1 = { 0 };
397*fb1b10abSAndroid Build Coastguard Worker v8u16 sad3_0 = { 0 };
398*fb1b10abSAndroid Build Coastguard Worker v8u16 sad3_1 = { 0 };
399*fb1b10abSAndroid Build Coastguard Worker v4u32 sad;
400*fb1b10abSAndroid Build Coastguard Worker
401*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
402*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
403*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
404*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
405*fb1b10abSAndroid Build Coastguard Worker
406*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = height; ht_cnt--;) {
407*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
408*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
409*fb1b10abSAndroid Build Coastguard Worker
410*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref0_ptr, 16, ref0, ref1, ref2, ref3);
411*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
412*fb1b10abSAndroid Build Coastguard Worker sad0_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
413*fb1b10abSAndroid Build Coastguard Worker sad0_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
414*fb1b10abSAndroid Build Coastguard Worker
415*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref1_ptr, 16, ref0, ref1, ref2, ref3);
416*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
417*fb1b10abSAndroid Build Coastguard Worker sad1_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
418*fb1b10abSAndroid Build Coastguard Worker sad1_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
419*fb1b10abSAndroid Build Coastguard Worker
420*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref2_ptr, 16, ref0, ref1, ref2, ref3);
421*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
422*fb1b10abSAndroid Build Coastguard Worker sad2_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
423*fb1b10abSAndroid Build Coastguard Worker sad2_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
424*fb1b10abSAndroid Build Coastguard Worker
425*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref3_ptr, 16, ref0, ref1, ref2, ref3);
426*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
427*fb1b10abSAndroid Build Coastguard Worker sad3_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
428*fb1b10abSAndroid Build Coastguard Worker sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
429*fb1b10abSAndroid Build Coastguard Worker }
430*fb1b10abSAndroid Build Coastguard Worker
431*fb1b10abSAndroid Build Coastguard Worker sad = __msa_hadd_u_w(sad0_0, sad0_0);
432*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_w(sad0_1, sad0_1);
433*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = HADD_UW_U32(sad);
434*fb1b10abSAndroid Build Coastguard Worker
435*fb1b10abSAndroid Build Coastguard Worker sad = __msa_hadd_u_w(sad1_0, sad1_0);
436*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_w(sad1_1, sad1_1);
437*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = HADD_UW_U32(sad);
438*fb1b10abSAndroid Build Coastguard Worker
439*fb1b10abSAndroid Build Coastguard Worker sad = __msa_hadd_u_w(sad2_0, sad2_0);
440*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_w(sad2_1, sad2_1);
441*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = HADD_UW_U32(sad);
442*fb1b10abSAndroid Build Coastguard Worker
443*fb1b10abSAndroid Build Coastguard Worker sad = __msa_hadd_u_w(sad3_0, sad3_0);
444*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_w(sad3_1, sad3_1);
445*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = HADD_UW_U32(sad);
446*fb1b10abSAndroid Build Coastguard Worker }
447*fb1b10abSAndroid Build Coastguard Worker
avgsad_4width_msa(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * ref_ptr,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)448*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
449*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref_ptr, int32_t ref_stride,
450*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
451*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
452*fb1b10abSAndroid Build Coastguard Worker uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
453*fb1b10abSAndroid Build Coastguard Worker v16u8 src = { 0 };
454*fb1b10abSAndroid Build Coastguard Worker v16u8 ref = { 0 };
455*fb1b10abSAndroid Build Coastguard Worker v16u8 diff, pred, comp;
456*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
457*fb1b10abSAndroid Build Coastguard Worker
458*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
459*fb1b10abSAndroid Build Coastguard Worker LW4(src_ptr, src_stride, src0, src1, src2, src3);
460*fb1b10abSAndroid Build Coastguard Worker src_ptr += (4 * src_stride);
461*fb1b10abSAndroid Build Coastguard Worker LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
462*fb1b10abSAndroid Build Coastguard Worker ref_ptr += (4 * ref_stride);
463*fb1b10abSAndroid Build Coastguard Worker pred = LD_UB(sec_pred);
464*fb1b10abSAndroid Build Coastguard Worker sec_pred += 16;
465*fb1b10abSAndroid Build Coastguard Worker
466*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(src0, src1, src2, src3, src);
467*fb1b10abSAndroid Build Coastguard Worker INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
468*fb1b10abSAndroid Build Coastguard Worker
469*fb1b10abSAndroid Build Coastguard Worker comp = __msa_aver_u_b(pred, ref);
470*fb1b10abSAndroid Build Coastguard Worker diff = __msa_asub_u_b(src, comp);
471*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_h(diff, diff);
472*fb1b10abSAndroid Build Coastguard Worker }
473*fb1b10abSAndroid Build Coastguard Worker
474*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
475*fb1b10abSAndroid Build Coastguard Worker }
476*fb1b10abSAndroid Build Coastguard Worker
avgsad_8width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)477*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_8width_msa(const uint8_t *src, int32_t src_stride,
478*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
479*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
480*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
481*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
482*fb1b10abSAndroid Build Coastguard Worker v16u8 diff0, diff1, pred0, pred1;
483*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
484*fb1b10abSAndroid Build Coastguard Worker
485*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
486*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, src_stride, src0, src1, src2, src3);
487*fb1b10abSAndroid Build Coastguard Worker src += (4 * src_stride);
488*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
489*fb1b10abSAndroid Build Coastguard Worker ref += (4 * ref_stride);
490*fb1b10abSAndroid Build Coastguard Worker LD_UB2(sec_pred, 16, pred0, pred1);
491*fb1b10abSAndroid Build Coastguard Worker sec_pred += 32;
492*fb1b10abSAndroid Build Coastguard Worker PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
493*fb1b10abSAndroid Build Coastguard Worker ref0, ref1);
494*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred0, ref0, pred1, ref1, diff0, diff1);
495*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, diff0, diff1);
496*fb1b10abSAndroid Build Coastguard Worker }
497*fb1b10abSAndroid Build Coastguard Worker
498*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
499*fb1b10abSAndroid Build Coastguard Worker }
500*fb1b10abSAndroid Build Coastguard Worker
avgsad_16width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)501*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_16width_msa(const uint8_t *src, int32_t src_stride,
502*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
503*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
504*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
505*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
506*fb1b10abSAndroid Build Coastguard Worker v16u8 pred0, pred1, pred2, pred3, comp0, comp1;
507*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
508*fb1b10abSAndroid Build Coastguard Worker
509*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 3); ht_cnt--;) {
510*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, src_stride, src0, src1, src2, src3);
511*fb1b10abSAndroid Build Coastguard Worker src += (4 * src_stride);
512*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
513*fb1b10abSAndroid Build Coastguard Worker ref += (4 * ref_stride);
514*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
515*fb1b10abSAndroid Build Coastguard Worker sec_pred += (4 * 16);
516*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1);
517*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, comp0, comp1);
518*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1);
519*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src2, src3, comp0, comp1);
520*fb1b10abSAndroid Build Coastguard Worker
521*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, src_stride, src0, src1, src2, src3);
522*fb1b10abSAndroid Build Coastguard Worker src += (4 * src_stride);
523*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
524*fb1b10abSAndroid Build Coastguard Worker ref += (4 * ref_stride);
525*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
526*fb1b10abSAndroid Build Coastguard Worker sec_pred += (4 * 16);
527*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1);
528*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, comp0, comp1);
529*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1);
530*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src2, src3, comp0, comp1);
531*fb1b10abSAndroid Build Coastguard Worker }
532*fb1b10abSAndroid Build Coastguard Worker
533*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
534*fb1b10abSAndroid Build Coastguard Worker }
535*fb1b10abSAndroid Build Coastguard Worker
avgsad_32width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)536*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_32width_msa(const uint8_t *src, int32_t src_stride,
537*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
538*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
539*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
540*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
541*fb1b10abSAndroid Build Coastguard Worker v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
542*fb1b10abSAndroid Build Coastguard Worker v16u8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
543*fb1b10abSAndroid Build Coastguard Worker v16u8 comp0, comp1;
544*fb1b10abSAndroid Build Coastguard Worker v8u16 sad = { 0 };
545*fb1b10abSAndroid Build Coastguard Worker
546*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
547*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, src_stride, src0, src2, src4, src6);
548*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src + 16, src_stride, src1, src3, src5, src7);
549*fb1b10abSAndroid Build Coastguard Worker src += (4 * src_stride);
550*fb1b10abSAndroid Build Coastguard Worker
551*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, ref_stride, ref0, ref2, ref4, ref6);
552*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref + 16, ref_stride, ref1, ref3, ref5, ref7);
553*fb1b10abSAndroid Build Coastguard Worker ref += (4 * ref_stride);
554*fb1b10abSAndroid Build Coastguard Worker
555*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 32, pred0, pred2, pred4, pred6);
556*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred + 16, 32, pred1, pred3, pred5, pred7);
557*fb1b10abSAndroid Build Coastguard Worker sec_pred += (4 * 32);
558*fb1b10abSAndroid Build Coastguard Worker
559*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1);
560*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src0, src1, comp0, comp1);
561*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1);
562*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src2, src3, comp0, comp1);
563*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred4, ref4, pred5, ref5, comp0, comp1);
564*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src4, src5, comp0, comp1);
565*fb1b10abSAndroid Build Coastguard Worker AVER_UB2_UB(pred6, ref6, pred7, ref7, comp0, comp1);
566*fb1b10abSAndroid Build Coastguard Worker sad += SAD_UB2_UH(src6, src7, comp0, comp1);
567*fb1b10abSAndroid Build Coastguard Worker }
568*fb1b10abSAndroid Build Coastguard Worker
569*fb1b10abSAndroid Build Coastguard Worker return HADD_UH_U32(sad);
570*fb1b10abSAndroid Build Coastguard Worker }
571*fb1b10abSAndroid Build Coastguard Worker
avgsad_64width_msa(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)572*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_64width_msa(const uint8_t *src, int32_t src_stride,
573*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
574*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
575*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
576*fb1b10abSAndroid Build Coastguard Worker v16u8 src0, src1, src2, src3;
577*fb1b10abSAndroid Build Coastguard Worker v16u8 ref0, ref1, ref2, ref3;
578*fb1b10abSAndroid Build Coastguard Worker v16u8 comp0, comp1, comp2, comp3;
579*fb1b10abSAndroid Build Coastguard Worker v16u8 pred0, pred1, pred2, pred3;
580*fb1b10abSAndroid Build Coastguard Worker v8u16 sad0 = { 0 };
581*fb1b10abSAndroid Build Coastguard Worker v8u16 sad1 = { 0 };
582*fb1b10abSAndroid Build Coastguard Worker v4u32 sad;
583*fb1b10abSAndroid Build Coastguard Worker
584*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
585*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
586*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
587*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
588*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
589*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
590*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
591*fb1b10abSAndroid Build Coastguard Worker AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
592*fb1b10abSAndroid Build Coastguard Worker comp1, comp2, comp3);
593*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
594*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
595*fb1b10abSAndroid Build Coastguard Worker
596*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
597*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
598*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
599*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
600*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
601*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
602*fb1b10abSAndroid Build Coastguard Worker AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
603*fb1b10abSAndroid Build Coastguard Worker comp1, comp2, comp3);
604*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
605*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
606*fb1b10abSAndroid Build Coastguard Worker
607*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
608*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
609*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
610*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
611*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
612*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
613*fb1b10abSAndroid Build Coastguard Worker AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
614*fb1b10abSAndroid Build Coastguard Worker comp1, comp2, comp3);
615*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
616*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
617*fb1b10abSAndroid Build Coastguard Worker
618*fb1b10abSAndroid Build Coastguard Worker LD_UB4(src, 16, src0, src1, src2, src3);
619*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
620*fb1b10abSAndroid Build Coastguard Worker LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
621*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
622*fb1b10abSAndroid Build Coastguard Worker LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
623*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
624*fb1b10abSAndroid Build Coastguard Worker AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
625*fb1b10abSAndroid Build Coastguard Worker comp1, comp2, comp3);
626*fb1b10abSAndroid Build Coastguard Worker sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
627*fb1b10abSAndroid Build Coastguard Worker sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
628*fb1b10abSAndroid Build Coastguard Worker }
629*fb1b10abSAndroid Build Coastguard Worker
630*fb1b10abSAndroid Build Coastguard Worker sad = __msa_hadd_u_w(sad0, sad0);
631*fb1b10abSAndroid Build Coastguard Worker sad += __msa_hadd_u_w(sad1, sad1);
632*fb1b10abSAndroid Build Coastguard Worker
633*fb1b10abSAndroid Build Coastguard Worker return HADD_SW_S32(sad);
634*fb1b10abSAndroid Build Coastguard Worker }
635*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad4x<rows>_msa(): the 4-pixel-wide SAD entry point for a
// fixed block height, forwarding to sad_4width_msa().
#define VPX_SAD_4xHEIGHT_MSA(rows) \
  uint32_t vpx_sad4x##rows##_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride) { \
    return sad_4width_msa(src, src_stride, ref, ref_stride, rows); \
  }
641*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad8x<rows>_msa(): the 8-pixel-wide SAD entry point for a
// fixed block height, forwarding to sad_8width_msa().
#define VPX_SAD_8xHEIGHT_MSA(rows) \
  uint32_t vpx_sad8x##rows##_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride) { \
    return sad_8width_msa(src, src_stride, ref, ref_stride, rows); \
  }
647*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad16x<rows>_msa(): the 16-pixel-wide SAD entry point for a
// fixed block height, forwarding to sad_16width_msa().
#define VPX_SAD_16xHEIGHT_MSA(rows) \
  uint32_t vpx_sad16x##rows##_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride) { \
    return sad_16width_msa(src, src_stride, ref, ref_stride, rows); \
  }
653*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad32x<rows>_msa(): the 32-pixel-wide SAD entry point for a
// fixed block height, forwarding to sad_32width_msa().
#define VPX_SAD_32xHEIGHT_MSA(rows) \
  uint32_t vpx_sad32x##rows##_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride) { \
    return sad_32width_msa(src, src_stride, ref, ref_stride, rows); \
  }
659*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad64x<rows>_msa(): the 64-pixel-wide SAD entry point for a
// fixed block height, forwarding to sad_64width_msa().
#define VPX_SAD_64xHEIGHT_MSA(rows) \
  uint32_t vpx_sad64x##rows##_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride) { \
    return sad_64width_msa(src, src_stride, ref, ref_stride, rows); \
  }
665*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad4x<rows>x4d_msa(): the 4-pixel-wide, four-reference SAD
// entry point for a fixed block height. It forwards to sad_4width_x4d_msa(),
// passing the four reference pointers in refs and the output array sads.
#define VPX_SAD_4xHEIGHTx4D_MSA(rows) \
  void vpx_sad4x##rows##x4d_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *const refs[4], int32_t ref_stride, \
      uint32_t sads[4]) { \
    sad_4width_x4d_msa(src, src_stride, refs, ref_stride, rows, sads); \
  }
672*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad8x<rows>x4d_msa(): the 8-pixel-wide, four-reference SAD
// entry point for a fixed block height. It forwards to sad_8width_x4d_msa(),
// passing the four reference pointers in refs and the output array sads.
#define VPX_SAD_8xHEIGHTx4D_MSA(rows) \
  void vpx_sad8x##rows##x4d_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *const refs[4], int32_t ref_stride, \
      uint32_t sads[4]) { \
    sad_8width_x4d_msa(src, src_stride, refs, ref_stride, rows, sads); \
  }
679*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad16x<rows>x4d_msa(): the 16-pixel-wide, four-reference SAD
// entry point for a fixed block height. It forwards to sad_16width_x4d_msa(),
// passing the four reference pointers in refs and the output array sads.
#define VPX_SAD_16xHEIGHTx4D_MSA(rows) \
  void vpx_sad16x##rows##x4d_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *const refs[4], int32_t ref_stride, \
      uint32_t sads[4]) { \
    sad_16width_x4d_msa(src, src_stride, refs, ref_stride, rows, sads); \
  }
686*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad32x<rows>x4d_msa(): the 32-pixel-wide, four-reference SAD
// entry point for a fixed block height. It forwards to sad_32width_x4d_msa(),
// passing the four reference pointers in refs and the output array sads.
#define VPX_SAD_32xHEIGHTx4D_MSA(rows) \
  void vpx_sad32x##rows##x4d_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *const refs[4], int32_t ref_stride, \
      uint32_t sads[4]) { \
    sad_32width_x4d_msa(src, src_stride, refs, ref_stride, rows, sads); \
  }
693*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad64x<rows>x4d_msa(): the 64-pixel-wide, four-reference SAD
// entry point for a fixed block height. It forwards to sad_64width_x4d_msa(),
// passing the four reference pointers in refs and the output array sads.
#define VPX_SAD_64xHEIGHTx4D_MSA(rows) \
  void vpx_sad64x##rows##x4d_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *const refs[4], int32_t ref_stride, \
      uint32_t sads[4]) { \
    sad_64width_x4d_msa(src, src_stride, refs, ref_stride, rows, sads); \
  }
700*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad4x<rows>_avg_msa(): the 4-pixel-wide averaging SAD entry
// point for a fixed block height, forwarding to avgsad_4width_msa() with
// second_pred as the second predictor.
#define VPX_AVGSAD_4xHEIGHT_MSA(rows) \
  uint32_t vpx_sad4x##rows##_avg_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride, \
      const uint8_t *second_pred) { \
    return avgsad_4width_msa(src, src_stride, ref, ref_stride, rows, \
                             second_pred); \
  }
708*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad8x<rows>_avg_msa(): the 8-pixel-wide averaging SAD entry
// point for a fixed block height, forwarding to avgsad_8width_msa() with
// second_pred as the second predictor.
#define VPX_AVGSAD_8xHEIGHT_MSA(rows) \
  uint32_t vpx_sad8x##rows##_avg_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride, \
      const uint8_t *second_pred) { \
    return avgsad_8width_msa(src, src_stride, ref, ref_stride, rows, \
                             second_pred); \
  }
716*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad16x<rows>_avg_msa(): the 16-pixel-wide averaging SAD entry
// point for a fixed block height, forwarding to avgsad_16width_msa() with
// second_pred as the second predictor.
#define VPX_AVGSAD_16xHEIGHT_MSA(rows) \
  uint32_t vpx_sad16x##rows##_avg_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride, \
      const uint8_t *second_pred) { \
    return avgsad_16width_msa(src, src_stride, ref, ref_stride, rows, \
                              second_pred); \
  }
724*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad32x<rows>_avg_msa(): the 32-pixel-wide averaging SAD entry
// point for a fixed block height, forwarding to avgsad_32width_msa() with
// second_pred as the second predictor.
#define VPX_AVGSAD_32xHEIGHT_MSA(rows) \
  uint32_t vpx_sad32x##rows##_avg_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride, \
      const uint8_t *second_pred) { \
    return avgsad_32width_msa(src, src_stride, ref, ref_stride, rows, \
                              second_pred); \
  }
732*fb1b10abSAndroid Build Coastguard Worker
// Generates vpx_sad64x<rows>_avg_msa(): the 64-pixel-wide averaging SAD entry
// point for a fixed block height, forwarding to avgsad_64width_msa() with
// second_pred as the second predictor.
#define VPX_AVGSAD_64xHEIGHT_MSA(rows) \
  uint32_t vpx_sad64x##rows##_avg_msa( \
      const uint8_t *src, int32_t src_stride, \
      const uint8_t *ref, int32_t ref_stride, \
      const uint8_t *second_pred) { \
    return avgsad_64width_msa(src, src_stride, ref, ref_stride, rows, \
                              second_pred); \
  }
740*fb1b10abSAndroid Build Coastguard Worker
741*fb1b10abSAndroid Build Coastguard Worker // 64x64
742*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_64xHEIGHT_MSA(64);
743*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_64xHEIGHTx4D_MSA(64);
744*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_64xHEIGHT_MSA(64);
745*fb1b10abSAndroid Build Coastguard Worker
746*fb1b10abSAndroid Build Coastguard Worker // 64x32
747*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_64xHEIGHT_MSA(32);
748*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_64xHEIGHTx4D_MSA(32);
749*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_64xHEIGHT_MSA(32);
750*fb1b10abSAndroid Build Coastguard Worker
751*fb1b10abSAndroid Build Coastguard Worker // 32x64
752*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHEIGHT_MSA(64);
753*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHEIGHTx4D_MSA(64);
754*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_32xHEIGHT_MSA(64);
755*fb1b10abSAndroid Build Coastguard Worker
756*fb1b10abSAndroid Build Coastguard Worker // 32x32
757*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHEIGHT_MSA(32);
758*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHEIGHTx4D_MSA(32);
759*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_32xHEIGHT_MSA(32);
760*fb1b10abSAndroid Build Coastguard Worker
761*fb1b10abSAndroid Build Coastguard Worker // 32x16
762*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHEIGHT_MSA(16);
763*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHEIGHTx4D_MSA(16);
764*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_32xHEIGHT_MSA(16);
765*fb1b10abSAndroid Build Coastguard Worker
766*fb1b10abSAndroid Build Coastguard Worker // 16x32
767*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_16xHEIGHT_MSA(32);
768*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_16xHEIGHTx4D_MSA(32);
769*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_16xHEIGHT_MSA(32);
770*fb1b10abSAndroid Build Coastguard Worker
771*fb1b10abSAndroid Build Coastguard Worker // 16x16
772*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_16xHEIGHT_MSA(16);
773*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_16xHEIGHTx4D_MSA(16);
774*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_16xHEIGHT_MSA(16);
775*fb1b10abSAndroid Build Coastguard Worker
776*fb1b10abSAndroid Build Coastguard Worker // 16x8
777*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_16xHEIGHT_MSA(8);
778*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_16xHEIGHTx4D_MSA(8);
779*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_16xHEIGHT_MSA(8);
780*fb1b10abSAndroid Build Coastguard Worker
781*fb1b10abSAndroid Build Coastguard Worker // 8x16
782*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_8xHEIGHT_MSA(16);
783*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_8xHEIGHTx4D_MSA(16);
784*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_8xHEIGHT_MSA(16);
785*fb1b10abSAndroid Build Coastguard Worker
786*fb1b10abSAndroid Build Coastguard Worker // 8x8
787*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_8xHEIGHT_MSA(8);
788*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_8xHEIGHTx4D_MSA(8);
789*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_8xHEIGHT_MSA(8);
790*fb1b10abSAndroid Build Coastguard Worker
791*fb1b10abSAndroid Build Coastguard Worker // 8x4
792*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_8xHEIGHT_MSA(4);
793*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_8xHEIGHTx4D_MSA(4);
794*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_8xHEIGHT_MSA(4);
795*fb1b10abSAndroid Build Coastguard Worker
796*fb1b10abSAndroid Build Coastguard Worker // 4x8
797*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_4xHEIGHT_MSA(8);
798*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_4xHEIGHTx4D_MSA(8);
799*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_4xHEIGHT_MSA(8);
800*fb1b10abSAndroid Build Coastguard Worker
801*fb1b10abSAndroid Build Coastguard Worker // 4x4
802*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_4xHEIGHT_MSA(4);
803*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_4xHEIGHTx4D_MSA(4);
804*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_4xHEIGHT_MSA(4);
805