xref: /aosp_15_r20/external/libvpx/vpx_dsp/mips/avg_msa.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker #include <stdlib.h>
11*fb1b10abSAndroid Build Coastguard Worker 
12*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
13*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
14*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/mips/macros_msa.h"
15*fb1b10abSAndroid Build Coastguard Worker 
vpx_avg_8x8_msa(const uint8_t * src,int32_t src_stride)16*fb1b10abSAndroid Build Coastguard Worker uint32_t vpx_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
17*fb1b10abSAndroid Build Coastguard Worker   uint32_t sum_out;
18*fb1b10abSAndroid Build Coastguard Worker   v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
19*fb1b10abSAndroid Build Coastguard Worker   v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
20*fb1b10abSAndroid Build Coastguard Worker   v4u32 sum = { 0 };
21*fb1b10abSAndroid Build Coastguard Worker 
22*fb1b10abSAndroid Build Coastguard Worker   LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
23*fb1b10abSAndroid Build Coastguard Worker   HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3);
24*fb1b10abSAndroid Build Coastguard Worker   HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7);
25*fb1b10abSAndroid Build Coastguard Worker   ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6);
26*fb1b10abSAndroid Build Coastguard Worker   ADD2(sum0, sum2, sum4, sum6, sum0, sum4);
27*fb1b10abSAndroid Build Coastguard Worker   sum0 += sum4;
28*fb1b10abSAndroid Build Coastguard Worker 
29*fb1b10abSAndroid Build Coastguard Worker   sum = __msa_hadd_u_w(sum0, sum0);
30*fb1b10abSAndroid Build Coastguard Worker   sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum);
31*fb1b10abSAndroid Build Coastguard Worker   sum = __msa_hadd_u_w(sum0, sum0);
32*fb1b10abSAndroid Build Coastguard Worker   sum = (v4u32)__msa_srari_w((v4i32)sum, 6);
33*fb1b10abSAndroid Build Coastguard Worker   sum_out = __msa_copy_u_w((v4i32)sum, 0);
34*fb1b10abSAndroid Build Coastguard Worker 
35*fb1b10abSAndroid Build Coastguard Worker   return sum_out;
36*fb1b10abSAndroid Build Coastguard Worker }
37*fb1b10abSAndroid Build Coastguard Worker 
vpx_avg_4x4_msa(const uint8_t * src,int32_t src_stride)38*fb1b10abSAndroid Build Coastguard Worker uint32_t vpx_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {
39*fb1b10abSAndroid Build Coastguard Worker   uint32_t sum_out;
40*fb1b10abSAndroid Build Coastguard Worker   uint32_t src0, src1, src2, src3;
41*fb1b10abSAndroid Build Coastguard Worker   v16u8 vec = { 0 };
42*fb1b10abSAndroid Build Coastguard Worker   v8u16 sum0;
43*fb1b10abSAndroid Build Coastguard Worker   v4u32 sum1;
44*fb1b10abSAndroid Build Coastguard Worker   v2u64 sum2;
45*fb1b10abSAndroid Build Coastguard Worker 
46*fb1b10abSAndroid Build Coastguard Worker   LW4(src, src_stride, src0, src1, src2, src3);
47*fb1b10abSAndroid Build Coastguard Worker   INSERT_W4_UB(src0, src1, src2, src3, vec);
48*fb1b10abSAndroid Build Coastguard Worker 
49*fb1b10abSAndroid Build Coastguard Worker   sum0 = __msa_hadd_u_h(vec, vec);
50*fb1b10abSAndroid Build Coastguard Worker   sum1 = __msa_hadd_u_w(sum0, sum0);
51*fb1b10abSAndroid Build Coastguard Worker   sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1);
52*fb1b10abSAndroid Build Coastguard Worker   sum1 = __msa_hadd_u_w(sum0, sum0);
53*fb1b10abSAndroid Build Coastguard Worker   sum2 = __msa_hadd_u_d(sum1, sum1);
54*fb1b10abSAndroid Build Coastguard Worker   sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4);
55*fb1b10abSAndroid Build Coastguard Worker   sum_out = __msa_copy_u_w((v4i32)sum1, 0);
56*fb1b10abSAndroid Build Coastguard Worker 
57*fb1b10abSAndroid Build Coastguard Worker   return sum_out;
58*fb1b10abSAndroid Build Coastguard Worker }
59*fb1b10abSAndroid Build Coastguard Worker 
60*fb1b10abSAndroid Build Coastguard Worker #if !CONFIG_VP9_HIGHBITDEPTH
vpx_hadamard_8x8_msa(const int16_t * src,ptrdiff_t src_stride,int16_t * dst)61*fb1b10abSAndroid Build Coastguard Worker void vpx_hadamard_8x8_msa(const int16_t *src, ptrdiff_t src_stride,
62*fb1b10abSAndroid Build Coastguard Worker                           int16_t *dst) {
63*fb1b10abSAndroid Build Coastguard Worker   v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
64*fb1b10abSAndroid Build Coastguard Worker   v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
65*fb1b10abSAndroid Build Coastguard Worker 
66*fb1b10abSAndroid Build Coastguard Worker   LD_SH8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
67*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
68*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
69*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
70*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
71*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3,
72*fb1b10abSAndroid Build Coastguard Worker               tmp4, tmp5, tmp1, tmp6, tmp2);
73*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1,
74*fb1b10abSAndroid Build Coastguard Worker                      src2, src3, src4, src5, src6, src7);
75*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
76*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
77*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
78*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
79*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3,
80*fb1b10abSAndroid Build Coastguard Worker               tmp4, tmp5, tmp1, tmp6, tmp2);
81*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1,
82*fb1b10abSAndroid Build Coastguard Worker                      src2, src3, src4, src5, src6, src7);
83*fb1b10abSAndroid Build Coastguard Worker   ST_SH8(src0, src1, src2, src3, src4, src5, src6, src7, dst, 8);
84*fb1b10abSAndroid Build Coastguard Worker }
85*fb1b10abSAndroid Build Coastguard Worker 
vpx_hadamard_16x16_msa(const int16_t * src,ptrdiff_t src_stride,int16_t * dst)86*fb1b10abSAndroid Build Coastguard Worker void vpx_hadamard_16x16_msa(const int16_t *src, ptrdiff_t src_stride,
87*fb1b10abSAndroid Build Coastguard Worker                             int16_t *dst) {
88*fb1b10abSAndroid Build Coastguard Worker   v8i16 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
89*fb1b10abSAndroid Build Coastguard Worker   v8i16 src11, src12, src13, src14, src15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
90*fb1b10abSAndroid Build Coastguard Worker   v8i16 tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
91*fb1b10abSAndroid Build Coastguard Worker   v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
92*fb1b10abSAndroid Build Coastguard Worker 
93*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src0, src8);
94*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
95*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src1, src9);
96*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
97*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src2, src10);
98*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
99*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src3, src11);
100*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
101*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src4, src12);
102*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
103*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src5, src13);
104*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
105*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src6, src14);
106*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
107*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src7, src15);
108*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
109*fb1b10abSAndroid Build Coastguard Worker 
110*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
111*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
112*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10,
113*fb1b10abSAndroid Build Coastguard Worker               tmp12, tmp14, tmp15, tmp13, tmp11, tmp9);
114*fb1b10abSAndroid Build Coastguard Worker 
115*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
116*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
117*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3,
118*fb1b10abSAndroid Build Coastguard Worker               tmp4, tmp5, tmp1, tmp6, tmp2);
119*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1,
120*fb1b10abSAndroid Build Coastguard Worker                      src2, src3, src4, src5, src6, src7);
121*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
122*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
123*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
124*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
125*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3,
126*fb1b10abSAndroid Build Coastguard Worker               tmp4, tmp5, tmp1, tmp6, tmp2);
127*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1,
128*fb1b10abSAndroid Build Coastguard Worker                      src2, src11, src4, src5, src6, src7);
129*fb1b10abSAndroid Build Coastguard Worker   ST_SH8(src0, src1, src2, src11, src4, src5, src6, src7, dst, 8);
130*fb1b10abSAndroid Build Coastguard Worker 
131*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9,
132*fb1b10abSAndroid Build Coastguard Worker               src12, src13, src15, src14, src11, src10);
133*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15,
134*fb1b10abSAndroid Build Coastguard Worker               tmp11, tmp12, tmp13, tmp9, tmp14, tmp10);
135*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, src8,
136*fb1b10abSAndroid Build Coastguard Worker                      src9, src10, src11, src12, src13, src14, src15);
137*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10,
138*fb1b10abSAndroid Build Coastguard Worker               tmp12, tmp14, tmp15, tmp13, tmp11, tmp9);
139*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9,
140*fb1b10abSAndroid Build Coastguard Worker               src12, src13, src15, src14, src11, src10);
141*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15,
142*fb1b10abSAndroid Build Coastguard Worker               tmp11, tmp12, tmp13, tmp9, tmp14, tmp10);
143*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, res0,
144*fb1b10abSAndroid Build Coastguard Worker                      res1, res2, res3, res4, res5, res6, res7);
145*fb1b10abSAndroid Build Coastguard Worker 
146*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src0, src8);
147*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
148*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src1, src9);
149*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
150*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src2, src10);
151*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
152*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src3, src11);
153*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
154*fb1b10abSAndroid Build Coastguard Worker 
155*fb1b10abSAndroid Build Coastguard Worker   ST_SH8(res0, res1, res2, res3, res4, res5, res6, res7, dst + 64, 8);
156*fb1b10abSAndroid Build Coastguard Worker 
157*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src4, src12);
158*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
159*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src5, src13);
160*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
161*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src6, src14);
162*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
163*fb1b10abSAndroid Build Coastguard Worker   LD_SH2(src, 8, src7, src15);
164*fb1b10abSAndroid Build Coastguard Worker   src += src_stride;
165*fb1b10abSAndroid Build Coastguard Worker 
166*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
167*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
168*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10,
169*fb1b10abSAndroid Build Coastguard Worker               tmp12, tmp14, tmp15, tmp13, tmp11, tmp9);
170*fb1b10abSAndroid Build Coastguard Worker 
171*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
172*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
173*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3,
174*fb1b10abSAndroid Build Coastguard Worker               tmp4, tmp5, tmp1, tmp6, tmp2);
175*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1,
176*fb1b10abSAndroid Build Coastguard Worker                      src2, src3, src4, src5, src6, src7);
177*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
178*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
179*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
180*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
181*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3,
182*fb1b10abSAndroid Build Coastguard Worker               tmp4, tmp5, tmp1, tmp6, tmp2);
183*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1,
184*fb1b10abSAndroid Build Coastguard Worker                      src2, src3, src4, src5, src6, src7);
185*fb1b10abSAndroid Build Coastguard Worker   ST_SH8(src0, src1, src2, src3, src4, src5, src6, src7, dst + 2 * 64, 8);
186*fb1b10abSAndroid Build Coastguard Worker 
187*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9,
188*fb1b10abSAndroid Build Coastguard Worker               src12, src13, src15, src14, src11, src10);
189*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15,
190*fb1b10abSAndroid Build Coastguard Worker               tmp11, tmp12, tmp13, tmp9, tmp14, tmp10);
191*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, src8,
192*fb1b10abSAndroid Build Coastguard Worker                      src9, src10, src11, src12, src13, src14, src15);
193*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10,
194*fb1b10abSAndroid Build Coastguard Worker               tmp12, tmp14, tmp15, tmp13, tmp11, tmp9);
195*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9,
196*fb1b10abSAndroid Build Coastguard Worker               src12, src13, src15, src14, src11, src10);
197*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15,
198*fb1b10abSAndroid Build Coastguard Worker               tmp11, tmp12, tmp13, tmp9, tmp14, tmp10);
199*fb1b10abSAndroid Build Coastguard Worker   TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, res0,
200*fb1b10abSAndroid Build Coastguard Worker                      res1, res2, res3, res4, res5, res6, res7);
201*fb1b10abSAndroid Build Coastguard Worker   ST_SH8(res0, res1, res2, res3, res4, res5, res6, res7, dst + 3 * 64, 8);
202*fb1b10abSAndroid Build Coastguard Worker 
203*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst, 64, src0, src1, src2, src3);
204*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst + 8, 64, src4, src5, src6, src7);
205*fb1b10abSAndroid Build Coastguard Worker 
206*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
207*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
208*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp0, tmp1, tmp2, tmp3, 1);
209*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp4, tmp5, tmp6, tmp7, 1);
210*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
211*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
212*fb1b10abSAndroid Build Coastguard Worker 
213*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src0, src1, src2, src3, dst, 64);
214*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src4, src5, src6, src7, dst + 8, 64);
215*fb1b10abSAndroid Build Coastguard Worker   dst += 16;
216*fb1b10abSAndroid Build Coastguard Worker 
217*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst, 64, src0, src1, src2, src3);
218*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst + 8, 64, src4, src5, src6, src7);
219*fb1b10abSAndroid Build Coastguard Worker 
220*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
221*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
222*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp0, tmp1, tmp2, tmp3, 1);
223*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp4, tmp5, tmp6, tmp7, 1);
224*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
225*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
226*fb1b10abSAndroid Build Coastguard Worker 
227*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src0, src1, src2, src3, dst, 64);
228*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src4, src5, src6, src7, dst + 8, 64);
229*fb1b10abSAndroid Build Coastguard Worker   dst += 16;
230*fb1b10abSAndroid Build Coastguard Worker 
231*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst, 64, src0, src1, src2, src3);
232*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst + 8, 64, src4, src5, src6, src7);
233*fb1b10abSAndroid Build Coastguard Worker 
234*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
235*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
236*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp0, tmp1, tmp2, tmp3, 1);
237*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp4, tmp5, tmp6, tmp7, 1);
238*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
239*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
240*fb1b10abSAndroid Build Coastguard Worker 
241*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src0, src1, src2, src3, dst, 64);
242*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src4, src5, src6, src7, dst + 8, 64);
243*fb1b10abSAndroid Build Coastguard Worker   dst += 16;
244*fb1b10abSAndroid Build Coastguard Worker 
245*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst, 64, src0, src1, src2, src3);
246*fb1b10abSAndroid Build Coastguard Worker   LD_SH4(dst + 8, 64, src4, src5, src6, src7);
247*fb1b10abSAndroid Build Coastguard Worker 
248*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4,
249*fb1b10abSAndroid Build Coastguard Worker               tmp6, tmp7, tmp5, tmp3, tmp1);
250*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp0, tmp1, tmp2, tmp3, 1);
251*fb1b10abSAndroid Build Coastguard Worker   SRA_4V(tmp4, tmp5, tmp6, tmp7, 1);
252*fb1b10abSAndroid Build Coastguard Worker   BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4,
253*fb1b10abSAndroid Build Coastguard Worker               src5, src7, src6, src3, src2);
254*fb1b10abSAndroid Build Coastguard Worker 
255*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src0, src1, src2, src3, dst, 64);
256*fb1b10abSAndroid Build Coastguard Worker   ST_SH4(src4, src5, src6, src7, dst + 8, 64);
257*fb1b10abSAndroid Build Coastguard Worker }
258*fb1b10abSAndroid Build Coastguard Worker 
vpx_satd_msa(const int16_t * data,int length)259*fb1b10abSAndroid Build Coastguard Worker int vpx_satd_msa(const int16_t *data, int length) {
260*fb1b10abSAndroid Build Coastguard Worker   int i, satd;
261*fb1b10abSAndroid Build Coastguard Worker   v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
262*fb1b10abSAndroid Build Coastguard Worker   v8i16 src8, src9, src10, src11, src12, src13, src14, src15;
263*fb1b10abSAndroid Build Coastguard Worker   v8i16 zero = { 0 };
264*fb1b10abSAndroid Build Coastguard Worker   v8u16 tmp0_h, tmp1_h, tmp2_h, tmp3_h, tmp4_h, tmp5_h, tmp6_h, tmp7_h;
265*fb1b10abSAndroid Build Coastguard Worker   v4u32 tmp0_w = { 0 };
266*fb1b10abSAndroid Build Coastguard Worker 
267*fb1b10abSAndroid Build Coastguard Worker   if (16 == length) {
268*fb1b10abSAndroid Build Coastguard Worker     LD_SH2(data, 8, src0, src1);
269*fb1b10abSAndroid Build Coastguard Worker     tmp0_h = (v8u16)__msa_asub_s_h(src0, zero);
270*fb1b10abSAndroid Build Coastguard Worker     tmp1_h = (v8u16)__msa_asub_s_h(src1, zero);
271*fb1b10abSAndroid Build Coastguard Worker     tmp0_w = __msa_hadd_u_w(tmp0_h, tmp0_h);
272*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h);
273*fb1b10abSAndroid Build Coastguard Worker     satd = HADD_UW_U32(tmp0_w);
274*fb1b10abSAndroid Build Coastguard Worker   } else if (64 == length) {
275*fb1b10abSAndroid Build Coastguard Worker     LD_SH8(data, 8, src0, src1, src2, src3, src4, src5, src6, src7);
276*fb1b10abSAndroid Build Coastguard Worker 
277*fb1b10abSAndroid Build Coastguard Worker     tmp0_h = (v8u16)__msa_asub_s_h(src0, zero);
278*fb1b10abSAndroid Build Coastguard Worker     tmp1_h = (v8u16)__msa_asub_s_h(src1, zero);
279*fb1b10abSAndroid Build Coastguard Worker     tmp2_h = (v8u16)__msa_asub_s_h(src2, zero);
280*fb1b10abSAndroid Build Coastguard Worker     tmp3_h = (v8u16)__msa_asub_s_h(src3, zero);
281*fb1b10abSAndroid Build Coastguard Worker     tmp4_h = (v8u16)__msa_asub_s_h(src4, zero);
282*fb1b10abSAndroid Build Coastguard Worker     tmp5_h = (v8u16)__msa_asub_s_h(src5, zero);
283*fb1b10abSAndroid Build Coastguard Worker     tmp6_h = (v8u16)__msa_asub_s_h(src6, zero);
284*fb1b10abSAndroid Build Coastguard Worker     tmp7_h = (v8u16)__msa_asub_s_h(src7, zero);
285*fb1b10abSAndroid Build Coastguard Worker 
286*fb1b10abSAndroid Build Coastguard Worker     tmp0_w = __msa_hadd_u_w(tmp0_h, tmp0_h);
287*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h);
288*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h);
289*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h);
290*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h);
291*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h);
292*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h);
293*fb1b10abSAndroid Build Coastguard Worker     tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h);
294*fb1b10abSAndroid Build Coastguard Worker 
295*fb1b10abSAndroid Build Coastguard Worker     satd = HADD_UW_U32(tmp0_w);
296*fb1b10abSAndroid Build Coastguard Worker   } else if (256 == length) {
297*fb1b10abSAndroid Build Coastguard Worker     for (i = 0; i < 2; ++i) {
298*fb1b10abSAndroid Build Coastguard Worker       LD_SH8(data, 8, src0, src1, src2, src3, src4, src5, src6, src7);
299*fb1b10abSAndroid Build Coastguard Worker       data += 8 * 8;
300*fb1b10abSAndroid Build Coastguard Worker       LD_SH8(data, 8, src8, src9, src10, src11, src12, src13, src14, src15);
301*fb1b10abSAndroid Build Coastguard Worker       data += 8 * 8;
302*fb1b10abSAndroid Build Coastguard Worker 
303*fb1b10abSAndroid Build Coastguard Worker       tmp0_h = (v8u16)__msa_asub_s_h(src0, zero);
304*fb1b10abSAndroid Build Coastguard Worker       tmp1_h = (v8u16)__msa_asub_s_h(src1, zero);
305*fb1b10abSAndroid Build Coastguard Worker       tmp2_h = (v8u16)__msa_asub_s_h(src2, zero);
306*fb1b10abSAndroid Build Coastguard Worker       tmp3_h = (v8u16)__msa_asub_s_h(src3, zero);
307*fb1b10abSAndroid Build Coastguard Worker       tmp4_h = (v8u16)__msa_asub_s_h(src4, zero);
308*fb1b10abSAndroid Build Coastguard Worker       tmp5_h = (v8u16)__msa_asub_s_h(src5, zero);
309*fb1b10abSAndroid Build Coastguard Worker       tmp6_h = (v8u16)__msa_asub_s_h(src6, zero);
310*fb1b10abSAndroid Build Coastguard Worker       tmp7_h = (v8u16)__msa_asub_s_h(src7, zero);
311*fb1b10abSAndroid Build Coastguard Worker 
312*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h);
313*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h);
314*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h);
315*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h);
316*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h);
317*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h);
318*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h);
319*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h);
320*fb1b10abSAndroid Build Coastguard Worker 
321*fb1b10abSAndroid Build Coastguard Worker       tmp0_h = (v8u16)__msa_asub_s_h(src8, zero);
322*fb1b10abSAndroid Build Coastguard Worker       tmp1_h = (v8u16)__msa_asub_s_h(src9, zero);
323*fb1b10abSAndroid Build Coastguard Worker       tmp2_h = (v8u16)__msa_asub_s_h(src10, zero);
324*fb1b10abSAndroid Build Coastguard Worker       tmp3_h = (v8u16)__msa_asub_s_h(src11, zero);
325*fb1b10abSAndroid Build Coastguard Worker       tmp4_h = (v8u16)__msa_asub_s_h(src12, zero);
326*fb1b10abSAndroid Build Coastguard Worker       tmp5_h = (v8u16)__msa_asub_s_h(src13, zero);
327*fb1b10abSAndroid Build Coastguard Worker       tmp6_h = (v8u16)__msa_asub_s_h(src14, zero);
328*fb1b10abSAndroid Build Coastguard Worker       tmp7_h = (v8u16)__msa_asub_s_h(src15, zero);
329*fb1b10abSAndroid Build Coastguard Worker 
330*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h);
331*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h);
332*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h);
333*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h);
334*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h);
335*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h);
336*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h);
337*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h);
338*fb1b10abSAndroid Build Coastguard Worker     }
339*fb1b10abSAndroid Build Coastguard Worker 
340*fb1b10abSAndroid Build Coastguard Worker     satd = HADD_UW_U32(tmp0_w);
341*fb1b10abSAndroid Build Coastguard Worker   } else if (1024 == length) {
342*fb1b10abSAndroid Build Coastguard Worker     for (i = 0; i < 8; ++i) {
343*fb1b10abSAndroid Build Coastguard Worker       LD_SH8(data, 8, src0, src1, src2, src3, src4, src5, src6, src7);
344*fb1b10abSAndroid Build Coastguard Worker       data += 8 * 8;
345*fb1b10abSAndroid Build Coastguard Worker       LD_SH8(data, 8, src8, src9, src10, src11, src12, src13, src14, src15);
346*fb1b10abSAndroid Build Coastguard Worker       data += 8 * 8;
347*fb1b10abSAndroid Build Coastguard Worker 
348*fb1b10abSAndroid Build Coastguard Worker       tmp0_h = (v8u16)__msa_asub_s_h(src0, zero);
349*fb1b10abSAndroid Build Coastguard Worker       tmp1_h = (v8u16)__msa_asub_s_h(src1, zero);
350*fb1b10abSAndroid Build Coastguard Worker       tmp2_h = (v8u16)__msa_asub_s_h(src2, zero);
351*fb1b10abSAndroid Build Coastguard Worker       tmp3_h = (v8u16)__msa_asub_s_h(src3, zero);
352*fb1b10abSAndroid Build Coastguard Worker       tmp4_h = (v8u16)__msa_asub_s_h(src4, zero);
353*fb1b10abSAndroid Build Coastguard Worker       tmp5_h = (v8u16)__msa_asub_s_h(src5, zero);
354*fb1b10abSAndroid Build Coastguard Worker       tmp6_h = (v8u16)__msa_asub_s_h(src6, zero);
355*fb1b10abSAndroid Build Coastguard Worker       tmp7_h = (v8u16)__msa_asub_s_h(src7, zero);
356*fb1b10abSAndroid Build Coastguard Worker 
357*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h);
358*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h);
359*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h);
360*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h);
361*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h);
362*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h);
363*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h);
364*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h);
365*fb1b10abSAndroid Build Coastguard Worker 
366*fb1b10abSAndroid Build Coastguard Worker       tmp0_h = (v8u16)__msa_asub_s_h(src8, zero);
367*fb1b10abSAndroid Build Coastguard Worker       tmp1_h = (v8u16)__msa_asub_s_h(src9, zero);
368*fb1b10abSAndroid Build Coastguard Worker       tmp2_h = (v8u16)__msa_asub_s_h(src10, zero);
369*fb1b10abSAndroid Build Coastguard Worker       tmp3_h = (v8u16)__msa_asub_s_h(src11, zero);
370*fb1b10abSAndroid Build Coastguard Worker       tmp4_h = (v8u16)__msa_asub_s_h(src12, zero);
371*fb1b10abSAndroid Build Coastguard Worker       tmp5_h = (v8u16)__msa_asub_s_h(src13, zero);
372*fb1b10abSAndroid Build Coastguard Worker       tmp6_h = (v8u16)__msa_asub_s_h(src14, zero);
373*fb1b10abSAndroid Build Coastguard Worker       tmp7_h = (v8u16)__msa_asub_s_h(src15, zero);
374*fb1b10abSAndroid Build Coastguard Worker 
375*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h);
376*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h);
377*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h);
378*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h);
379*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h);
380*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h);
381*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h);
382*fb1b10abSAndroid Build Coastguard Worker       tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h);
383*fb1b10abSAndroid Build Coastguard Worker     }
384*fb1b10abSAndroid Build Coastguard Worker 
385*fb1b10abSAndroid Build Coastguard Worker     satd = HADD_UW_U32(tmp0_w);
386*fb1b10abSAndroid Build Coastguard Worker   } else {
387*fb1b10abSAndroid Build Coastguard Worker     satd = 0;
388*fb1b10abSAndroid Build Coastguard Worker 
389*fb1b10abSAndroid Build Coastguard Worker     for (i = 0; i < length; ++i) {
390*fb1b10abSAndroid Build Coastguard Worker       satd += abs(data[i]);
391*fb1b10abSAndroid Build Coastguard Worker     }
392*fb1b10abSAndroid Build Coastguard Worker   }
393*fb1b10abSAndroid Build Coastguard Worker 
394*fb1b10abSAndroid Build Coastguard Worker   return satd;
395*fb1b10abSAndroid Build Coastguard Worker }
396*fb1b10abSAndroid Build Coastguard Worker #endif  // !CONFIG_VP9_HIGHBITDEPTH
397*fb1b10abSAndroid Build Coastguard Worker 
vpx_int_pro_row_msa(int16_t hbuf[16],const uint8_t * ref,const int ref_stride,const int height)398*fb1b10abSAndroid Build Coastguard Worker void vpx_int_pro_row_msa(int16_t hbuf[16], const uint8_t *ref,
399*fb1b10abSAndroid Build Coastguard Worker                          const int ref_stride, const int height) {
400*fb1b10abSAndroid Build Coastguard Worker   int i;
401*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
402*fb1b10abSAndroid Build Coastguard Worker   v8i16 hbuf_r = { 0 };
403*fb1b10abSAndroid Build Coastguard Worker   v8i16 hbuf_l = { 0 };
404*fb1b10abSAndroid Build Coastguard Worker   v8i16 ref0_r, ref0_l, ref1_r, ref1_l, ref2_r, ref2_l, ref3_r, ref3_l;
405*fb1b10abSAndroid Build Coastguard Worker   v8i16 ref4_r, ref4_l, ref5_r, ref5_l, ref6_r, ref6_l, ref7_r, ref7_l;
406*fb1b10abSAndroid Build Coastguard Worker 
407*fb1b10abSAndroid Build Coastguard Worker   if (16 == height) {
408*fb1b10abSAndroid Build Coastguard Worker     for (i = 2; i--;) {
409*fb1b10abSAndroid Build Coastguard Worker       LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
410*fb1b10abSAndroid Build Coastguard Worker       ref += 8 * ref_stride;
411*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref0, ref0_r, ref0_l);
412*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref1, ref1_r, ref1_l);
413*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref2, ref2_r, ref2_l);
414*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref3, ref3_r, ref3_l);
415*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref4, ref4_r, ref4_l);
416*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref5, ref5_r, ref5_l);
417*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref6, ref6_r, ref6_l);
418*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref7, ref7_r, ref7_l);
419*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l,
420*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
421*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l,
422*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
423*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l,
424*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
425*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l,
426*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
427*fb1b10abSAndroid Build Coastguard Worker     }
428*fb1b10abSAndroid Build Coastguard Worker 
429*fb1b10abSAndroid Build Coastguard Worker     SRA_2V(hbuf_r, hbuf_l, 3);
430*fb1b10abSAndroid Build Coastguard Worker     ST_SH2(hbuf_r, hbuf_l, hbuf, 8);
431*fb1b10abSAndroid Build Coastguard Worker   } else if (32 == height) {
432*fb1b10abSAndroid Build Coastguard Worker     for (i = 2; i--;) {
433*fb1b10abSAndroid Build Coastguard Worker       LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
434*fb1b10abSAndroid Build Coastguard Worker       ref += 8 * ref_stride;
435*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref0, ref0_r, ref0_l);
436*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref1, ref1_r, ref1_l);
437*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref2, ref2_r, ref2_l);
438*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref3, ref3_r, ref3_l);
439*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref4, ref4_r, ref4_l);
440*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref5, ref5_r, ref5_l);
441*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref6, ref6_r, ref6_l);
442*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref7, ref7_r, ref7_l);
443*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l,
444*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
445*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l,
446*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
447*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l,
448*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
449*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l,
450*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
451*fb1b10abSAndroid Build Coastguard Worker       LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
452*fb1b10abSAndroid Build Coastguard Worker       ref += 8 * ref_stride;
453*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref0, ref0_r, ref0_l);
454*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref1, ref1_r, ref1_l);
455*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref2, ref2_r, ref2_l);
456*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref3, ref3_r, ref3_l);
457*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref4, ref4_r, ref4_l);
458*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref5, ref5_r, ref5_l);
459*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref6, ref6_r, ref6_l);
460*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref7, ref7_r, ref7_l);
461*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l,
462*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
463*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l,
464*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
465*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l,
466*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
467*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l,
468*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
469*fb1b10abSAndroid Build Coastguard Worker     }
470*fb1b10abSAndroid Build Coastguard Worker 
471*fb1b10abSAndroid Build Coastguard Worker     SRA_2V(hbuf_r, hbuf_l, 4);
472*fb1b10abSAndroid Build Coastguard Worker     ST_SH2(hbuf_r, hbuf_l, hbuf, 8);
473*fb1b10abSAndroid Build Coastguard Worker   } else if (64 == height) {
474*fb1b10abSAndroid Build Coastguard Worker     for (i = 4; i--;) {
475*fb1b10abSAndroid Build Coastguard Worker       LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
476*fb1b10abSAndroid Build Coastguard Worker       ref += 8 * ref_stride;
477*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref0, ref0_r, ref0_l);
478*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref1, ref1_r, ref1_l);
479*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref2, ref2_r, ref2_l);
480*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref3, ref3_r, ref3_l);
481*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref4, ref4_r, ref4_l);
482*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref5, ref5_r, ref5_l);
483*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref6, ref6_r, ref6_l);
484*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref7, ref7_r, ref7_l);
485*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l,
486*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
487*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l,
488*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
489*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l,
490*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
491*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l,
492*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
493*fb1b10abSAndroid Build Coastguard Worker       LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
494*fb1b10abSAndroid Build Coastguard Worker       ref += 8 * ref_stride;
495*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref0, ref0_r, ref0_l);
496*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref1, ref1_r, ref1_l);
497*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref2, ref2_r, ref2_l);
498*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref3, ref3_r, ref3_l);
499*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref4, ref4_r, ref4_l);
500*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref5, ref5_r, ref5_l);
501*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref6, ref6_r, ref6_l);
502*fb1b10abSAndroid Build Coastguard Worker       UNPCK_UB_SH(ref7, ref7_r, ref7_l);
503*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l,
504*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
505*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l,
506*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
507*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l,
508*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
509*fb1b10abSAndroid Build Coastguard Worker       ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l,
510*fb1b10abSAndroid Build Coastguard Worker            hbuf_r, hbuf_l, hbuf_r, hbuf_l);
511*fb1b10abSAndroid Build Coastguard Worker     }
512*fb1b10abSAndroid Build Coastguard Worker 
513*fb1b10abSAndroid Build Coastguard Worker     SRA_2V(hbuf_r, hbuf_l, 5);
514*fb1b10abSAndroid Build Coastguard Worker     ST_SH2(hbuf_r, hbuf_l, hbuf, 8);
515*fb1b10abSAndroid Build Coastguard Worker   } else {
516*fb1b10abSAndroid Build Coastguard Worker     const int norm_factor = height >> 1;
517*fb1b10abSAndroid Build Coastguard Worker     int cnt;
518*fb1b10abSAndroid Build Coastguard Worker 
519*fb1b10abSAndroid Build Coastguard Worker     for (cnt = 0; cnt < 16; cnt++) {
520*fb1b10abSAndroid Build Coastguard Worker       hbuf[cnt] = 0;
521*fb1b10abSAndroid Build Coastguard Worker     }
522*fb1b10abSAndroid Build Coastguard Worker 
523*fb1b10abSAndroid Build Coastguard Worker     for (i = 0; i < height; ++i) {
524*fb1b10abSAndroid Build Coastguard Worker       for (cnt = 0; cnt < 16; cnt++) {
525*fb1b10abSAndroid Build Coastguard Worker         hbuf[cnt] += ref[cnt];
526*fb1b10abSAndroid Build Coastguard Worker       }
527*fb1b10abSAndroid Build Coastguard Worker 
528*fb1b10abSAndroid Build Coastguard Worker       ref += ref_stride;
529*fb1b10abSAndroid Build Coastguard Worker     }
530*fb1b10abSAndroid Build Coastguard Worker 
531*fb1b10abSAndroid Build Coastguard Worker     for (cnt = 0; cnt < 16; cnt++) {
532*fb1b10abSAndroid Build Coastguard Worker       hbuf[cnt] /= norm_factor;
533*fb1b10abSAndroid Build Coastguard Worker     }
534*fb1b10abSAndroid Build Coastguard Worker   }
535*fb1b10abSAndroid Build Coastguard Worker }
536*fb1b10abSAndroid Build Coastguard Worker 
vpx_int_pro_col_msa(const uint8_t * ref,const int width)537*fb1b10abSAndroid Build Coastguard Worker int16_t vpx_int_pro_col_msa(const uint8_t *ref, const int width) {
538*fb1b10abSAndroid Build Coastguard Worker   int16_t sum;
539*fb1b10abSAndroid Build Coastguard Worker   v16u8 ref0, ref1, ref2, ref3;
540*fb1b10abSAndroid Build Coastguard Worker   v8u16 ref0_h;
541*fb1b10abSAndroid Build Coastguard Worker 
542*fb1b10abSAndroid Build Coastguard Worker   if (16 == width) {
543*fb1b10abSAndroid Build Coastguard Worker     ref0 = LD_UB(ref);
544*fb1b10abSAndroid Build Coastguard Worker     ref0_h = __msa_hadd_u_h(ref0, ref0);
545*fb1b10abSAndroid Build Coastguard Worker     sum = HADD_UH_U32(ref0_h);
546*fb1b10abSAndroid Build Coastguard Worker   } else if (32 == width) {
547*fb1b10abSAndroid Build Coastguard Worker     LD_UB2(ref, 16, ref0, ref1);
548*fb1b10abSAndroid Build Coastguard Worker     ref0_h = __msa_hadd_u_h(ref0, ref0);
549*fb1b10abSAndroid Build Coastguard Worker     ref0_h += __msa_hadd_u_h(ref1, ref1);
550*fb1b10abSAndroid Build Coastguard Worker     sum = HADD_UH_U32(ref0_h);
551*fb1b10abSAndroid Build Coastguard Worker   } else if (64 == width) {
552*fb1b10abSAndroid Build Coastguard Worker     LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
553*fb1b10abSAndroid Build Coastguard Worker     ref0_h = __msa_hadd_u_h(ref0, ref0);
554*fb1b10abSAndroid Build Coastguard Worker     ref0_h += __msa_hadd_u_h(ref1, ref1);
555*fb1b10abSAndroid Build Coastguard Worker     ref0_h += __msa_hadd_u_h(ref2, ref2);
556*fb1b10abSAndroid Build Coastguard Worker     ref0_h += __msa_hadd_u_h(ref3, ref3);
557*fb1b10abSAndroid Build Coastguard Worker     sum = HADD_UH_U32(ref0_h);
558*fb1b10abSAndroid Build Coastguard Worker   } else {
559*fb1b10abSAndroid Build Coastguard Worker     int idx;
560*fb1b10abSAndroid Build Coastguard Worker 
561*fb1b10abSAndroid Build Coastguard Worker     sum = 0;
562*fb1b10abSAndroid Build Coastguard Worker     for (idx = 0; idx < width; ++idx) {
563*fb1b10abSAndroid Build Coastguard Worker       sum += ref[idx];
564*fb1b10abSAndroid Build Coastguard Worker     }
565*fb1b10abSAndroid Build Coastguard Worker   }
566*fb1b10abSAndroid Build Coastguard Worker 
567*fb1b10abSAndroid Build Coastguard Worker   return sum;
568*fb1b10abSAndroid Build Coastguard Worker }
569*fb1b10abSAndroid Build Coastguard Worker 
vpx_vector_var_msa(const int16_t * ref,const int16_t * src,const int bwl)570*fb1b10abSAndroid Build Coastguard Worker int vpx_vector_var_msa(const int16_t *ref, const int16_t *src, const int bwl) {
571*fb1b10abSAndroid Build Coastguard Worker   int sse, mean, var;
572*fb1b10abSAndroid Build Coastguard Worker   v8i16 src0, src1, src2, src3, src4, src5, src6, src7, ref0, ref1, ref2;
573*fb1b10abSAndroid Build Coastguard Worker   v8i16 ref3, ref4, ref5, ref6, ref7, src_l0_m, src_l1_m, src_l2_m, src_l3_m;
574*fb1b10abSAndroid Build Coastguard Worker   v8i16 src_l4_m, src_l5_m, src_l6_m, src_l7_m;
575*fb1b10abSAndroid Build Coastguard Worker   v4i32 res_l0_m, res_l1_m, res_l2_m, res_l3_m, res_l4_m, res_l5_m, res_l6_m;
576*fb1b10abSAndroid Build Coastguard Worker   v4i32 res_l7_m, mean_v;
577*fb1b10abSAndroid Build Coastguard Worker   v2i64 sse_v;
578*fb1b10abSAndroid Build Coastguard Worker 
579*fb1b10abSAndroid Build Coastguard Worker   if (2 == bwl) {
580*fb1b10abSAndroid Build Coastguard Worker     LD_SH2(src, 8, src0, src1);
581*fb1b10abSAndroid Build Coastguard Worker     LD_SH2(ref, 8, ref0, ref1);
582*fb1b10abSAndroid Build Coastguard Worker 
583*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src0, ref0, src_l0_m, src_l1_m);
584*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src1, ref1, src_l2_m, src_l3_m);
585*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m);
586*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m);
587*fb1b10abSAndroid Build Coastguard Worker     sse_v = __msa_dotp_s_d(res_l0_m, res_l0_m);
588*fb1b10abSAndroid Build Coastguard Worker     sse_v = __msa_dpadd_s_d(sse_v, res_l1_m, res_l1_m);
589*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v);
590*fb1b10abSAndroid Build Coastguard Worker     mean_v = res_l0_m + res_l1_m;
591*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l2_m + res_l3_m;
592*fb1b10abSAndroid Build Coastguard Worker 
593*fb1b10abSAndroid Build Coastguard Worker     sse_v += __msa_splati_d(sse_v, 1);
594*fb1b10abSAndroid Build Coastguard Worker     sse = __msa_copy_s_w((v4i32)sse_v, 0);
595*fb1b10abSAndroid Build Coastguard Worker 
596*fb1b10abSAndroid Build Coastguard Worker     mean = HADD_SW_S32(mean_v);
597*fb1b10abSAndroid Build Coastguard Worker   } else if (3 == bwl) {
598*fb1b10abSAndroid Build Coastguard Worker     LD_SH4(src, 8, src0, src1, src2, src3);
599*fb1b10abSAndroid Build Coastguard Worker     LD_SH4(ref, 8, ref0, ref1, ref2, ref3);
600*fb1b10abSAndroid Build Coastguard Worker 
601*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src0, ref0, src_l0_m, src_l1_m);
602*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src1, ref1, src_l2_m, src_l3_m);
603*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src2, ref2, src_l4_m, src_l5_m);
604*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src3, ref3, src_l6_m, src_l7_m);
605*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m);
606*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m);
607*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l4_m, src_l5_m, res_l4_m, res_l5_m);
608*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l6_m, src_l7_m, res_l6_m, res_l7_m);
609*fb1b10abSAndroid Build Coastguard Worker     sse_v = __msa_dotp_s_d(res_l0_m, res_l0_m);
610*fb1b10abSAndroid Build Coastguard Worker     sse_v = __msa_dpadd_s_d(sse_v, res_l1_m, res_l1_m);
611*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v);
612*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l4_m, res_l5_m, sse_v, sse_v);
613*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l6_m, res_l7_m, sse_v, sse_v);
614*fb1b10abSAndroid Build Coastguard Worker     mean_v = res_l0_m + res_l1_m;
615*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l2_m + res_l3_m;
616*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l4_m + res_l5_m;
617*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l6_m + res_l7_m;
618*fb1b10abSAndroid Build Coastguard Worker 
619*fb1b10abSAndroid Build Coastguard Worker     sse_v += __msa_splati_d(sse_v, 1);
620*fb1b10abSAndroid Build Coastguard Worker     sse = __msa_copy_s_w((v4i32)sse_v, 0);
621*fb1b10abSAndroid Build Coastguard Worker 
622*fb1b10abSAndroid Build Coastguard Worker     mean = HADD_SW_S32(mean_v);
623*fb1b10abSAndroid Build Coastguard Worker   } else if (4 == bwl) {
624*fb1b10abSAndroid Build Coastguard Worker     LD_SH8(src, 8, src0, src1, src2, src3, src4, src5, src6, src7);
625*fb1b10abSAndroid Build Coastguard Worker     LD_SH8(ref, 8, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
626*fb1b10abSAndroid Build Coastguard Worker 
627*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src0, ref0, src_l0_m, src_l1_m);
628*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src1, ref1, src_l2_m, src_l3_m);
629*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src2, ref2, src_l4_m, src_l5_m);
630*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src3, ref3, src_l6_m, src_l7_m);
631*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m);
632*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m);
633*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l4_m, src_l5_m, res_l4_m, res_l5_m);
634*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l6_m, src_l7_m, res_l6_m, res_l7_m);
635*fb1b10abSAndroid Build Coastguard Worker     sse_v = __msa_dotp_s_d(res_l0_m, res_l0_m);
636*fb1b10abSAndroid Build Coastguard Worker     sse_v = __msa_dpadd_s_d(sse_v, res_l1_m, res_l1_m);
637*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v);
638*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l4_m, res_l5_m, sse_v, sse_v);
639*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l6_m, res_l7_m, sse_v, sse_v);
640*fb1b10abSAndroid Build Coastguard Worker     mean_v = res_l0_m + res_l1_m;
641*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l2_m + res_l3_m;
642*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l4_m + res_l5_m;
643*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l6_m + res_l7_m;
644*fb1b10abSAndroid Build Coastguard Worker 
645*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src4, ref4, src_l0_m, src_l1_m);
646*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src5, ref5, src_l2_m, src_l3_m);
647*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src6, ref6, src_l4_m, src_l5_m);
648*fb1b10abSAndroid Build Coastguard Worker     ILVRL_H2_SH(src7, ref7, src_l6_m, src_l7_m);
649*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m);
650*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m);
651*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l4_m, src_l5_m, res_l4_m, res_l5_m);
652*fb1b10abSAndroid Build Coastguard Worker     HSUB_UH2_SW(src_l6_m, src_l7_m, res_l6_m, res_l7_m);
653*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l0_m, res_l1_m, sse_v, sse_v);
654*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v);
655*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l4_m, res_l5_m, sse_v, sse_v);
656*fb1b10abSAndroid Build Coastguard Worker     DPADD_SD2_SD(res_l6_m, res_l7_m, sse_v, sse_v);
657*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l0_m + res_l1_m;
658*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l2_m + res_l3_m;
659*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l4_m + res_l5_m;
660*fb1b10abSAndroid Build Coastguard Worker     mean_v += res_l6_m + res_l7_m;
661*fb1b10abSAndroid Build Coastguard Worker 
662*fb1b10abSAndroid Build Coastguard Worker     sse_v += __msa_splati_d(sse_v, 1);
663*fb1b10abSAndroid Build Coastguard Worker     sse = __msa_copy_s_w((v4i32)sse_v, 0);
664*fb1b10abSAndroid Build Coastguard Worker 
665*fb1b10abSAndroid Build Coastguard Worker     mean = HADD_SW_S32(mean_v);
666*fb1b10abSAndroid Build Coastguard Worker   } else {
667*fb1b10abSAndroid Build Coastguard Worker     int i;
668*fb1b10abSAndroid Build Coastguard Worker     const int width = 4 << bwl;
669*fb1b10abSAndroid Build Coastguard Worker 
670*fb1b10abSAndroid Build Coastguard Worker     sse = 0;
671*fb1b10abSAndroid Build Coastguard Worker     mean = 0;
672*fb1b10abSAndroid Build Coastguard Worker 
673*fb1b10abSAndroid Build Coastguard Worker     for (i = 0; i < width; ++i) {
674*fb1b10abSAndroid Build Coastguard Worker       const int diff = ref[i] - src[i];
675*fb1b10abSAndroid Build Coastguard Worker 
676*fb1b10abSAndroid Build Coastguard Worker       mean += diff;
677*fb1b10abSAndroid Build Coastguard Worker       sse += diff * diff;
678*fb1b10abSAndroid Build Coastguard Worker     }
679*fb1b10abSAndroid Build Coastguard Worker   }
680*fb1b10abSAndroid Build Coastguard Worker 
681*fb1b10abSAndroid Build Coastguard Worker   var = sse - ((mean * mean) >> (bwl + 2));
682*fb1b10abSAndroid Build Coastguard Worker 
683*fb1b10abSAndroid Build Coastguard Worker   return var;
684*fb1b10abSAndroid Build Coastguard Worker }
685*fb1b10abSAndroid Build Coastguard Worker 
vpx_minmax_8x8_msa(const uint8_t * s,int p,const uint8_t * d,int dp,int * min,int * max)686*fb1b10abSAndroid Build Coastguard Worker void vpx_minmax_8x8_msa(const uint8_t *s, int p, const uint8_t *d, int dp,
687*fb1b10abSAndroid Build Coastguard Worker                         int *min, int *max) {
688*fb1b10abSAndroid Build Coastguard Worker   v16u8 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7;
689*fb1b10abSAndroid Build Coastguard Worker   v16u8 diff0, diff1, diff2, diff3, min0, min1, max0, max1;
690*fb1b10abSAndroid Build Coastguard Worker 
691*fb1b10abSAndroid Build Coastguard Worker   LD_UB8(s, p, s0, s1, s2, s3, s4, s5, s6, s7);
692*fb1b10abSAndroid Build Coastguard Worker   LD_UB8(d, dp, d0, d1, d2, d3, d4, d5, d6, d7);
693*fb1b10abSAndroid Build Coastguard Worker   PCKEV_D4_UB(s1, s0, s3, s2, s5, s4, s7, s6, s0, s1, s2, s3);
694*fb1b10abSAndroid Build Coastguard Worker   PCKEV_D4_UB(d1, d0, d3, d2, d5, d4, d7, d6, d0, d1, d2, d3);
695*fb1b10abSAndroid Build Coastguard Worker 
696*fb1b10abSAndroid Build Coastguard Worker   diff0 = __msa_asub_u_b(s0, d0);
697*fb1b10abSAndroid Build Coastguard Worker   diff1 = __msa_asub_u_b(s1, d1);
698*fb1b10abSAndroid Build Coastguard Worker   diff2 = __msa_asub_u_b(s2, d2);
699*fb1b10abSAndroid Build Coastguard Worker   diff3 = __msa_asub_u_b(s3, d3);
700*fb1b10abSAndroid Build Coastguard Worker 
701*fb1b10abSAndroid Build Coastguard Worker   min0 = __msa_min_u_b(diff0, diff1);
702*fb1b10abSAndroid Build Coastguard Worker   min1 = __msa_min_u_b(diff2, diff3);
703*fb1b10abSAndroid Build Coastguard Worker   min0 = __msa_min_u_b(min0, min1);
704*fb1b10abSAndroid Build Coastguard Worker 
705*fb1b10abSAndroid Build Coastguard Worker   max0 = __msa_max_u_b(diff0, diff1);
706*fb1b10abSAndroid Build Coastguard Worker   max1 = __msa_max_u_b(diff2, diff3);
707*fb1b10abSAndroid Build Coastguard Worker   max0 = __msa_max_u_b(max0, max1);
708*fb1b10abSAndroid Build Coastguard Worker 
709*fb1b10abSAndroid Build Coastguard Worker   min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 8);
710*fb1b10abSAndroid Build Coastguard Worker   min0 = __msa_min_u_b(min0, min1);
711*fb1b10abSAndroid Build Coastguard Worker   max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 8);
712*fb1b10abSAndroid Build Coastguard Worker   max0 = __msa_max_u_b(max0, max1);
713*fb1b10abSAndroid Build Coastguard Worker 
714*fb1b10abSAndroid Build Coastguard Worker   min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 4);
715*fb1b10abSAndroid Build Coastguard Worker   min0 = __msa_min_u_b(min0, min1);
716*fb1b10abSAndroid Build Coastguard Worker   max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 4);
717*fb1b10abSAndroid Build Coastguard Worker   max0 = __msa_max_u_b(max0, max1);
718*fb1b10abSAndroid Build Coastguard Worker 
719*fb1b10abSAndroid Build Coastguard Worker   min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 2);
720*fb1b10abSAndroid Build Coastguard Worker   min0 = __msa_min_u_b(min0, min1);
721*fb1b10abSAndroid Build Coastguard Worker   max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 2);
722*fb1b10abSAndroid Build Coastguard Worker   max0 = __msa_max_u_b(max0, max1);
723*fb1b10abSAndroid Build Coastguard Worker 
724*fb1b10abSAndroid Build Coastguard Worker   min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 1);
725*fb1b10abSAndroid Build Coastguard Worker   min0 = __msa_min_u_b(min0, min1);
726*fb1b10abSAndroid Build Coastguard Worker   max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 1);
727*fb1b10abSAndroid Build Coastguard Worker   max0 = __msa_max_u_b(max0, max1);
728*fb1b10abSAndroid Build Coastguard Worker 
729*fb1b10abSAndroid Build Coastguard Worker   *min = min0[0];
730*fb1b10abSAndroid Build Coastguard Worker   *max = max0[0];
731*fb1b10abSAndroid Build Coastguard Worker }
732