xref: /aosp_15_r20/external/libvpx/vpx_dsp/mips/variance_mmi.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker  *
4*fb1b10abSAndroid Build Coastguard Worker  *  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker  *  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker  *  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker  *  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker  *  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker  */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
12*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/variance.h"
13*fb1b10abSAndroid Build Coastguard Worker #include "vpx_ports/mem.h"
14*fb1b10abSAndroid Build Coastguard Worker #include "vpx/vpx_integer.h"
15*fb1b10abSAndroid Build Coastguard Worker #include "vpx_ports/asmdefs_mmi.h"
16*fb1b10abSAndroid Build Coastguard Worker 
/* Two-tap filter coefficient pairs, one row per index 0..7.
 * Row k is { 128 - 16 * k, 16 * k }, so the two taps of every row sum to 128
 * and row 0 ({ 128, 0 }) leaves the first tap's input unscaled relative to 128.
 * NOTE(review): presumably indexed by the sub-pixel x/y offset in the
 * sub-pixel variance functions later in this file -- the users are not visible
 * in this part of the file, confirm there. */
17*fb1b10abSAndroid Build Coastguard Worker static const uint8_t bilinear_filters[8][2] = {
18*fb1b10abSAndroid Build Coastguard Worker   { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
19*fb1b10abSAndroid Build Coastguard Worker   { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
20*fb1b10abSAndroid Build Coastguard Worker };
21*fb1b10abSAndroid Build Coastguard Worker 
22*fb1b10abSAndroid Build Coastguard Worker /* Use VARIANCE_SSE_SUM_8_FOR_W64 in vpx_variance64x64,vpx_variance64x32,
23*fb1b10abSAndroid Build Coastguard Worker    vpx_variance32x64. VARIANCE_SSE_SUM_8 will lead to sum overflow. */
/* Inline-asm fragment: processes the 8 bytes held in %[ftmp1] and %[ftmp2].
 *   sse: |ftmp1 - ftmp2| per byte is widened to halfwords, multiplied by
 *        itself with pmaddhw and added to %[ftmp10] with word adds.
 *   sum: both inputs are widened byte -> halfword -> word, subtracted
 *        (ftmp1 lanes minus ftmp2 lanes) and added to %[ftmp9] with word adds,
 *        i.e. the running sum is kept in word lanes here, whereas
 *        VARIANCE_SSE_SUM_8 accumulates in halfword lanes.
 * Inputs:   %[ftmp1], %[ftmp2] (both overwritten by the sum step).
 * In/out:   %[ftmp9] (sum accumulator), %[ftmp10] (sse accumulator).
 * Scratch:  %[ftmp3] .. %[ftmp8].
 * %[ftmp0] is the second operand of every unpack; the widening only yields
 * zero-extended values if it holds zero -- assumed to be cleared by the
 * enclosing asm block (not visible here).
 * NOTE(review): ftmp1/ftmp2 are presumably the source/reference pixels, as in
 * VARIANCE_SSE_8 where they are loaded from src_ptr/ref_ptr -- confirm at the
 * call sites. */
24*fb1b10abSAndroid Build Coastguard Worker #define VARIANCE_SSE_SUM_8_FOR_W64                                  \
25*fb1b10abSAndroid Build Coastguard Worker   /* sse */                                                         \
26*fb1b10abSAndroid Build Coastguard Worker   "pasubub    %[ftmp3],   %[ftmp1],       %[ftmp2]            \n\t" \
27*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp3],       %[ftmp0]            \n\t" \
28*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t" \
29*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp6],   %[ftmp4],       %[ftmp4]            \n\t" \
30*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp7],   %[ftmp5],       %[ftmp5]            \n\t" \
31*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp10],  %[ftmp10],      %[ftmp6]            \n\t" \
32*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp10],  %[ftmp10],      %[ftmp7]            \n\t" \
33*fb1b10abSAndroid Build Coastguard Worker                                                                     \
34*fb1b10abSAndroid Build Coastguard Worker   /* sum */                                                         \
35*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t" \
36*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t" \
37*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t" \
38*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t" \
39*fb1b10abSAndroid Build Coastguard Worker   "punpcklhw  %[ftmp1],   %[ftmp3],       %[ftmp0]            \n\t" \
40*fb1b10abSAndroid Build Coastguard Worker   "punpckhhw  %[ftmp2],   %[ftmp3],       %[ftmp0]            \n\t" \
41*fb1b10abSAndroid Build Coastguard Worker   "punpcklhw  %[ftmp7],   %[ftmp5],       %[ftmp0]            \n\t" \
42*fb1b10abSAndroid Build Coastguard Worker   "punpckhhw  %[ftmp8],   %[ftmp5],       %[ftmp0]            \n\t" \
43*fb1b10abSAndroid Build Coastguard Worker   "psubw      %[ftmp3],   %[ftmp1],       %[ftmp7]            \n\t" \
44*fb1b10abSAndroid Build Coastguard Worker   "psubw      %[ftmp5],   %[ftmp2],       %[ftmp8]            \n\t" \
45*fb1b10abSAndroid Build Coastguard Worker   "punpcklhw  %[ftmp1],   %[ftmp4],       %[ftmp0]            \n\t" \
46*fb1b10abSAndroid Build Coastguard Worker   "punpckhhw  %[ftmp2],   %[ftmp4],       %[ftmp0]            \n\t" \
47*fb1b10abSAndroid Build Coastguard Worker   "punpcklhw  %[ftmp7],   %[ftmp6],       %[ftmp0]            \n\t" \
48*fb1b10abSAndroid Build Coastguard Worker   "punpckhhw  %[ftmp8],   %[ftmp6],       %[ftmp0]            \n\t" \
49*fb1b10abSAndroid Build Coastguard Worker   "psubw      %[ftmp4],   %[ftmp1],       %[ftmp7]            \n\t" \
50*fb1b10abSAndroid Build Coastguard Worker   "psubw      %[ftmp6],   %[ftmp2],       %[ftmp8]            \n\t" \
51*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp9],   %[ftmp9],       %[ftmp3]            \n\t" \
52*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp9],   %[ftmp9],       %[ftmp4]            \n\t" \
53*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp9],   %[ftmp9],       %[ftmp5]            \n\t" \
54*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp9],   %[ftmp9],       %[ftmp6]            \n\t"
55*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment for a 4-pixel-wide step: only the low half of
 * %[ftmp1] and %[ftmp2] is used (punpcklbh only, no punpckhbh).
 *   sse: |ftmp1 - ftmp2| per byte, low four bytes widened to halfwords,
 *        multiplied by itself with pmaddhw and added to %[ftmp6] (word add).
 *   sum: the low four bytes of ftmp1 are added to %[ftmp7] and the low four
 *        bytes of ftmp2 to %[ftmp8], both with halfword adds. The two inputs
 *        are accumulated separately; no subtraction happens in this macro, so
 *        combining the two totals is left to the enclosing code (not visible
 *        here).
 * Inputs:   %[ftmp1], %[ftmp2] (not modified).
 * In/out:   %[ftmp6] (sse), %[ftmp7], %[ftmp8] (per-input sums).
 * Scratch:  %[ftmp3], %[ftmp4], %[ftmp5].
 * %[ftmp0] is the unpack partner and is assumed to hold zero. */
56*fb1b10abSAndroid Build Coastguard Worker #define VARIANCE_SSE_SUM_4                                          \
57*fb1b10abSAndroid Build Coastguard Worker   /* sse */                                                         \
58*fb1b10abSAndroid Build Coastguard Worker   "pasubub    %[ftmp3],   %[ftmp1],       %[ftmp2]            \n\t" \
59*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp3],       %[ftmp0]            \n\t" \
60*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp5],   %[ftmp4],       %[ftmp4]            \n\t" \
61*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t" \
62*fb1b10abSAndroid Build Coastguard Worker                                                                     \
63*fb1b10abSAndroid Build Coastguard Worker   /* sum */                                                         \
64*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t" \
65*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t" \
66*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp7],   %[ftmp7],       %[ftmp3]            \n\t" \
67*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp8],   %[ftmp8],       %[ftmp4]            \n\t"
68*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: processes the 8 bytes held in %[ftmp1] and %[ftmp2].
 *   sse: |ftmp1 - ftmp2| per byte is widened to halfwords, multiplied by
 *        itself with pmaddhw and added to %[ftmp8] with word adds.
 *   sum: the bytes of ftmp1 are widened to halfwords and added to %[ftmp10];
 *        the bytes of ftmp2 are widened and added to %[ftmp12]. Both use
 *        halfword adds, and the two inputs are accumulated separately (no
 *        subtraction in this macro).
 * Because the sums live in 16-bit lanes they can wrap for large blocks; the
 * comment on VARIANCE_SSE_SUM_8_FOR_W64 names the block sizes that must use
 * that macro instead.
 * Inputs:   %[ftmp1], %[ftmp2] (not modified).
 * In/out:   %[ftmp8] (sse), %[ftmp10], %[ftmp12] (per-input sums).
 * Scratch:  %[ftmp3] .. %[ftmp7].
 * %[ftmp0] is the unpack partner and is assumed to hold zero. */
69*fb1b10abSAndroid Build Coastguard Worker #define VARIANCE_SSE_SUM_8                                          \
70*fb1b10abSAndroid Build Coastguard Worker   /* sse */                                                         \
71*fb1b10abSAndroid Build Coastguard Worker   "pasubub    %[ftmp3],   %[ftmp1],       %[ftmp2]            \n\t" \
72*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp3],       %[ftmp0]            \n\t" \
73*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t" \
74*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp6],   %[ftmp4],       %[ftmp4]            \n\t" \
75*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp7],   %[ftmp5],       %[ftmp5]            \n\t" \
76*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp8],   %[ftmp8],       %[ftmp6]            \n\t" \
77*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp8],   %[ftmp8],       %[ftmp7]            \n\t" \
78*fb1b10abSAndroid Build Coastguard Worker                                                                     \
79*fb1b10abSAndroid Build Coastguard Worker   /* sum */                                                         \
80*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t" \
81*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t" \
82*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]            \n\t" \
83*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t" \
84*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp10],  %[ftmp10],      %[ftmp3]            \n\t" \
85*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp10],  %[ftmp10],      %[ftmp4]            \n\t" \
86*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp12],  %[ftmp12],      %[ftmp5]            \n\t" \
87*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp12],  %[ftmp12],      %[ftmp6]            \n\t"
88*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: loads 8 bytes from %[src_ptr] (offsets 0..7) into
 * %[ftmp1] and 8 bytes from %[ref_ptr] (offsets 0..7) into %[ftmp2] using the
 * gsldlc1/gsldrc1 left/right load pair, then adds the squared byte differences
 * to %[ftmp8] (word adds). No sum is accumulated -- this is the sse-only
 * variant.
 * In/out:   %[ftmp8] (sse accumulator).
 * Scratch:  %[ftmp1] .. %[ftmp7].
 * The pointers are not advanced here; stepping to the next row is left to the
 * enclosing asm block.
 * %[ftmp0] is the unpack partner and is assumed to hold zero. */
89*fb1b10abSAndroid Build Coastguard Worker #define VARIANCE_SSE_8                                              \
90*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t" \
91*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t" \
92*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t" \
93*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t" \
94*fb1b10abSAndroid Build Coastguard Worker   "pasubub    %[ftmp3],   %[ftmp1],       %[ftmp2]            \n\t" \
95*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp3],       %[ftmp0]            \n\t" \
96*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t" \
97*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp6],   %[ftmp4],       %[ftmp4]            \n\t" \
98*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp7],   %[ftmp5],       %[ftmp5]            \n\t" \
99*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp8],   %[ftmp8],       %[ftmp6]            \n\t" \
100*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp8],   %[ftmp8],       %[ftmp7]            \n\t"
101*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: sse-only accumulation over 16 bytes of one row.
 * Expands VARIANCE_SSE_8 for offsets 0..7, then repeats the same
 * load / absolute-difference / square / accumulate sequence for offsets 8..15
 * of %[src_ptr] and %[ref_ptr].
 * In/out:   %[ftmp8] (sse accumulator, word adds).
 * Scratch:  %[ftmp1] .. %[ftmp7].
 * The pointers are not advanced here.
 * %[ftmp0] is the unpack partner and is assumed to hold zero. */
102*fb1b10abSAndroid Build Coastguard Worker #define VARIANCE_SSE_16                                             \
103*fb1b10abSAndroid Build Coastguard Worker   VARIANCE_SSE_8                                                    \
104*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t" \
105*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
106*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp2],   0x0f(%[ref_ptr])                    \n\t" \
107*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp2],   0x08(%[ref_ptr])                    \n\t" \
108*fb1b10abSAndroid Build Coastguard Worker   "pasubub    %[ftmp3],   %[ftmp1],       %[ftmp2]            \n\t" \
109*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp3],       %[ftmp0]            \n\t" \
110*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t" \
111*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp6],   %[ftmp4],       %[ftmp4]            \n\t" \
112*fb1b10abSAndroid Build Coastguard Worker   "pmaddhw    %[ftmp7],   %[ftmp5],       %[ftmp5]            \n\t" \
113*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp8],   %[ftmp8],       %[ftmp6]            \n\t" \
114*fb1b10abSAndroid Build Coastguard Worker   "paddw      %[ftmp8],   %[ftmp8],       %[ftmp7]            \n\t"
115*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: horizontal (first-pass) two-tap filter for 4 pixels,
 * "A" register set. For each of the four halfword lanes i it computes
 *   ftmp2[i] = (src[i] * filter_x0 + ff_ph_40 + src[i + 1] * filter_x1)
 *              >> ftmp6
 * where src[i] comes from the load at offset 0 and src[i + 1] from the load
 * at offset 1 of %[src_ptr]; only the low four bytes of each load are kept.
 * Output:   %[ftmp2].
 * Scratch:  %[ftmp1], %[ftmp3].
 * Inputs:   %[filter_x0], %[filter_x1], %[ff_ph_40], shift count in %[ftmp6],
 *           and %[ftmp0] as unpack partner (assumed zero).
 * NOTE(review): ff_ph_40 is presumably the rounding constant 0x40 in every
 * halfword and ftmp6 presumably holds a shift of 7 (taps sum to 128) -- both
 * are set up outside this macro, confirm there.
 * NOTE(review): each load pair fetches a full 8 bytes, so bytes at offsets
 * 0..8 of src_ptr are read although only offsets 0..4 contribute. */
116*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A                       \
117*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[0]~fdata3[3], store at ftmp2*/                \
118*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t" \
119*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t" \
120*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t" \
121*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
122*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x01(%[src_ptr])                    \n\t" \
123*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t" \
124*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp2],   %[ftmp2],       %[filter_x0]        \n\t" \
125*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ff_ph_40]         \n\t" \
126*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp3],   %[ftmp3],       %[filter_x1]        \n\t" \
127*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t" \
128*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t"
129*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: horizontal (first-pass) two-tap filter for 4 pixels,
 * "B" register set. Same computation as the _4_A variant, but the filtered
 * row is left in %[ftmp4] and %[ftmp5] is the scratch register, so a row held
 * in %[ftmp2] by the _A variant is not disturbed:
 *   ftmp4[i] = (src[i] * filter_x0 + ff_ph_40 + src[i + 1] * filter_x1)
 *              >> ftmp6
 * Output:   %[ftmp4].
 * Scratch:  %[ftmp1], %[ftmp5].
 * Inputs:   %[filter_x0], %[filter_x1], %[ff_ph_40], shift count in %[ftmp6],
 *           and %[ftmp0] as unpack partner (assumed zero).
 * NOTE(review): each load pair fetches a full 8 bytes, so bytes at offsets
 * 0..8 of src_ptr are read although only offsets 0..4 contribute. */
130*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B                       \
131*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[0]~fdata3[3], store at ftmp4*/                \
132*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t" \
133*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t" \
134*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t" \
135*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
136*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x01(%[src_ptr])                    \n\t" \
137*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t" \
138*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp4],   %[ftmp4],       %[filter_x0]        \n\t" \
139*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ff_ph_40]         \n\t" \
140*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp5],   %[ftmp5],       %[filter_x1]        \n\t" \
141*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t" \
142*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp6]            \n\t"
143*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: vertical (second-pass) two-tap filter for 4 pixels,
 * "A" ordering. Combines the first-pass row in %[ftmp2] with the row in
 * %[ftmp4]:
 *   ftmp2[i] = (ftmp2[i] * filter_y0 + ff_ph_40 + ftmp4[i] * filter_y1)
 *              >> ftmp6
 * The result is ANDed with %[mask], packed from halfwords to bytes with
 * packushb (upper half taken from %[ftmp0]) and written to %[temp2_ptr].
 * Destroyed: %[ftmp2] (holds the packed result afterwards), %[ftmp1].
 * Preserved: %[ftmp4], so it can serve as the first row of the following
 *            _4_B step.
 * NOTE(review): only gssdrc1 is issued for the store, without the gssdlc1
 * partner used by the 8-wide variant -- confirm this is the intended way to
 * write the four result bytes and how many bytes it actually touches.
 * NOTE(review): the value of %[mask] is set outside this macro. */
144*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A                      \
145*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[0] ~ temp2[3] */                              \
146*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp2],   %[ftmp2],       %[filter_y0]        \n\t" \
147*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ff_ph_40]         \n\t" \
148*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp4],       %[filter_y1]        \n\t" \
149*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t" \
150*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t" \
151*fb1b10abSAndroid Build Coastguard Worker                                                                     \
152*fb1b10abSAndroid Build Coastguard Worker   /* store: temp2[0] ~ temp2[3] */                                  \
153*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp2],   %[ftmp2],       %[mask]             \n\t" \
154*fb1b10abSAndroid Build Coastguard Worker   "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t" \
155*fb1b10abSAndroid Build Coastguard Worker   "gssdrc1    %[ftmp2],   0x00(%[temp2_ptr])                  \n\t"
156*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: vertical (second-pass) two-tap filter for 4 pixels,
 * "B" ordering -- the mirror image of the _4_A variant with the roles of
 * %[ftmp2] and %[ftmp4] swapped:
 *   ftmp4[i] = (ftmp4[i] * filter_y0 + ff_ph_40 + ftmp2[i] * filter_y1)
 *              >> ftmp6
 * The result is ANDed with %[mask], packed from halfwords to bytes with
 * packushb (upper half taken from %[ftmp0]) and written to %[temp2_ptr].
 * Destroyed: %[ftmp4] (holds the packed result afterwards), %[ftmp1].
 * Preserved: %[ftmp2], so it can serve as the first row of the following
 *            _4_A step.
 * NOTE(review): only gssdrc1 is issued for the store, without the gssdlc1
 * partner used by the 8-wide variant -- confirm this is intended. */
157*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B                      \
158*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[0] ~ temp2[3] */                              \
159*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp4],   %[ftmp4],       %[filter_y0]        \n\t" \
160*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ff_ph_40]         \n\t" \
161*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp2],       %[filter_y1]        \n\t" \
162*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ftmp1]            \n\t" \
163*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp6]            \n\t" \
164*fb1b10abSAndroid Build Coastguard Worker                                                                     \
165*fb1b10abSAndroid Build Coastguard Worker   /* store: temp2[0] ~ temp2[3] */                                  \
166*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp4],   %[ftmp4],       %[mask]             \n\t" \
167*fb1b10abSAndroid Build Coastguard Worker   "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t" \
168*fb1b10abSAndroid Build Coastguard Worker   "gssdrc1    %[ftmp4],   0x00(%[temp2_ptr])                  \n\t"
169*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: horizontal (first-pass) two-tap filter for 8 pixels,
 * "A" register set. For each pixel i in 0..7:
 *   out[i] = (src[i] * filter_x0 + ff_ph_40 + src[i + 1] * filter_x1)
 *            >> ftmp14
 * with src[i] taken from the load at offset 0 and src[i + 1] from the load at
 * offset 1 of %[src_ptr] (bytes 0..8 are read in total).
 * Output:   %[ftmp2] (pixels 0..3) and %[ftmp3] (pixels 4..7), as halfwords.
 * Scratch:  %[ftmp1], %[ftmp4], %[ftmp5].
 * Inputs:   %[filter_x0], %[filter_x1], %[ff_ph_40], shift count in
 *           %[ftmp14], and %[ftmp0] as unpack partner (assumed zero).
 * Note that the shift count lives in ftmp14 here, not ftmp6 as in the 4-wide
 * macros. */
170*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A                       \
171*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[0]~fdata3[7], store at ftmp2 and ftmp3*/      \
172*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t" \
173*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t" \
174*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t" \
175*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t" \
176*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
177*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x01(%[src_ptr])                    \n\t" \
178*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t" \
179*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t" \
180*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp2],   %[ftmp2],       %[filter_x0]        \n\t" \
181*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp3],   %[ftmp3],       %[filter_x0]        \n\t" \
182*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ff_ph_40]         \n\t" \
183*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp3],   %[ftmp3],       %[ff_ph_40]         \n\t" \
184*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp4],   %[ftmp4],       %[filter_x1]        \n\t" \
185*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp5],   %[ftmp5],       %[filter_x1]        \n\t" \
186*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
187*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t" \
188*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp14]           \n\t" \
189*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp14]           \n\t"
190*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: horizontal (first-pass) two-tap filter for 8 pixels,
 * "B" register set. Same computation as the _8_A variant, but the filtered
 * row is left in %[ftmp8]/%[ftmp9] so that a row held in %[ftmp2]/%[ftmp3] is
 * not disturbed:
 *   out[i] = (src[i] * filter_x0 + ff_ph_40 + src[i + 1] * filter_x1)
 *            >> ftmp14
 * Output:   %[ftmp8] (pixels 0..3) and %[ftmp9] (pixels 4..7), as halfwords.
 * Scratch:  %[ftmp1], %[ftmp10], %[ftmp11].
 * Inputs:   %[filter_x0], %[filter_x1], %[ff_ph_40], shift count in
 *           %[ftmp14], and %[ftmp0] as unpack partner (assumed zero).
 * Bytes at offsets 0..8 of %[src_ptr] are read. */
191*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B                       \
192*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[0]~fdata3[7], store at ftmp8 and ftmp9*/      \
193*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t" \
194*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t" \
195*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp8],   %[ftmp1],       %[ftmp0]            \n\t" \
196*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp9],   %[ftmp1],       %[ftmp0]            \n\t" \
197*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
198*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x01(%[src_ptr])                    \n\t" \
199*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp10],  %[ftmp1],       %[ftmp0]            \n\t" \
200*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp11],  %[ftmp1],       %[ftmp0]            \n\t" \
201*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp8],   %[ftmp8],       %[filter_x0]        \n\t" \
202*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp9],   %[ftmp9],       %[filter_x0]        \n\t" \
203*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp8],   %[ftmp8],       %[ff_ph_40]         \n\t" \
204*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp9],   %[ftmp9],       %[ff_ph_40]         \n\t" \
205*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp10],  %[ftmp10],      %[filter_x1]        \n\t" \
206*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp11],  %[ftmp11],      %[filter_x1]        \n\t" \
207*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp8],   %[ftmp8],       %[ftmp10]           \n\t" \
208*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp9],   %[ftmp9],       %[ftmp11]           \n\t" \
209*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp14]           \n\t" \
210*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp9],   %[ftmp9],       %[ftmp14]           \n\t"
211*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: vertical (second-pass) two-tap filter for 8 pixels,
 * "A" ordering. Combines the first-pass row in %[ftmp2]/%[ftmp3] with the row
 * in %[ftmp8]/%[ftmp9]:
 *   ftmp2[i] = (ftmp2[i] * filter_y0 + ff_ph_40 + ftmp8[i] * filter_y1)
 *              >> ftmp14                                    (pixels 0..3)
 *   ftmp3[i] = (ftmp3[i] * filter_y0 + ff_ph_40 + ftmp9[i] * filter_y1)
 *              >> ftmp14                                    (pixels 4..7)
 * Both halves are ANDed with %[mask], packed to 8 bytes with packushb and
 * written to %[temp2_ptr] (offsets 0..7) with the gssdlc1/gssdrc1 pair.
 * Destroyed: %[ftmp2] (holds the packed result afterwards), %[ftmp3],
 *            %[ftmp1].
 * Preserved: %[ftmp8], %[ftmp9], so they can serve as the first row of the
 *            following _8_B step.
 * NOTE(review): the value of %[mask] is set outside this macro. */
212*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A                      \
213*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[0] ~ temp2[3] */                              \
214*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp2],   %[ftmp2],       %[filter_y0]        \n\t" \
215*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ff_ph_40]         \n\t" \
216*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp8],       %[filter_y1]        \n\t" \
217*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t" \
218*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp14]           \n\t" \
219*fb1b10abSAndroid Build Coastguard Worker                                                                     \
220*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[4] ~ temp2[7] */                              \
221*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp3],   %[ftmp3],       %[filter_y0]        \n\t" \
222*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp3],   %[ftmp3],       %[ff_ph_40]         \n\t" \
223*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp9],       %[filter_y1]        \n\t" \
224*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t" \
225*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp14]           \n\t" \
226*fb1b10abSAndroid Build Coastguard Worker                                                                     \
227*fb1b10abSAndroid Build Coastguard Worker   /* store: temp2[0] ~ temp2[7] */                                  \
228*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp2],   %[ftmp2],       %[mask]             \n\t" \
229*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp3],   %[ftmp3],       %[mask]             \n\t" \
230*fb1b10abSAndroid Build Coastguard Worker   "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t" \
231*fb1b10abSAndroid Build Coastguard Worker   "gssdlc1    %[ftmp2],   0x07(%[temp2_ptr])                  \n\t" \
232*fb1b10abSAndroid Build Coastguard Worker   "gssdrc1    %[ftmp2],   0x00(%[temp2_ptr])                  \n\t"
233*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: vertical (second-pass) two-tap filter for 8 pixels,
 * "B" ordering -- the mirror image of the _8_A variant with the two register
 * pairs swapped:
 *   ftmp8[i] = (ftmp8[i] * filter_y0 + ff_ph_40 + ftmp2[i] * filter_y1)
 *              >> ftmp14                                    (pixels 0..3)
 *   ftmp9[i] = (ftmp9[i] * filter_y0 + ff_ph_40 + ftmp3[i] * filter_y1)
 *              >> ftmp14                                    (pixels 4..7)
 * Both halves are ANDed with %[mask], packed to 8 bytes with packushb and
 * written to %[temp2_ptr] (offsets 0..7) with the gssdlc1/gssdrc1 pair.
 * Destroyed: %[ftmp8] (holds the packed result afterwards), %[ftmp9],
 *            %[ftmp1].
 * Preserved: %[ftmp2], %[ftmp3], so they can serve as the first row of the
 *            following _8_A step. */
234*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B                      \
235*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[0] ~ temp2[3] */                              \
236*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp8],   %[ftmp8],       %[filter_y0]        \n\t" \
237*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp8],   %[ftmp8],       %[ff_ph_40]         \n\t" \
238*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp2],       %[filter_y1]        \n\t" \
239*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp8],   %[ftmp8],       %[ftmp1]            \n\t" \
240*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp8],   %[ftmp8],       %[ftmp14]           \n\t" \
241*fb1b10abSAndroid Build Coastguard Worker                                                                     \
242*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[4] ~ temp2[7] */                              \
243*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp9],   %[ftmp9],       %[filter_y0]        \n\t" \
244*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp9],   %[ftmp9],       %[ff_ph_40]         \n\t" \
245*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp3],       %[filter_y1]        \n\t" \
246*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp9],   %[ftmp9],       %[ftmp1]            \n\t" \
247*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp9],   %[ftmp9],       %[ftmp14]           \n\t" \
248*fb1b10abSAndroid Build Coastguard Worker                                                                     \
249*fb1b10abSAndroid Build Coastguard Worker   /* store: temp2[0] ~ temp2[7] */                                  \
250*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp8],   %[ftmp8],       %[mask]             \n\t" \
251*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp9],   %[ftmp9],       %[mask]             \n\t" \
252*fb1b10abSAndroid Build Coastguard Worker   "packushb   %[ftmp8],   %[ftmp8],       %[ftmp9]            \n\t" \
253*fb1b10abSAndroid Build Coastguard Worker   "gssdlc1    %[ftmp8],   0x07(%[temp2_ptr])                  \n\t" \
254*fb1b10abSAndroid Build Coastguard Worker   "gssdrc1    %[ftmp8],   0x00(%[temp2_ptr])                  \n\t"
255*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: horizontal (first-pass) two-tap filter for 16 pixels,
 * "A" register set. Expands VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A for pixels
 * 0..7 and then applies the same formula to pixels 8..15, using the loads at
 * offsets 8 and 9 of %[src_ptr]:
 *   out[i] = (src[i] * filter_x0 + ff_ph_40 + src[i + 1] * filter_x1)
 *            >> ftmp14
 * Output:   %[ftmp2], %[ftmp3] (pixels 0..7) and %[ftmp4], %[ftmp5]
 *           (pixels 8..15), four halfwords per register.
 * Scratch:  %[ftmp1], %[ftmp6], %[ftmp7]. ftmp4/ftmp5 are used as scratch by
 *           the embedded _8_A part before they receive the second half.
 * Inputs:   %[filter_x0], %[filter_x1], %[ff_ph_40], shift count in
 *           %[ftmp14], and %[ftmp0] as unpack partner (assumed zero).
 * Bytes at offsets 0..16 of %[src_ptr] are read. */
256*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A                      \
257*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[0]~fdata3[7], store at ftmp2 and ftmp3*/      \
258*fb1b10abSAndroid Build Coastguard Worker   VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A                             \
259*fb1b10abSAndroid Build Coastguard Worker                                                                     \
260*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[8]~fdata3[15], store at ftmp4 and ftmp5*/     \
261*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t" \
262*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
263*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp4],   %[ftmp1],       %[ftmp0]            \n\t" \
264*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t" \
265*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x10(%[src_ptr])                    \n\t" \
266*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x09(%[src_ptr])                    \n\t" \
267*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp6],   %[ftmp1],       %[ftmp0]            \n\t" \
268*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp7],   %[ftmp1],       %[ftmp0]            \n\t" \
269*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp4],   %[ftmp4],       %[filter_x0]        \n\t" \
270*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp5],   %[ftmp5],       %[filter_x0]        \n\t" \
271*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ff_ph_40]         \n\t" \
272*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp5],   %[ftmp5],       %[ff_ph_40]         \n\t" \
273*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp6],   %[ftmp6],       %[filter_x1]        \n\t" \
274*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp7],   %[ftmp7],       %[filter_x1]        \n\t" \
275*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]            \n\t" \
276*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t" \
277*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp14]           \n\t" \
278*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp5],   %[ftmp5],       %[ftmp14]           \n\t"
279*fb1b10abSAndroid Build Coastguard Worker 
/* Inline-asm fragment: horizontal (first-pass) two-tap filter for 16 pixels,
 * "B" register set. Expands VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B for pixels
 * 0..7 and then applies the same formula to pixels 8..15, using the loads at
 * offsets 8 and 9 of %[src_ptr]:
 *   out[i] = (src[i] * filter_x0 + ff_ph_40 + src[i + 1] * filter_x1)
 *            >> ftmp14
 * Output:   %[ftmp8], %[ftmp9] (pixels 0..7) and %[ftmp10], %[ftmp11]
 *           (pixels 8..15), four halfwords per register.
 * Scratch:  %[ftmp1], %[ftmp12], %[ftmp13]. ftmp10/ftmp11 are used as scratch
 *           by the embedded _8_B part before they receive the second half.
 * Inputs:   %[filter_x0], %[filter_x1], %[ff_ph_40], shift count in
 *           %[ftmp14], and %[ftmp0] as unpack partner (assumed zero).
 * Bytes at offsets 0..16 of %[src_ptr] are read; the "A" result registers
 * ftmp2..ftmp5 are not touched. */
280*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B                      \
281*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[0]~fdata3[7], store at ftmp8 and ftmp9*/      \
282*fb1b10abSAndroid Build Coastguard Worker   VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B                             \
283*fb1b10abSAndroid Build Coastguard Worker                                                                     \
284*fb1b10abSAndroid Build Coastguard Worker   /* calculate fdata3[8]~fdata3[15], store at ftmp10 and ftmp11*/   \
285*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t" \
286*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t" \
287*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp10],  %[ftmp1],       %[ftmp0]            \n\t" \
288*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp11],  %[ftmp1],       %[ftmp0]            \n\t" \
289*fb1b10abSAndroid Build Coastguard Worker   "gsldlc1    %[ftmp1],   0x10(%[src_ptr])                    \n\t" \
290*fb1b10abSAndroid Build Coastguard Worker   "gsldrc1    %[ftmp1],   0x09(%[src_ptr])                    \n\t" \
291*fb1b10abSAndroid Build Coastguard Worker   "punpcklbh  %[ftmp12],  %[ftmp1],       %[ftmp0]            \n\t" \
292*fb1b10abSAndroid Build Coastguard Worker   "punpckhbh  %[ftmp13],  %[ftmp1],       %[ftmp0]            \n\t" \
293*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp10],  %[ftmp10],      %[filter_x0]        \n\t" \
294*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp11],  %[ftmp11],      %[filter_x0]        \n\t" \
295*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp10],  %[ftmp10],      %[ff_ph_40]         \n\t" \
296*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp11],  %[ftmp11],      %[ff_ph_40]         \n\t" \
297*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp12],  %[ftmp12],      %[filter_x1]        \n\t" \
298*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp13],  %[ftmp13],      %[filter_x1]        \n\t" \
299*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp10],  %[ftmp10],      %[ftmp12]           \n\t" \
300*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp11],  %[ftmp11],      %[ftmp13]           \n\t" \
301*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp10],  %[ftmp10],      %[ftmp14]           \n\t" \
302*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp11],  %[ftmp11],      %[ftmp14]           \n\t"
303*fb1b10abSAndroid Build Coastguard Worker 
/* Second-pass (y-direction) bilinear filter for one 16-pixel row, "A"
 * register set.
 *
 * Pixels 0..7 are delegated to VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A. For
 * pixels 8..15 the halfword lanes in ftmp4/ftmp5 are weighted by filter_y0 and
 * the lanes in ftmp10/ftmp11 by filter_y1:
 *     ftmp4 = (ftmp4 * filter_y0 + ff_ph_40 + ftmp10 * filter_y1) >> ftmp14
 *     ftmp5 = (ftmp5 * filter_y0 + ff_ph_40 + ftmp11 * filter_y1) >> ftmp14
 * The results are ANDed with `mask`, packed to eight unsigned bytes with
 * packushb and written with an unaligned 8-byte store to temp2_ptr+8.
 *
 * ftmp4 and ftmp5 are overwritten by the result and ftmp1 is scratch, while
 * ftmp10/ftmp11 are only read. The _B variant swaps the roles of the two
 * register pairs, so presumably callers alternate _A and _B from row to row
 * -- confirm at the use sites. */
304*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A                     \
305*fb1b10abSAndroid Build Coastguard Worker   VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A                            \
306*fb1b10abSAndroid Build Coastguard Worker                                                                     \
307*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[8] ~ temp2[11] */                             \
308*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp4],   %[ftmp4],       %[filter_y0]        \n\t" \
309*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ff_ph_40]         \n\t" \
310*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp10],      %[filter_y1]        \n\t" \
311*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp4],   %[ftmp4],       %[ftmp1]            \n\t" \
312*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp14]           \n\t" \
313*fb1b10abSAndroid Build Coastguard Worker                                                                     \
314*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[12] ~ temp2[15] */                            \
315*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp5],   %[ftmp5],       %[filter_y0]        \n\t" \
316*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp5],   %[ftmp5],       %[ff_ph_40]         \n\t" \
317*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp11],       %[filter_y1]       \n\t" \
318*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
319*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp5],   %[ftmp5],       %[ftmp14]           \n\t" \
320*fb1b10abSAndroid Build Coastguard Worker                                                                     \
321*fb1b10abSAndroid Build Coastguard Worker   /* store: temp2[8] ~ temp2[15] */                                 \
322*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp4],   %[ftmp4],       %[mask]             \n\t" \
323*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp5],   %[ftmp5],       %[mask]             \n\t" \
324*fb1b10abSAndroid Build Coastguard Worker   "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]            \n\t" \
325*fb1b10abSAndroid Build Coastguard Worker   "gssdlc1    %[ftmp4],   0x0f(%[temp2_ptr])                  \n\t" \
326*fb1b10abSAndroid Build Coastguard Worker   "gssdrc1    %[ftmp4],   0x08(%[temp2_ptr])                  \n\t"
327*fb1b10abSAndroid Build Coastguard Worker 
/* Second-pass (y-direction) bilinear filter for one 16-pixel row, "B"
 * register set: the mirror image of VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A.
 *
 * Pixels 0..7 are delegated to VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B. For
 * pixels 8..15 the halfword lanes in ftmp10/ftmp11 are weighted by filter_y0
 * and the lanes in ftmp4/ftmp5 by filter_y1:
 *     ftmp10 = (ftmp10 * filter_y0 + ff_ph_40 + ftmp4 * filter_y1) >> ftmp14
 *     ftmp11 = (ftmp11 * filter_y0 + ff_ph_40 + ftmp5 * filter_y1) >> ftmp14
 * The results are ANDed with `mask`, packed to eight unsigned bytes with
 * packushb and written with an unaligned 8-byte store to temp2_ptr+8.
 *
 * ftmp10 and ftmp11 are overwritten by the result and ftmp1 is scratch, while
 * ftmp4/ftmp5 are only read. */
328*fb1b10abSAndroid Build Coastguard Worker #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B                     \
329*fb1b10abSAndroid Build Coastguard Worker   VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B                            \
330*fb1b10abSAndroid Build Coastguard Worker                                                                     \
331*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[8] ~ temp2[11] */                             \
332*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp10],  %[ftmp10],      %[filter_y0]        \n\t" \
333*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp10],  %[ftmp10],      %[ff_ph_40]         \n\t" \
334*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp4],       %[filter_y1]        \n\t" \
335*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp10],  %[ftmp10],      %[ftmp1]            \n\t" \
336*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp10],  %[ftmp10],      %[ftmp14]           \n\t" \
337*fb1b10abSAndroid Build Coastguard Worker                                                                     \
338*fb1b10abSAndroid Build Coastguard Worker   /* calculate: temp2[12] ~ temp2[15] */                            \
339*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp11],  %[ftmp11],      %[filter_y0]        \n\t" \
340*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp11],  %[ftmp11],      %[ff_ph_40]         \n\t" \
341*fb1b10abSAndroid Build Coastguard Worker   "pmullh     %[ftmp1],   %[ftmp5],       %[filter_y1]        \n\t" \
342*fb1b10abSAndroid Build Coastguard Worker   "paddh      %[ftmp11],  %[ftmp11],      %[ftmp1]            \n\t" \
343*fb1b10abSAndroid Build Coastguard Worker   "psrlh      %[ftmp11],  %[ftmp11],      %[ftmp14]           \n\t" \
344*fb1b10abSAndroid Build Coastguard Worker                                                                     \
345*fb1b10abSAndroid Build Coastguard Worker   /* store: temp2[8] ~ temp2[15] */                                 \
346*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp10],  %[ftmp10],      %[mask]             \n\t" \
347*fb1b10abSAndroid Build Coastguard Worker   "pand       %[ftmp11],  %[ftmp11],      %[mask]             \n\t" \
348*fb1b10abSAndroid Build Coastguard Worker   "packushb   %[ftmp10],  %[ftmp10],      %[ftmp11]           \n\t" \
349*fb1b10abSAndroid Build Coastguard Worker   "gssdlc1    %[ftmp10],  0x0f(%[temp2_ptr])                  \n\t" \
350*fb1b10abSAndroid Build Coastguard Worker   "gssdrc1    %[ftmp10],  0x08(%[temp2_ptr])                  \n\t"
351*fb1b10abSAndroid Build Coastguard Worker 
352*fb1b10abSAndroid Build Coastguard Worker // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
353*fb1b10abSAndroid Build Coastguard Worker // or vertical direction to produce the filtered output block. Used to implement
354*fb1b10abSAndroid Build Coastguard Worker // the first-pass of 2-D separable filter.
355*fb1b10abSAndroid Build Coastguard Worker //
356*fb1b10abSAndroid Build Coastguard Worker // Produces int16_t output to retain precision for the next pass. Two filter
357*fb1b10abSAndroid Build Coastguard Worker // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
358*fb1b10abSAndroid Build Coastguard Worker // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
359*fb1b10abSAndroid Build Coastguard Worker // It defines the offset required to move from one input to the next.
var_filter_block2d_bil_first_pass(const uint8_t * src_ptr,uint16_t * ref_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)360*fb1b10abSAndroid Build Coastguard Worker static void var_filter_block2d_bil_first_pass(
361*fb1b10abSAndroid Build Coastguard Worker     const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
362*fb1b10abSAndroid Build Coastguard Worker     int pixel_step, unsigned int output_height, unsigned int output_width,
363*fb1b10abSAndroid Build Coastguard Worker     const uint8_t *filter) {
364*fb1b10abSAndroid Build Coastguard Worker   unsigned int i, j;
365*fb1b10abSAndroid Build Coastguard Worker 
366*fb1b10abSAndroid Build Coastguard Worker   for (i = 0; i < output_height; ++i) {
367*fb1b10abSAndroid Build Coastguard Worker     for (j = 0; j < output_width; ++j) {
368*fb1b10abSAndroid Build Coastguard Worker       ref_ptr[j] = ROUND_POWER_OF_TWO(
369*fb1b10abSAndroid Build Coastguard Worker           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
370*fb1b10abSAndroid Build Coastguard Worker           FILTER_BITS);
371*fb1b10abSAndroid Build Coastguard Worker 
372*fb1b10abSAndroid Build Coastguard Worker       ++src_ptr;
373*fb1b10abSAndroid Build Coastguard Worker     }
374*fb1b10abSAndroid Build Coastguard Worker 
375*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_pixels_per_line - output_width;
376*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += output_width;
377*fb1b10abSAndroid Build Coastguard Worker   }
378*fb1b10abSAndroid Build Coastguard Worker }
379*fb1b10abSAndroid Build Coastguard Worker 
380*fb1b10abSAndroid Build Coastguard Worker // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
381*fb1b10abSAndroid Build Coastguard Worker // or vertical direction to produce the filtered output block. Used to implement
382*fb1b10abSAndroid Build Coastguard Worker // the second-pass of 2-D separable filter.
383*fb1b10abSAndroid Build Coastguard Worker //
384*fb1b10abSAndroid Build Coastguard Worker // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
385*fb1b10abSAndroid Build Coastguard Worker // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
386*fb1b10abSAndroid Build Coastguard Worker // filter is applied horizontally (pixel_step = 1) or vertically
387*fb1b10abSAndroid Build Coastguard Worker // (pixel_step = stride). It defines the offset required to move from one input
388*fb1b10abSAndroid Build Coastguard Worker // to the next. Output is 8-bit.
var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint8_t * ref_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)389*fb1b10abSAndroid Build Coastguard Worker static void var_filter_block2d_bil_second_pass(
390*fb1b10abSAndroid Build Coastguard Worker     const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
391*fb1b10abSAndroid Build Coastguard Worker     unsigned int pixel_step, unsigned int output_height,
392*fb1b10abSAndroid Build Coastguard Worker     unsigned int output_width, const uint8_t *filter) {
393*fb1b10abSAndroid Build Coastguard Worker   unsigned int i, j;
394*fb1b10abSAndroid Build Coastguard Worker 
395*fb1b10abSAndroid Build Coastguard Worker   for (i = 0; i < output_height; ++i) {
396*fb1b10abSAndroid Build Coastguard Worker     for (j = 0; j < output_width; ++j) {
397*fb1b10abSAndroid Build Coastguard Worker       ref_ptr[j] = ROUND_POWER_OF_TWO(
398*fb1b10abSAndroid Build Coastguard Worker           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
399*fb1b10abSAndroid Build Coastguard Worker           FILTER_BITS);
400*fb1b10abSAndroid Build Coastguard Worker       ++src_ptr;
401*fb1b10abSAndroid Build Coastguard Worker     }
402*fb1b10abSAndroid Build Coastguard Worker 
403*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_pixels_per_line - output_width;
404*fb1b10abSAndroid Build Coastguard Worker     ref_ptr += output_width;
405*fb1b10abSAndroid Build Coastguard Worker   }
406*fb1b10abSAndroid Build Coastguard Worker }
407*fb1b10abSAndroid Build Coastguard Worker 
// Variance of a 64-pixel-wide block that is `high` rows tall, implemented
// with Loongson MMI inline assembly.
//
// src_ptr/src_stride and ref_ptr/ref_stride address the two blocks being
// compared. Each loop iteration issues eight unaligned 8-byte loads from each
// block (byte offsets 0x00..0x38, i.e. 64 pixels) and feeds every pair to
// VARIANCE_SSE_SUM_8_FOR_W64, which is defined earlier in this file. After the
// loop the two 32-bit halves of ftmp9 are added to form `sum`, and the two
// 32-bit halves of ftmp10 are added and stored to *sse.
//
// Returns *sse - sum * sum / (64 * high). The sum is squared in 64-bit
// arithmetic before the division.
//
// NOTE(review): the row counter is fetched with MMI_L from the address of the
// `int` parameter `high`. MMI_L comes from vpx_ports/asmdefs_mmi.h, which is
// not visible here; if it expands to a 64-bit load on mips64 it would read
// 8 bytes from a 4-byte object -- confirm the load width.
vpx_variance64x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,int high)408*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride,
409*fb1b10abSAndroid Build Coastguard Worker                                        const uint8_t *ref_ptr, int ref_stride,
410*fb1b10abSAndroid Build Coastguard Worker                                        uint32_t *sse, int high) {
411*fb1b10abSAndroid Build Coastguard Worker   int sum;
412*fb1b10abSAndroid Build Coastguard Worker   double ftmp[12];
413*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[3];
414*fb1b10abSAndroid Build Coastguard Worker 
415*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
416*fb1b10abSAndroid Build Coastguard Worker 
417*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
418*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
    /* ftmp11 = 0x20 (32): shift count used after the loop to fold ftmp10. */
419*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
420*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
    /* tmp0 = row counter, loaded from the address of `high`. */
421*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
    /* Zero ftmp0 and the two registers read back after the loop. */
422*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
423*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp9],   %[ftmp9],       %[ftmp9]            \n\t"
424*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
    /* One iteration per row: 8 groups of 8 pixels from src and ref. */
425*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
426*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
427*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
428*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t"
429*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t"
430*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
431*fb1b10abSAndroid Build Coastguard Worker 
432*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t"
433*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t"
434*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x0f(%[ref_ptr])                    \n\t"
435*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x08(%[ref_ptr])                    \n\t"
436*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
437*fb1b10abSAndroid Build Coastguard Worker 
438*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x17(%[src_ptr])                    \n\t"
439*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x10(%[src_ptr])                    \n\t"
440*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x17(%[ref_ptr])                    \n\t"
441*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x10(%[ref_ptr])                    \n\t"
442*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
443*fb1b10abSAndroid Build Coastguard Worker 
444*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x1f(%[src_ptr])                    \n\t"
445*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x18(%[src_ptr])                    \n\t"
446*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x1f(%[ref_ptr])                    \n\t"
447*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x18(%[ref_ptr])                    \n\t"
448*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
449*fb1b10abSAndroid Build Coastguard Worker 
450*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x27(%[src_ptr])                    \n\t"
451*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x20(%[src_ptr])                    \n\t"
452*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x27(%[ref_ptr])                    \n\t"
453*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x20(%[ref_ptr])                    \n\t"
454*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
455*fb1b10abSAndroid Build Coastguard Worker 
456*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x2f(%[src_ptr])                    \n\t"
457*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x28(%[src_ptr])                    \n\t"
458*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x2f(%[ref_ptr])                    \n\t"
459*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x28(%[ref_ptr])                    \n\t"
460*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
461*fb1b10abSAndroid Build Coastguard Worker 
462*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x37(%[src_ptr])                    \n\t"
463*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x30(%[src_ptr])                    \n\t"
464*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x37(%[ref_ptr])                    \n\t"
465*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x30(%[ref_ptr])                    \n\t"
466*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
467*fb1b10abSAndroid Build Coastguard Worker 
468*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x3f(%[src_ptr])                    \n\t"
469*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x38(%[src_ptr])                    \n\t"
470*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x3f(%[ref_ptr])                    \n\t"
471*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x38(%[ref_ptr])                    \n\t"
472*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
473*fb1b10abSAndroid Build Coastguard Worker 
    /* Decrement the row counter, step both pointers by their strides, and
       loop while rows remain. */
474*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
475*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
476*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
477*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
478*fb1b10abSAndroid Build Coastguard Worker 
    /* sum = low 32 bits of ftmp9 + high 32 bits of ftmp9. */
479*fb1b10abSAndroid Build Coastguard Worker     "mfc1       %[tmp1],    %[ftmp9]                            \n\t"
480*fb1b10abSAndroid Build Coastguard Worker     "mfhc1      %[tmp2],    %[ftmp9]                            \n\t"
481*fb1b10abSAndroid Build Coastguard Worker     "addu       %[sum],     %[tmp1],        %[tmp2]             \n\t"
    /* *sse = low 32 bits of (ftmp10 + (ftmp10 >> 32)). */
482*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp1],   %[ftmp10],      %[ftmp11]           \n\t"
483*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp1],   %[ftmp1],       %[ftmp10]           \n\t"
484*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp1],   0x00(%[sse])                        \n\t"
485*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
486*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
487*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
488*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
489*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
490*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
491*fb1b10abSAndroid Build Coastguard Worker       [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
492*fb1b10abSAndroid Build Coastguard Worker       [tmp2]"=&r"(tmp[2]),
493*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr),
494*fb1b10abSAndroid Build Coastguard Worker       [sum]"=&r"(sum)
495*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
496*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
497*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse)
498*fb1b10abSAndroid Build Coastguard Worker     : "memory"
499*fb1b10abSAndroid Build Coastguard Worker   );
500*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
501*fb1b10abSAndroid Build Coastguard Worker 
  // 64 * high is the number of pixels in the block.
502*fb1b10abSAndroid Build Coastguard Worker   return *sse - (((int64_t)sum * sum) / (64 * high));
503*fb1b10abSAndroid Build Coastguard Worker }
504*fb1b10abSAndroid Build Coastguard Worker 
/* Defines the public entry point vpx_variance64x<n>_mmi, which forwards its
 * arguments to vpx_variance64x with the block height fixed to n rows. */
#define VPX_VARIANCE64XN(n)                                                 \
  uint32_t vpx_variance64x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
                                    const uint8_t *ref_ptr, int ref_stride, \
                                    uint32_t *sse) {                        \
    const uint32_t variance =                                               \
        vpx_variance64x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n);  \
                                                                            \
    return variance;                                                        \
  }

/* Instantiate the 64x64 and 64x32 variants. */
VPX_VARIANCE64XN(64)
VPX_VARIANCE64XN(32)
514*fb1b10abSAndroid Build Coastguard Worker 
// Variance of a 32-pixel-wide, 64-row block, implemented with Loongson MMI
// inline assembly.
//
// src_ptr/src_stride and ref_ptr/ref_stride address the two blocks being
// compared. The row counter is the constant 0x40 (64). Each loop iteration
// issues four unaligned 8-byte loads from each block (byte offsets
// 0x00..0x18, i.e. 32 pixels) and feeds every pair to
// VARIANCE_SSE_SUM_8_FOR_W64, which is defined earlier in this file. After the
// loop the two 32-bit halves of ftmp9 are added to form `sum`, and the two
// 32-bit halves of ftmp10 are added and stored to *sse.
//
// Returns *sse - sum * sum / 2048, where 2048 = 32 * 64 is the number of
// pixels in the block. The sum is squared in 64-bit arithmetic before the
// division.
vpx_variance32x64_mmi(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse)515*fb1b10abSAndroid Build Coastguard Worker uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride,
516*fb1b10abSAndroid Build Coastguard Worker                                const uint8_t *ref_ptr, int ref_stride,
517*fb1b10abSAndroid Build Coastguard Worker                                uint32_t *sse) {
518*fb1b10abSAndroid Build Coastguard Worker   int sum;
519*fb1b10abSAndroid Build Coastguard Worker   double ftmp[12];
520*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[3];
521*fb1b10abSAndroid Build Coastguard Worker 
522*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
523*fb1b10abSAndroid Build Coastguard Worker 
524*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
525*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
    /* ftmp11 = 0x20 (32): shift count used after the loop to fold ftmp10. */
526*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
527*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
    /* tmp0 = row counter: 0x40 = 64 rows. */
528*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x40                                \n\t"
    /* Zero ftmp0 and the two registers read back after the loop. */
529*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
530*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp9],   %[ftmp9],       %[ftmp9]            \n\t"
531*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
    /* One iteration per row: 4 groups of 8 pixels from src and ref. */
532*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
533*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
534*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
535*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t"
536*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t"
537*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
538*fb1b10abSAndroid Build Coastguard Worker 
539*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t"
540*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t"
541*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x0f(%[ref_ptr])                    \n\t"
542*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x08(%[ref_ptr])                    \n\t"
543*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
544*fb1b10abSAndroid Build Coastguard Worker 
545*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x17(%[src_ptr])                    \n\t"
546*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x10(%[src_ptr])                    \n\t"
547*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x17(%[ref_ptr])                    \n\t"
548*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x10(%[ref_ptr])                    \n\t"
549*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
550*fb1b10abSAndroid Build Coastguard Worker 
551*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x1f(%[src_ptr])                    \n\t"
552*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x18(%[src_ptr])                    \n\t"
553*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x1f(%[ref_ptr])                    \n\t"
554*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x18(%[ref_ptr])                    \n\t"
555*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8_FOR_W64
556*fb1b10abSAndroid Build Coastguard Worker 
    /* Decrement the row counter, step both pointers by their strides, and
       loop while rows remain. */
557*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
558*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
559*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
560*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
561*fb1b10abSAndroid Build Coastguard Worker 
    /* sum = low 32 bits of ftmp9 + high 32 bits of ftmp9. */
562*fb1b10abSAndroid Build Coastguard Worker     "mfc1       %[tmp1],    %[ftmp9]                            \n\t"
563*fb1b10abSAndroid Build Coastguard Worker     "mfhc1      %[tmp2],    %[ftmp9]                            \n\t"
564*fb1b10abSAndroid Build Coastguard Worker     "addu       %[sum],     %[tmp1],        %[tmp2]             \n\t"
    /* *sse = low 32 bits of (ftmp10 + (ftmp10 >> 32)). */
565*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp1],   %[ftmp10],      %[ftmp11]           \n\t"
566*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp1],   %[ftmp1],       %[ftmp10]           \n\t"
567*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp1],   0x00(%[sse])                        \n\t"
568*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
569*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
570*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
571*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
572*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
573*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
574*fb1b10abSAndroid Build Coastguard Worker       [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
575*fb1b10abSAndroid Build Coastguard Worker       [tmp2]"=&r"(tmp[2]),
576*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr),
577*fb1b10abSAndroid Build Coastguard Worker       [sum]"=&r"(sum)
578*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
579*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
580*fb1b10abSAndroid Build Coastguard Worker       [sse]"r"(sse)
581*fb1b10abSAndroid Build Coastguard Worker     : "memory"
582*fb1b10abSAndroid Build Coastguard Worker   );
583*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
584*fb1b10abSAndroid Build Coastguard Worker 
  // 2048 = 32 * 64 pixels in the block.
585*fb1b10abSAndroid Build Coastguard Worker   return *sse - (((int64_t)sum * sum) / 2048);
586*fb1b10abSAndroid Build Coastguard Worker }
587*fb1b10abSAndroid Build Coastguard Worker 
vpx_variance32x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,int high)588*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride,
589*fb1b10abSAndroid Build Coastguard Worker                                        const uint8_t *ref_ptr, int ref_stride,
590*fb1b10abSAndroid Build Coastguard Worker                                        uint32_t *sse, int high) {
591*fb1b10abSAndroid Build Coastguard Worker   int sum;
592*fb1b10abSAndroid Build Coastguard Worker   double ftmp[13];
593*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[3];
594*fb1b10abSAndroid Build Coastguard Worker 
595*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
596*fb1b10abSAndroid Build Coastguard Worker 
597*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
598*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
599*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
600*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
601*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
602*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
603*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
604*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
605*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
606*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
607*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
608*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
609*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t"
610*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t"
611*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
612*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t"
613*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t"
614*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x0f(%[ref_ptr])                    \n\t"
615*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x08(%[ref_ptr])                    \n\t"
616*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
617*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x17(%[src_ptr])                    \n\t"
618*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x10(%[src_ptr])                    \n\t"
619*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x17(%[ref_ptr])                    \n\t"
620*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x10(%[ref_ptr])                    \n\t"
621*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
622*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x1f(%[src_ptr])                    \n\t"
623*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x18(%[src_ptr])                    \n\t"
624*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x1f(%[ref_ptr])                    \n\t"
625*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x18(%[ref_ptr])                    \n\t"
626*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
627*fb1b10abSAndroid Build Coastguard Worker 
628*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
629*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
630*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
631*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
632*fb1b10abSAndroid Build Coastguard Worker 
633*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
634*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
635*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
636*fb1b10abSAndroid Build Coastguard Worker 
637*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp3],   %[ftmp10],      %[ftmp0]            \n\t"
638*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp4],   %[ftmp10],      %[ftmp0]            \n\t"
639*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp5],   %[ftmp12],      %[ftmp0]            \n\t"
640*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp6],   %[ftmp12],      %[ftmp0]            \n\t"
641*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
642*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
643*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
644*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
645*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
646*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
647*fb1b10abSAndroid Build Coastguard Worker 
648*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
649*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
650*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
651*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
652*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
653*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
654*fb1b10abSAndroid Build Coastguard Worker       [ftmp12]"=&f"(ftmp[12]),          [tmp0]"=&r"(tmp[0]),
655*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr)
656*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
657*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
658*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
659*fb1b10abSAndroid Build Coastguard Worker     : "memory"
660*fb1b10abSAndroid Build Coastguard Worker   );
661*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
662*fb1b10abSAndroid Build Coastguard Worker 
663*fb1b10abSAndroid Build Coastguard Worker   return *sse - (((int64_t)sum * sum) / (32 * high));
664*fb1b10abSAndroid Build Coastguard Worker }
665*fb1b10abSAndroid Build Coastguard Worker 
/* Emits the public vpx_variance32x<n>_mmi() entry point. The generated
 * function forwards all of its arguments to vpx_variance32x() and passes n as
 * the block height. */
#define VPX_VARIANCE32XN(n)                                                 \
  uint32_t vpx_variance32x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
                                    const uint8_t *ref_ptr, int ref_stride, \
                                    uint32_t *sse) {                        \
    const uint32_t variance =                                               \
        vpx_variance32x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n);  \
    return variance;                                                        \
  }

VPX_VARIANCE32XN(32)
VPX_VARIANCE32XN(16)
675*fb1b10abSAndroid Build Coastguard Worker 
vpx_variance16x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,int high)676*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride,
677*fb1b10abSAndroid Build Coastguard Worker                                        const uint8_t *ref_ptr, int ref_stride,
678*fb1b10abSAndroid Build Coastguard Worker                                        uint32_t *sse, int high) {
679*fb1b10abSAndroid Build Coastguard Worker   int sum;
680*fb1b10abSAndroid Build Coastguard Worker   double ftmp[13];
681*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[3];
682*fb1b10abSAndroid Build Coastguard Worker 
683*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
684*fb1b10abSAndroid Build Coastguard Worker 
685*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
686*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
687*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
688*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
689*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
690*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
691*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
692*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
693*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
694*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
695*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
696*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
697*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t"
698*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t"
699*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
700*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x0f(%[src_ptr])                    \n\t"
701*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x08(%[src_ptr])                    \n\t"
702*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x0f(%[ref_ptr])                    \n\t"
703*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x08(%[ref_ptr])                    \n\t"
704*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
705*fb1b10abSAndroid Build Coastguard Worker 
706*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
707*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
708*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
709*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
710*fb1b10abSAndroid Build Coastguard Worker 
711*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
712*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
713*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
714*fb1b10abSAndroid Build Coastguard Worker 
715*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp3],   %[ftmp10],      %[ftmp0]            \n\t"
716*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp4],   %[ftmp10],      %[ftmp0]            \n\t"
717*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp5],   %[ftmp12],      %[ftmp0]            \n\t"
718*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp6],   %[ftmp12],      %[ftmp0]            \n\t"
719*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
720*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
721*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
722*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
723*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
724*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
725*fb1b10abSAndroid Build Coastguard Worker 
726*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
727*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
728*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
729*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
730*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
731*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
732*fb1b10abSAndroid Build Coastguard Worker       [ftmp12]"=&f"(ftmp[12]),          [tmp0]"=&r"(tmp[0]),
733*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr)
734*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
735*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
736*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
737*fb1b10abSAndroid Build Coastguard Worker     : "memory"
738*fb1b10abSAndroid Build Coastguard Worker   );
739*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
740*fb1b10abSAndroid Build Coastguard Worker 
741*fb1b10abSAndroid Build Coastguard Worker   return *sse - (((int64_t)sum * sum) / (16 * high));
742*fb1b10abSAndroid Build Coastguard Worker }
743*fb1b10abSAndroid Build Coastguard Worker 
/* Emits the public vpx_variance16x<n>_mmi() entry point. The generated
 * function forwards all of its arguments to vpx_variance16x() and passes n as
 * the block height. */
#define VPX_VARIANCE16XN(n)                                                 \
  uint32_t vpx_variance16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
                                    const uint8_t *ref_ptr, int ref_stride, \
                                    uint32_t *sse) {                        \
    const uint32_t variance =                                               \
        vpx_variance16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n);  \
    return variance;                                                        \
  }

VPX_VARIANCE16XN(32)
VPX_VARIANCE16XN(16)
VPX_VARIANCE16XN(8)
754*fb1b10abSAndroid Build Coastguard Worker 
vpx_variance8x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,int high)755*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride,
756*fb1b10abSAndroid Build Coastguard Worker                                       const uint8_t *ref_ptr, int ref_stride,
757*fb1b10abSAndroid Build Coastguard Worker                                       uint32_t *sse, int high) {
758*fb1b10abSAndroid Build Coastguard Worker   int sum;
759*fb1b10abSAndroid Build Coastguard Worker   double ftmp[13];
760*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[3];
761*fb1b10abSAndroid Build Coastguard Worker 
762*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
763*fb1b10abSAndroid Build Coastguard Worker 
764*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
765*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
766*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
767*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
768*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
769*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
770*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
771*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp10],  %[ftmp10],      %[ftmp10]           \n\t"
772*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp12],  %[ftmp12],      %[ftmp12]           \n\t"
773*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
774*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
775*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
776*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t"
777*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t"
778*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_8
779*fb1b10abSAndroid Build Coastguard Worker 
780*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
781*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
782*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
783*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
784*fb1b10abSAndroid Build Coastguard Worker 
785*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
786*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
787*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
788*fb1b10abSAndroid Build Coastguard Worker 
789*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp3],   %[ftmp10],      %[ftmp0]            \n\t"
790*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp4],   %[ftmp10],      %[ftmp0]            \n\t"
791*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp5],   %[ftmp12],      %[ftmp0]            \n\t"
792*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp6],   %[ftmp12],      %[ftmp0]            \n\t"
793*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
794*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
795*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
796*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp11]           \n\t"
797*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
798*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
799*fb1b10abSAndroid Build Coastguard Worker 
800*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
801*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
802*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
803*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
804*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
805*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
806*fb1b10abSAndroid Build Coastguard Worker       [ftmp12]"=&f"(ftmp[12]),          [tmp0]"=&r"(tmp[0]),
807*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr)
808*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
809*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
810*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
811*fb1b10abSAndroid Build Coastguard Worker     : "memory"
812*fb1b10abSAndroid Build Coastguard Worker   );
813*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
814*fb1b10abSAndroid Build Coastguard Worker 
815*fb1b10abSAndroid Build Coastguard Worker   return *sse - (((int64_t)sum * sum) / (8 * high));
816*fb1b10abSAndroid Build Coastguard Worker }
817*fb1b10abSAndroid Build Coastguard Worker 
/* Emits the public vpx_variance8x<n>_mmi() entry point. The generated
 * function forwards all of its arguments to vpx_variance8x() and passes n as
 * the block height. */
#define VPX_VARIANCE8XN(n)                                                 \
  uint32_t vpx_variance8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
                                   const uint8_t *ref_ptr, int ref_stride, \
                                   uint32_t *sse) {                        \
    const uint32_t variance =                                              \
        vpx_variance8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n);  \
    return variance;                                                       \
  }

VPX_VARIANCE8XN(16)
VPX_VARIANCE8XN(8)
VPX_VARIANCE8XN(4)
828*fb1b10abSAndroid Build Coastguard Worker 
vpx_variance4x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,int high)829*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride,
830*fb1b10abSAndroid Build Coastguard Worker                                       const uint8_t *ref_ptr, int ref_stride,
831*fb1b10abSAndroid Build Coastguard Worker                                       uint32_t *sse, int high) {
832*fb1b10abSAndroid Build Coastguard Worker   int sum;
833*fb1b10abSAndroid Build Coastguard Worker   double ftmp[12];
834*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[3];
835*fb1b10abSAndroid Build Coastguard Worker 
836*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
837*fb1b10abSAndroid Build Coastguard Worker 
838*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
839*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
840*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
841*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp10]                           \n\t"
842*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
843*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
844*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
845*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"
846*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
847*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
848*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp1],   0x07(%[src_ptr])                    \n\t"
849*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp1],   0x00(%[src_ptr])                    \n\t"
850*fb1b10abSAndroid Build Coastguard Worker     "gsldlc1    %[ftmp2],   0x07(%[ref_ptr])                    \n\t"
851*fb1b10abSAndroid Build Coastguard Worker     "gsldrc1    %[ftmp2],   0x00(%[ref_ptr])                    \n\t"
852*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_SUM_4
853*fb1b10abSAndroid Build Coastguard Worker 
854*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
855*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
856*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
857*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
858*fb1b10abSAndroid Build Coastguard Worker 
859*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp9],   %[ftmp6],       %[ftmp10]           \n\t"
860*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp6]            \n\t"
861*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
862*fb1b10abSAndroid Build Coastguard Worker 
863*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp3],   %[ftmp7],       %[ftmp0]            \n\t"
864*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp4],   %[ftmp7],       %[ftmp0]            \n\t"
865*fb1b10abSAndroid Build Coastguard Worker     "punpcklhw  %[ftmp5],   %[ftmp8],       %[ftmp0]            \n\t"
866*fb1b10abSAndroid Build Coastguard Worker     "punpckhhw  %[ftmp6],   %[ftmp8],       %[ftmp0]            \n\t"
867*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
868*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
869*fb1b10abSAndroid Build Coastguard Worker     "psubw      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
870*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp0],   %[ftmp3],       %[ftmp10]           \n\t"
871*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
872*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp0],   0x00(%[sum])                        \n\t"
873*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
874*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
875*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
876*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
877*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
878*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),
879*fb1b10abSAndroid Build Coastguard Worker       [tmp0]"=&r"(tmp[0]),
880*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr)
881*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
882*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
883*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum)
884*fb1b10abSAndroid Build Coastguard Worker     : "memory"
885*fb1b10abSAndroid Build Coastguard Worker   );
886*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
887*fb1b10abSAndroid Build Coastguard Worker 
888*fb1b10abSAndroid Build Coastguard Worker   return *sse - (((int64_t)sum * sum) / (4 * high));
889*fb1b10abSAndroid Build Coastguard Worker }
890*fb1b10abSAndroid Build Coastguard Worker 
/* Emits the public vpx_variance4x<n>_mmi() entry point. The generated
 * function forwards all of its arguments to vpx_variance4x() and passes n as
 * the block height. */
#define VPX_VARIANCE4XN(n)                                                 \
  uint32_t vpx_variance4x##n##_mmi(const uint8_t *src_ptr, int src_stride, \
                                   const uint8_t *ref_ptr, int ref_stride, \
                                   uint32_t *sse) {                        \
    const uint32_t variance =                                              \
        vpx_variance4x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n);  \
    return variance;                                                       \
  }

VPX_VARIANCE4XN(8)
VPX_VARIANCE4XN(4)
900*fb1b10abSAndroid Build Coastguard Worker 
vpx_mse16x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,uint64_t high)901*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride,
902*fb1b10abSAndroid Build Coastguard Worker                                   const uint8_t *ref_ptr, int ref_stride,
903*fb1b10abSAndroid Build Coastguard Worker                                   uint32_t *sse, uint64_t high) {
904*fb1b10abSAndroid Build Coastguard Worker   double ftmp[12];
905*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[1];
906*fb1b10abSAndroid Build Coastguard Worker 
907*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
908*fb1b10abSAndroid Build Coastguard Worker 
909*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
910*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
911*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
912*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
913*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
914*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
915*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
916*fb1b10abSAndroid Build Coastguard Worker 
917*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
918*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_16
919*fb1b10abSAndroid Build Coastguard Worker 
920*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
921*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
922*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
923*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
924*fb1b10abSAndroid Build Coastguard Worker 
925*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
926*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
927*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
928*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
929*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
930*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
931*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
932*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
933*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
934*fb1b10abSAndroid Build Coastguard Worker       [tmp0]"=&r"(tmp[0]),
935*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr)
936*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
937*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
938*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse)
939*fb1b10abSAndroid Build Coastguard Worker     : "memory"
940*fb1b10abSAndroid Build Coastguard Worker   );
941*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
942*fb1b10abSAndroid Build Coastguard Worker 
943*fb1b10abSAndroid Build Coastguard Worker   return *sse;
944*fb1b10abSAndroid Build Coastguard Worker }
945*fb1b10abSAndroid Build Coastguard Worker 
/* Emits the public vpx_mse16x<n>_mmi() entry point. The generated function
 * forwards all of its arguments to vpx_mse16x() and passes n as the block
 * height. */
#define vpx_mse16xN(n)                                                   \
  uint32_t vpx_mse16x##n##_mmi(const uint8_t *src_ptr, int src_stride,   \
                               const uint8_t *ref_ptr, int ref_stride,   \
                               uint32_t *sse) {                          \
    return vpx_mse16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
  }

/* No trailing ';': the macro expands to a complete function definition, and
 * a stray semicolon after it is an empty file-scope declaration that ISO C
 * does not allow. */
vpx_mse16xN(16)
vpx_mse16xN(8)
955*fb1b10abSAndroid Build Coastguard Worker 
vpx_mse8x(const uint8_t * src_ptr,int src_stride,const uint8_t * ref_ptr,int ref_stride,uint32_t * sse,uint64_t high)956*fb1b10abSAndroid Build Coastguard Worker static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride,
957*fb1b10abSAndroid Build Coastguard Worker                                  const uint8_t *ref_ptr, int ref_stride,
958*fb1b10abSAndroid Build Coastguard Worker                                  uint32_t *sse, uint64_t high) {
959*fb1b10abSAndroid Build Coastguard Worker   double ftmp[12];
960*fb1b10abSAndroid Build Coastguard Worker   uint32_t tmp[1];
961*fb1b10abSAndroid Build Coastguard Worker 
962*fb1b10abSAndroid Build Coastguard Worker   *sse = 0;
963*fb1b10abSAndroid Build Coastguard Worker 
964*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
965*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
966*fb1b10abSAndroid Build Coastguard Worker     "li         %[tmp0],    0x20                                \n\t"
967*fb1b10abSAndroid Build Coastguard Worker     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
968*fb1b10abSAndroid Build Coastguard Worker     MMI_L(%[tmp0], %[high], 0x00)
969*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
970*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp8],   %[ftmp8],       %[ftmp8]            \n\t"
971*fb1b10abSAndroid Build Coastguard Worker 
972*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
973*fb1b10abSAndroid Build Coastguard Worker     VARIANCE_SSE_8
974*fb1b10abSAndroid Build Coastguard Worker 
975*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[tmp0],    %[tmp0],        -0x01               \n\t"
976*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
977*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride])
978*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[tmp0],    1b                                  \n\t"
979*fb1b10abSAndroid Build Coastguard Worker 
980*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp9],   %[ftmp8],       %[ftmp11]           \n\t"
981*fb1b10abSAndroid Build Coastguard Worker     "paddw      %[ftmp9],   %[ftmp9],       %[ftmp8]            \n\t"
982*fb1b10abSAndroid Build Coastguard Worker     "swc1       %[ftmp9],   0x00(%[sse])                        \n\t"
983*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
984*fb1b10abSAndroid Build Coastguard Worker       [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
985*fb1b10abSAndroid Build Coastguard Worker       [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
986*fb1b10abSAndroid Build Coastguard Worker       [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
987*fb1b10abSAndroid Build Coastguard Worker       [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
988*fb1b10abSAndroid Build Coastguard Worker       [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
989*fb1b10abSAndroid Build Coastguard Worker       [tmp0]"=&r"(tmp[0]),
990*fb1b10abSAndroid Build Coastguard Worker       [src_ptr]"+&r"(src_ptr),          [ref_ptr]"+&r"(ref_ptr)
991*fb1b10abSAndroid Build Coastguard Worker     : [src_stride]"r"((mips_reg)src_stride),
992*fb1b10abSAndroid Build Coastguard Worker       [ref_stride]"r"((mips_reg)ref_stride),
993*fb1b10abSAndroid Build Coastguard Worker       [high]"r"(&high), [sse]"r"(sse)
994*fb1b10abSAndroid Build Coastguard Worker     : "memory"
995*fb1b10abSAndroid Build Coastguard Worker   );
996*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
997*fb1b10abSAndroid Build Coastguard Worker 
998*fb1b10abSAndroid Build Coastguard Worker   return *sse;
999*fb1b10abSAndroid Build Coastguard Worker }
1000*fb1b10abSAndroid Build Coastguard Worker 
/* Emits the public vpx_mse8x<n>_mmi() entry point.
 *
 * The generated function forwards all of its arguments unchanged to
 * vpx_mse8x(), passing n as the row count (`high`), and returns that
 * call's result, i.e. the value vpx_mse8x() stored in *sse.
 *
 * Use sites carry no trailing ';': the expansion is already a complete
 * function definition, and an extra ';' at file scope is an empty
 * declaration that pedantic ISO C rejects. This also matches how the
 * SUBPIX_* generators in this file are instantiated. */
#define vpx_mse8xN(n)                                                   \
  uint32_t vpx_mse8x##n##_mmi(const uint8_t *src_ptr, int src_stride,   \
                              const uint8_t *ref_ptr, int ref_stride,   \
                              uint32_t *sse) {                          \
    return vpx_mse8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \
  }

vpx_mse8xN(16)
vpx_mse8xN(8)
1010*fb1b10abSAndroid Build Coastguard Worker 
1011*fb1b10abSAndroid Build Coastguard Worker #define SUBPIX_VAR(W, H)                                                       \
1012*fb1b10abSAndroid Build Coastguard Worker   uint32_t vpx_sub_pixel_variance##W##x##H##_mmi(                              \
1013*fb1b10abSAndroid Build Coastguard Worker       const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
1014*fb1b10abSAndroid Build Coastguard Worker       const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
1015*fb1b10abSAndroid Build Coastguard Worker     uint16_t fdata3[((H) + 1) * (W)];                                          \
1016*fb1b10abSAndroid Build Coastguard Worker     uint8_t temp2[(H) * (W)];                                                  \
1017*fb1b10abSAndroid Build Coastguard Worker                                                                                \
1018*fb1b10abSAndroid Build Coastguard Worker     var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, (H) + 1, \
1019*fb1b10abSAndroid Build Coastguard Worker                                       W, bilinear_filters[x_offset]);          \
1020*fb1b10abSAndroid Build Coastguard Worker     var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,              \
1021*fb1b10abSAndroid Build Coastguard Worker                                        bilinear_filters[y_offset]);            \
1022*fb1b10abSAndroid Build Coastguard Worker                                                                                \
1023*fb1b10abSAndroid Build Coastguard Worker     return vpx_variance##W##x##H##_mmi(temp2, W, ref_ptr, ref_stride, sse);    \
1024*fb1b10abSAndroid Build Coastguard Worker   }
1025*fb1b10abSAndroid Build Coastguard Worker 
1026*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR(64, 64)
1027*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR(64, 32)
1028*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR(32, 64)
1029*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR(32, 32)
1030*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR(32, 16)
1031*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR(16, 32)
1032*fb1b10abSAndroid Build Coastguard Worker 
var_filter_block2d_bil_16x(const uint8_t * src_ptr,int src_stride,int x_offset,int y_offset,uint8_t * temp2,int counter)1033*fb1b10abSAndroid Build Coastguard Worker static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr,
1034*fb1b10abSAndroid Build Coastguard Worker                                               int src_stride, int x_offset,
1035*fb1b10abSAndroid Build Coastguard Worker                                               int y_offset, uint8_t *temp2,
1036*fb1b10abSAndroid Build Coastguard Worker                                               int counter) {
1037*fb1b10abSAndroid Build Coastguard Worker   uint8_t *temp2_ptr = temp2;
1038*fb1b10abSAndroid Build Coastguard Worker   mips_reg l_counter = counter;
1039*fb1b10abSAndroid Build Coastguard Worker   double ftmp[15];
1040*fb1b10abSAndroid Build Coastguard Worker   double ff_ph_40, mask;
1041*fb1b10abSAndroid Build Coastguard Worker   double filter_x0, filter_x1, filter_y0, filter_y1;
1042*fb1b10abSAndroid Build Coastguard Worker   mips_reg tmp[2];
1043*fb1b10abSAndroid Build Coastguard Worker   uint64_t x0, x1, y0, y1, all;
1044*fb1b10abSAndroid Build Coastguard Worker 
1045*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *filter_x = bilinear_filters[x_offset];
1046*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *filter_y = bilinear_filters[y_offset];
1047*fb1b10abSAndroid Build Coastguard Worker   x0 = (uint64_t)filter_x[0];
1048*fb1b10abSAndroid Build Coastguard Worker   x1 = (uint64_t)filter_x[1];
1049*fb1b10abSAndroid Build Coastguard Worker   y0 = (uint64_t)filter_y[0];
1050*fb1b10abSAndroid Build Coastguard Worker   y1 = (uint64_t)filter_y[1];
1051*fb1b10abSAndroid Build Coastguard Worker   all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
1052*fb1b10abSAndroid Build Coastguard Worker 
1053*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
1054*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
1055*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1056*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[all], %[ftmp14])
1057*fb1b10abSAndroid Build Coastguard Worker     "punpcklbh  %[ftmp14],  %[ftmp14],      %[ftmp0]            \n\t"
1058*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_x0], %[ftmp14],    %[ftmp0]            \n\t"
1059*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x10)
1060*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[mask])
1061*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp14],  %[ftmp14],      %[mask]             \n\t"
1062*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_x1], %[ftmp14],    %[ftmp0]            \n\t"
1063*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp14],  %[ftmp14],      %[mask]             \n\t"
1064*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_y0], %[ftmp14],    %[ftmp0]            \n\t"
1065*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp14],  %[ftmp14],      %[mask]             \n\t"
1066*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_y1], %[ftmp14],    %[ftmp0]            \n\t"
1067*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x07)
1068*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[ftmp14])
1069*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x0040004000400040)
1070*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[ff_ph_40])
1071*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
1072*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[mask])
1073*fb1b10abSAndroid Build Coastguard Worker     // fdata3: fdata3[0] ~ fdata3[15]
1074*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
1075*fb1b10abSAndroid Build Coastguard Worker 
1076*fb1b10abSAndroid Build Coastguard Worker     // fdata3 +src_stride*1: fdata3[0] ~ fdata3[15]
1077*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1078*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B
1079*fb1b10abSAndroid Build Coastguard Worker     // temp2: temp2[0] ~ temp2[15]
1080*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A
1081*fb1b10abSAndroid Build Coastguard Worker 
1082*fb1b10abSAndroid Build Coastguard Worker     // fdata3 +src_stride*2: fdata3[0] ~ fdata3[15]
1083*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1084*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
1085*fb1b10abSAndroid Build Coastguard Worker     // temp2+16*1: temp2[0] ~ temp2[15]
1086*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10)
1087*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B
1088*fb1b10abSAndroid Build Coastguard Worker 
1089*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
1090*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1091*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B
1092*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10)
1093*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A
1094*fb1b10abSAndroid Build Coastguard Worker 
1095*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1096*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A
1097*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10)
1098*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B
1099*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[counter], %[counter],     -0x01               \n\t"
1100*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[counter], 1b                                  \n\t"
1101*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
1102*fb1b10abSAndroid Build Coastguard Worker       [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
1103*fb1b10abSAndroid Build Coastguard Worker       [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
1104*fb1b10abSAndroid Build Coastguard Worker       [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
1105*fb1b10abSAndroid Build Coastguard Worker       [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
1106*fb1b10abSAndroid Build Coastguard Worker       [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
1107*fb1b10abSAndroid Build Coastguard Worker       [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
1108*fb1b10abSAndroid Build Coastguard Worker       [counter]"+&r"(l_counter), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
1109*fb1b10abSAndroid Build Coastguard Worker       [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
1110*fb1b10abSAndroid Build Coastguard Worker       [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
1111*fb1b10abSAndroid Build Coastguard Worker     : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
1112*fb1b10abSAndroid Build Coastguard Worker     : "memory"
1113*fb1b10abSAndroid Build Coastguard Worker   );
1114*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
1115*fb1b10abSAndroid Build Coastguard Worker }
1116*fb1b10abSAndroid Build Coastguard Worker 
/* Emits vpx_sub_pixel_variance16x<H>_mmi() on top of the fused-pass helper
 * var_filter_block2d_bil_16x().
 *
 * The generated function filters the source block into the automatic
 * 16 * H byte buffer temp2 and returns
 * vpx_variance16x<H>_mmi(temp2, 16, ref_ptr, ref_stride, sse).
 *
 * The helper is given (H - 2) / 2 loop iterations: its straight-line
 * prologue already produces the first two temp2 rows and each loop iteration
 * produces two more, so H must be even and at least 4 for exactly H rows to
 * be written (and for the helper's do-while style loop to terminate).
 * x_offset and y_offset are passed through unchecked; the helper uses them
 * as indices into the 8-entry bilinear_filters table. */
#define SUBPIX_VAR16XN(H)                                                      \
  uint32_t vpx_sub_pixel_variance16x##H##_mmi(                                 \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
    uint8_t temp2[16 * (H)];                                                   \
    var_filter_block2d_bil_16x(src_ptr, src_stride, x_offset, y_offset, temp2, \
                               ((H)-2) / 2);                                   \
                                                                               \
    return vpx_variance16x##H##_mmi(temp2, 16, ref_ptr, ref_stride, sse);      \
  }

SUBPIX_VAR16XN(16)
SUBPIX_VAR16XN(8)
1130*fb1b10abSAndroid Build Coastguard Worker 
var_filter_block2d_bil_8x(const uint8_t * src_ptr,int src_stride,int x_offset,int y_offset,uint8_t * temp2,int counter)1131*fb1b10abSAndroid Build Coastguard Worker static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr,
1132*fb1b10abSAndroid Build Coastguard Worker                                              int src_stride, int x_offset,
1133*fb1b10abSAndroid Build Coastguard Worker                                              int y_offset, uint8_t *temp2,
1134*fb1b10abSAndroid Build Coastguard Worker                                              int counter) {
1135*fb1b10abSAndroid Build Coastguard Worker   uint8_t *temp2_ptr = temp2;
1136*fb1b10abSAndroid Build Coastguard Worker   mips_reg l_counter = counter;
1137*fb1b10abSAndroid Build Coastguard Worker   double ftmp[15];
1138*fb1b10abSAndroid Build Coastguard Worker   mips_reg tmp[2];
1139*fb1b10abSAndroid Build Coastguard Worker   double ff_ph_40, mask;
1140*fb1b10abSAndroid Build Coastguard Worker   uint64_t x0, x1, y0, y1, all;
1141*fb1b10abSAndroid Build Coastguard Worker   double filter_x0, filter_x1, filter_y0, filter_y1;
1142*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *filter_x = bilinear_filters[x_offset];
1143*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *filter_y = bilinear_filters[y_offset];
1144*fb1b10abSAndroid Build Coastguard Worker   x0 = (uint64_t)filter_x[0];
1145*fb1b10abSAndroid Build Coastguard Worker   x1 = (uint64_t)filter_x[1];
1146*fb1b10abSAndroid Build Coastguard Worker   y0 = (uint64_t)filter_y[0];
1147*fb1b10abSAndroid Build Coastguard Worker   y1 = (uint64_t)filter_y[1];
1148*fb1b10abSAndroid Build Coastguard Worker   all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
1149*fb1b10abSAndroid Build Coastguard Worker 
1150*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
1151*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
1152*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1153*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[all], %[ftmp14])
1154*fb1b10abSAndroid Build Coastguard Worker     "punpcklbh  %[ftmp14],  %[ftmp14],      %[ftmp0]            \n\t"
1155*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_x0], %[ftmp14],    %[ftmp0]            \n\t"
1156*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x10)
1157*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[mask])
1158*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp14],  %[ftmp14],      %[mask]             \n\t"
1159*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_x1], %[ftmp14],    %[ftmp0]            \n\t"
1160*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp14],  %[ftmp14],      %[mask]             \n\t"
1161*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_y0], %[ftmp14],    %[ftmp0]            \n\t"
1162*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp14],  %[ftmp14],      %[mask]             \n\t"
1163*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_y1], %[ftmp14],    %[ftmp0]            \n\t"
1164*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1165*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x07)
1166*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[ftmp14])
1167*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x0040004000400040)
1168*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[ff_ph_40])
1169*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
1170*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[mask])
1171*fb1b10abSAndroid Build Coastguard Worker 
1172*fb1b10abSAndroid Build Coastguard Worker     // fdata3: fdata3[0] ~ fdata3[7]
1173*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
1174*fb1b10abSAndroid Build Coastguard Worker 
1175*fb1b10abSAndroid Build Coastguard Worker     // fdata3 +src_stride*1: fdata3[0] ~ fdata3[7]
1176*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1177*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B
1178*fb1b10abSAndroid Build Coastguard Worker     // temp2: temp2[0] ~ temp2[7]
1179*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A
1180*fb1b10abSAndroid Build Coastguard Worker 
1181*fb1b10abSAndroid Build Coastguard Worker     // fdata3 +src_stride*2: fdata3[0] ~ fdata3[7]
1182*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1183*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
1184*fb1b10abSAndroid Build Coastguard Worker     // temp2+8*1: temp2[0] ~ temp2[7]
1185*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08)
1186*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B
1187*fb1b10abSAndroid Build Coastguard Worker 
1188*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
1189*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1190*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B
1191*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08)
1192*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A
1193*fb1b10abSAndroid Build Coastguard Worker 
1194*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1195*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A
1196*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08)
1197*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B
1198*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[counter], %[counter],     -0x01               \n\t"
1199*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[counter], 1b                                  \n\t"
1200*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
1201*fb1b10abSAndroid Build Coastguard Worker       [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
1202*fb1b10abSAndroid Build Coastguard Worker       [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
1203*fb1b10abSAndroid Build Coastguard Worker       [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
1204*fb1b10abSAndroid Build Coastguard Worker       [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]),
1205*fb1b10abSAndroid Build Coastguard Worker       [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]),
1206*fb1b10abSAndroid Build Coastguard Worker       [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr),
1207*fb1b10abSAndroid Build Coastguard Worker       [counter]"+&r"(l_counter), [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
1208*fb1b10abSAndroid Build Coastguard Worker       [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
1209*fb1b10abSAndroid Build Coastguard Worker       [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
1210*fb1b10abSAndroid Build Coastguard Worker     : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
1211*fb1b10abSAndroid Build Coastguard Worker     : "memory"
1212*fb1b10abSAndroid Build Coastguard Worker   );
1213*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
1214*fb1b10abSAndroid Build Coastguard Worker }
1215*fb1b10abSAndroid Build Coastguard Worker 
/* Emits vpx_sub_pixel_variance8x<H>_mmi() on top of the fused-pass helper
 * var_filter_block2d_bil_8x().
 *
 * The generated function filters the source block into the automatic
 * 8 * H byte buffer temp2 and returns
 * vpx_variance8x<H>_mmi(temp2, 8, ref_ptr, ref_stride, sse).
 *
 * The helper is given (H - 2) / 2 loop iterations: its straight-line
 * prologue already produces the first two temp2 rows and each loop iteration
 * produces two more, so H must be even and at least 4 for exactly H rows to
 * be written (and for the helper's do-while style loop to terminate).
 * x_offset and y_offset are passed through unchecked; the helper uses them
 * as indices into the 8-entry bilinear_filters table. */
#define SUBPIX_VAR8XN(H)                                                      \
  uint32_t vpx_sub_pixel_variance8x##H##_mmi(                                 \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,     \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                \
    uint8_t temp2[8 * (H)];                                                   \
    var_filter_block2d_bil_8x(src_ptr, src_stride, x_offset, y_offset, temp2, \
                              ((H)-2) / 2);                                   \
                                                                              \
    return vpx_variance8x##H##_mmi(temp2, 8, ref_ptr, ref_stride, sse);       \
  }

SUBPIX_VAR8XN(16)
SUBPIX_VAR8XN(8)
SUBPIX_VAR8XN(4)
1230*fb1b10abSAndroid Build Coastguard Worker 
var_filter_block2d_bil_4x(const uint8_t * src_ptr,int src_stride,int x_offset,int y_offset,uint8_t * temp2,int counter)1231*fb1b10abSAndroid Build Coastguard Worker static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr,
1232*fb1b10abSAndroid Build Coastguard Worker                                              int src_stride, int x_offset,
1233*fb1b10abSAndroid Build Coastguard Worker                                              int y_offset, uint8_t *temp2,
1234*fb1b10abSAndroid Build Coastguard Worker                                              int counter) {
1235*fb1b10abSAndroid Build Coastguard Worker   uint8_t *temp2_ptr = temp2;
1236*fb1b10abSAndroid Build Coastguard Worker   mips_reg l_counter = counter;
1237*fb1b10abSAndroid Build Coastguard Worker   double ftmp[7];
1238*fb1b10abSAndroid Build Coastguard Worker   mips_reg tmp[2];
1239*fb1b10abSAndroid Build Coastguard Worker   double ff_ph_40, mask;
1240*fb1b10abSAndroid Build Coastguard Worker   uint64_t x0, x1, y0, y1, all;
1241*fb1b10abSAndroid Build Coastguard Worker   double filter_x0, filter_x1, filter_y0, filter_y1;
1242*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *filter_x = bilinear_filters[x_offset];
1243*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *filter_y = bilinear_filters[y_offset];
1244*fb1b10abSAndroid Build Coastguard Worker   x0 = (uint64_t)filter_x[0];
1245*fb1b10abSAndroid Build Coastguard Worker   x1 = (uint64_t)filter_x[1];
1246*fb1b10abSAndroid Build Coastguard Worker   y0 = (uint64_t)filter_y[0];
1247*fb1b10abSAndroid Build Coastguard Worker   y1 = (uint64_t)filter_y[1];
1248*fb1b10abSAndroid Build Coastguard Worker   all = x0 | x1 << 8 | y0 << 16 | y1 << 24;
1249*fb1b10abSAndroid Build Coastguard Worker 
1250*fb1b10abSAndroid Build Coastguard Worker   /* clang-format off */
1251*fb1b10abSAndroid Build Coastguard Worker   __asm__ volatile (
1252*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1253*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[all], %[ftmp6])
1254*fb1b10abSAndroid Build Coastguard Worker     "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1255*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_x0], %[ftmp6],     %[ftmp0]            \n\t"
1256*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x10)
1257*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[mask])
1258*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp6],   %[ftmp6],       %[mask]             \n\t"
1259*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_x1], %[ftmp6],     %[ftmp0]            \n\t"
1260*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp6],   %[ftmp6],       %[mask]             \n\t"
1261*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_y0], %[ftmp6],     %[ftmp0]            \n\t"
1262*fb1b10abSAndroid Build Coastguard Worker     "ssrld      %[ftmp6],   %[ftmp6],       %[mask]             \n\t"
1263*fb1b10abSAndroid Build Coastguard Worker     "pshufh     %[filter_y1], %[ftmp6],     %[ftmp0]            \n\t"
1264*fb1b10abSAndroid Build Coastguard Worker     "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
1265*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x07)
1266*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[ftmp6])
1267*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x0040004000400040)
1268*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[ff_ph_40])
1269*fb1b10abSAndroid Build Coastguard Worker     MMI_LI(%[tmp0], 0x00ff00ff00ff00ff)
1270*fb1b10abSAndroid Build Coastguard Worker     MMI_MTC1(%[tmp0], %[mask])
1271*fb1b10abSAndroid Build Coastguard Worker     // fdata3: fdata3[0] ~ fdata3[3]
1272*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
1273*fb1b10abSAndroid Build Coastguard Worker 
1274*fb1b10abSAndroid Build Coastguard Worker     // fdata3 +src_stride*1: fdata3[0] ~ fdata3[3]
1275*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1276*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B
1277*fb1b10abSAndroid Build Coastguard Worker     // temp2: temp2[0] ~ temp2[7]
1278*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A
1279*fb1b10abSAndroid Build Coastguard Worker 
1280*fb1b10abSAndroid Build Coastguard Worker     // fdata3 +src_stride*2: fdata3[0] ~ fdata3[3]
1281*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1282*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
1283*fb1b10abSAndroid Build Coastguard Worker     // temp2+4*1: temp2[0] ~ temp2[7]
1284*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04)
1285*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B
1286*fb1b10abSAndroid Build Coastguard Worker 
1287*fb1b10abSAndroid Build Coastguard Worker     "1:                                                         \n\t"
1288*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1289*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B
1290*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04)
1291*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A
1292*fb1b10abSAndroid Build Coastguard Worker 
1293*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride])
1294*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A
1295*fb1b10abSAndroid Build Coastguard Worker     MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04)
1296*fb1b10abSAndroid Build Coastguard Worker     VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B
1297*fb1b10abSAndroid Build Coastguard Worker     "addiu      %[counter], %[counter],     -0x01               \n\t"
1298*fb1b10abSAndroid Build Coastguard Worker     "bnez       %[counter], 1b                                  \n\t"
1299*fb1b10abSAndroid Build Coastguard Worker     : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
1300*fb1b10abSAndroid Build Coastguard Worker       [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
1301*fb1b10abSAndroid Build Coastguard Worker       [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr),
1302*fb1b10abSAndroid Build Coastguard Worker       [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter),
1303*fb1b10abSAndroid Build Coastguard Worker       [ff_ph_40] "=&f"(ff_ph_40), [mask] "=&f"(mask),
1304*fb1b10abSAndroid Build Coastguard Worker       [filter_x0] "=&f"(filter_x0), [filter_x1] "=&f"(filter_x1),
1305*fb1b10abSAndroid Build Coastguard Worker       [filter_y0] "=&f"(filter_y0), [filter_y1] "=&f"(filter_y1)
1306*fb1b10abSAndroid Build Coastguard Worker     : [src_stride] "r"((mips_reg)src_stride), [all] "r"(all)
1307*fb1b10abSAndroid Build Coastguard Worker     : "memory"
1308*fb1b10abSAndroid Build Coastguard Worker   );
1309*fb1b10abSAndroid Build Coastguard Worker   /* clang-format on */
1310*fb1b10abSAndroid Build Coastguard Worker }
1311*fb1b10abSAndroid Build Coastguard Worker 
/*
 * SUBPIX_VAR4XN(H) emits the definition of vpx_sub_pixel_variance4x<H>_mmi().
 *
 * The generated function calls var_filter_block2d_bil_4x() with the source
 * block, the x/y sub-pixel offsets and a local 4 * H byte buffer, and then
 * returns vpx_variance4x<H>_mmi() of that buffer (stride 4) measured against
 * ref_ptr / ref_stride. The sse pointer is forwarded untouched to the
 * variance routine.
 *
 * NOTE(review): the final argument given to var_filter_block2d_bil_4x(),
 * ((H)-2) / 2, looks like an iteration count for its asm loop -- confirm
 * against that function's definition.
 */
#define SUBPIX_VAR4XN(H)                                                     \
  uint32_t vpx_sub_pixel_variance4x##H##_mmi(                                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint8_t filtered[4 * (H)];                                               \
    const int loop_count = ((H)-2) / 2;                                      \
                                                                             \
    var_filter_block2d_bil_4x(src_ptr, src_stride, x_offset, y_offset,       \
                              filtered, loop_count);                         \
    return vpx_variance4x##H##_mmi(filtered, 4, ref_ptr, ref_stride, sse);   \
  }
1322*fb1b10abSAndroid Build Coastguard Worker 
/* Instantiate vpx_sub_pixel_variance4x8_mmi() and
 * vpx_sub_pixel_variance4x4_mmi() from the SUBPIX_VAR4XN template. */
1323*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR4XN(8)
1324*fb1b10abSAndroid Build Coastguard Worker SUBPIX_VAR4XN(4)
1325*fb1b10abSAndroid Build Coastguard Worker 
/*
 * SUBPIX_AVG_VAR(W, H) emits the definition of
 * vpx_sub_pixel_avg_variance<W>x<H>_mmi().
 *
 * Sequence carried out by the generated function:
 *   1. var_filter_block2d_bil_first_pass() is called on src_ptr with the
 *      bilinear_filters[x_offset] taps; its output buffer holds
 *      (H + 1) * W uint16_t entries.
 *   2. var_filter_block2d_bil_second_pass() is called on that buffer with
 *      the bilinear_filters[y_offset] taps; its output buffer holds
 *      H * W uint8_t entries.
 *   3. vpx_comp_avg_pred_c() is called with second_pred and the step-2
 *      buffer, writing into a third H * W buffer declared through
 *      DECLARE_ALIGNED(16, ...).
 *   4. vpx_variance<W>x<H>_mmi() of that third buffer (stride W) against
 *      ref_ptr / ref_stride is returned; sse is forwarded unchanged.
 *
 * NOTE(review): x_offset and y_offset index bilinear_filters directly with
 * no range check here -- presumably callers guarantee 0..7; confirm.
 */
#define SUBPIX_AVG_VAR(W, H)                                                  \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_mmi(                         \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,     \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                  \
      const uint8_t *second_pred) {                                           \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                 \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                 \
    uint16_t first_pass[((H) + 1) * (W)];                                     \
    uint8_t filtered[(H) * (W)];                                              \
    DECLARE_ALIGNED(16, uint8_t, averaged[(H) * (W)]);                        \
                                                                              \
    var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_stride, 1,     \
                                      (H) + 1, W, x_taps);                    \
    var_filter_block2d_bil_second_pass(first_pass, filtered, W, W, H, W,      \
                                       y_taps);                               \
    vpx_comp_avg_pred_c(averaged, second_pred, W, H, filtered, W);            \
                                                                              \
    return vpx_variance##W##x##H##_mmi(averaged, W, ref_ptr, ref_stride,      \
                                       sse);                                  \
  }
1344*fb1b10abSAndroid Build Coastguard Worker 
/* Instantiate vpx_sub_pixel_avg_variance<W>x<H>_mmi() from the SUBPIX_AVG_VAR
 * template, one function per (W, H) pair listed below. */
1345*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(64, 64)
1346*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(64, 32)
1347*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(32, 64)
1348*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(32, 32)
1349*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(32, 16)
1350*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(16, 32)
1351*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(16, 16)
1352*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(16, 8)
1353*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(8, 16)
1354*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(8, 8)
1355*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(8, 4)
1356*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(4, 8)
1357*fb1b10abSAndroid Build Coastguard Worker SUBPIX_AVG_VAR(4, 4)
1358