xref: /aosp_15_r20/external/libvpx/vpx_dsp/mips/convolve2_dspr2.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker  *
4*fb1b10abSAndroid Build Coastguard Worker  *  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker  *  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker  *  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker  *  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker  *  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker  */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include <assert.h>
12*fb1b10abSAndroid Build Coastguard Worker #include <stdio.h>
13*fb1b10abSAndroid Build Coastguard Worker 
14*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
15*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/mips/convolve_common_dspr2.h"
16*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/vpx_dsp_common.h"
17*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/vpx_filter.h"
18*fb1b10abSAndroid Build Coastguard Worker #include "vpx_ports/mem.h"
19*fb1b10abSAndroid Build Coastguard Worker 
20*fb1b10abSAndroid Build Coastguard Worker #if HAVE_DSPR2
convolve_bi_horiz_4_transposed_dspr2(const uint8_t * src,int32_t src_stride,uint8_t * dst,int32_t dst_stride,const int16_t * filter_x0,int32_t h)21*fb1b10abSAndroid Build Coastguard Worker static void convolve_bi_horiz_4_transposed_dspr2(
22*fb1b10abSAndroid Build Coastguard Worker     const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
23*fb1b10abSAndroid Build Coastguard Worker     const int16_t *filter_x0, int32_t h) {
24*fb1b10abSAndroid Build Coastguard Worker   int32_t y;
25*fb1b10abSAndroid Build Coastguard Worker   uint8_t *cm = vpx_ff_cropTbl;
26*fb1b10abSAndroid Build Coastguard Worker   uint8_t *dst_ptr;
27*fb1b10abSAndroid Build Coastguard Worker   int32_t Temp1, Temp2;
28*fb1b10abSAndroid Build Coastguard Worker   uint32_t vector4a = 64;
29*fb1b10abSAndroid Build Coastguard Worker   uint32_t tp1, tp2;
30*fb1b10abSAndroid Build Coastguard Worker   uint32_t p1, p2;
31*fb1b10abSAndroid Build Coastguard Worker   const int16_t *filter = &filter_x0[3];
32*fb1b10abSAndroid Build Coastguard Worker   uint32_t filter45;
33*fb1b10abSAndroid Build Coastguard Worker 
34*fb1b10abSAndroid Build Coastguard Worker   filter45 = ((const int32_t *)filter)[0];
35*fb1b10abSAndroid Build Coastguard Worker 
36*fb1b10abSAndroid Build Coastguard Worker   for (y = h; y--;) {
37*fb1b10abSAndroid Build Coastguard Worker     dst_ptr = dst;
38*fb1b10abSAndroid Build Coastguard Worker     /* prefetch data to cache memory */
39*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src + src_stride);
40*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src + src_stride + 32);
41*fb1b10abSAndroid Build Coastguard Worker 
42*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
43*fb1b10abSAndroid Build Coastguard Worker         "ulw              %[tp1],         0(%[src])                      \n\t"
44*fb1b10abSAndroid Build Coastguard Worker         "ulw              %[tp2],         4(%[src])                      \n\t"
45*fb1b10abSAndroid Build Coastguard Worker 
46*fb1b10abSAndroid Build Coastguard Worker         /* even 1. pixel */
47*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac3                           \n\t"
48*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac3                           \n\t"
49*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
50*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
51*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
52*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp1],       $ac3,           31             \n\t"
53*fb1b10abSAndroid Build Coastguard Worker 
54*fb1b10abSAndroid Build Coastguard Worker         /* even 2. pixel */
55*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac2                           \n\t"
56*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac2                           \n\t"
57*fb1b10abSAndroid Build Coastguard Worker         "balign           %[tp2],         %[tp1],         3              \n\t"
58*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac2,           %[p2],          %[filter45]    \n\t"
59*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp2],       $ac2,           31             \n\t"
60*fb1b10abSAndroid Build Coastguard Worker 
61*fb1b10abSAndroid Build Coastguard Worker         /* odd 1. pixel */
62*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[tp1],         %[Temp1](%[cm])                \n\t"
63*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac3                           \n\t"
64*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac3                           \n\t"
65*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbr    %[p1],          %[tp2]                         \n\t"
66*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbl    %[p2],          %[tp2]                         \n\t"
67*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
68*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp1],       $ac3,           31             \n\t"
69*fb1b10abSAndroid Build Coastguard Worker 
70*fb1b10abSAndroid Build Coastguard Worker         /* odd 2. pixel */
71*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[tp2],         %[Temp2](%[cm])                \n\t"
72*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac2                           \n\t"
73*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac2                           \n\t"
74*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac2,           %[p2],          %[filter45]    \n\t"
75*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp2],       $ac2,           31             \n\t"
76*fb1b10abSAndroid Build Coastguard Worker 
77*fb1b10abSAndroid Build Coastguard Worker         /* clamp */
78*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[p1],          %[Temp1](%[cm])                \n\t"
79*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[p2],          %[Temp2](%[cm])                \n\t"
80*fb1b10abSAndroid Build Coastguard Worker 
81*fb1b10abSAndroid Build Coastguard Worker         /* store bytes */
82*fb1b10abSAndroid Build Coastguard Worker         "sb               %[tp1],         0(%[dst_ptr])                  \n\t"
83*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_stride]  \n\t"
84*fb1b10abSAndroid Build Coastguard Worker 
85*fb1b10abSAndroid Build Coastguard Worker         "sb               %[p1],          0(%[dst_ptr])                  \n\t"
86*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_stride]  \n\t"
87*fb1b10abSAndroid Build Coastguard Worker 
88*fb1b10abSAndroid Build Coastguard Worker         "sb               %[tp2],         0(%[dst_ptr])                  \n\t"
89*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_stride]  \n\t"
90*fb1b10abSAndroid Build Coastguard Worker 
91*fb1b10abSAndroid Build Coastguard Worker         "sb               %[p2],          0(%[dst_ptr])                  \n\t"
92*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_stride]  \n\t"
93*fb1b10abSAndroid Build Coastguard Worker 
94*fb1b10abSAndroid Build Coastguard Worker         : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2),
95*fb1b10abSAndroid Build Coastguard Worker           [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [dst_ptr] "+r"(dst_ptr)
96*fb1b10abSAndroid Build Coastguard Worker         : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
97*fb1b10abSAndroid Build Coastguard Worker           [src] "r"(src), [dst_stride] "r"(dst_stride));
98*fb1b10abSAndroid Build Coastguard Worker 
99*fb1b10abSAndroid Build Coastguard Worker     /* Next row... */
100*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
101*fb1b10abSAndroid Build Coastguard Worker     dst += 1;
102*fb1b10abSAndroid Build Coastguard Worker   }
103*fb1b10abSAndroid Build Coastguard Worker }
104*fb1b10abSAndroid Build Coastguard Worker 
convolve_bi_horiz_8_transposed_dspr2(const uint8_t * src,int32_t src_stride,uint8_t * dst,int32_t dst_stride,const int16_t * filter_x0,int32_t h)105*fb1b10abSAndroid Build Coastguard Worker static void convolve_bi_horiz_8_transposed_dspr2(
106*fb1b10abSAndroid Build Coastguard Worker     const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
107*fb1b10abSAndroid Build Coastguard Worker     const int16_t *filter_x0, int32_t h) {
108*fb1b10abSAndroid Build Coastguard Worker   int32_t y;
109*fb1b10abSAndroid Build Coastguard Worker   uint8_t *cm = vpx_ff_cropTbl;
110*fb1b10abSAndroid Build Coastguard Worker   uint8_t *dst_ptr;
111*fb1b10abSAndroid Build Coastguard Worker   uint32_t vector4a = 64;
112*fb1b10abSAndroid Build Coastguard Worker   int32_t Temp1, Temp2, Temp3;
113*fb1b10abSAndroid Build Coastguard Worker   uint32_t tp1, tp2, tp3;
114*fb1b10abSAndroid Build Coastguard Worker   uint32_t p1, p2, p3, p4;
115*fb1b10abSAndroid Build Coastguard Worker   uint8_t *odd_dst;
116*fb1b10abSAndroid Build Coastguard Worker   uint32_t dst_pitch_2 = (dst_stride << 1);
117*fb1b10abSAndroid Build Coastguard Worker   const int16_t *filter = &filter_x0[3];
118*fb1b10abSAndroid Build Coastguard Worker   uint32_t filter45;
119*fb1b10abSAndroid Build Coastguard Worker 
120*fb1b10abSAndroid Build Coastguard Worker   filter45 = ((const int32_t *)filter)[0];
121*fb1b10abSAndroid Build Coastguard Worker 
122*fb1b10abSAndroid Build Coastguard Worker   for (y = h; y--;) {
123*fb1b10abSAndroid Build Coastguard Worker     /* prefetch data to cache memory */
124*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src + src_stride);
125*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src + src_stride + 32);
126*fb1b10abSAndroid Build Coastguard Worker 
127*fb1b10abSAndroid Build Coastguard Worker     dst_ptr = dst;
128*fb1b10abSAndroid Build Coastguard Worker     odd_dst = (dst_ptr + dst_stride);
129*fb1b10abSAndroid Build Coastguard Worker 
130*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
131*fb1b10abSAndroid Build Coastguard Worker         "ulw              %[tp1],         0(%[src])                       \n\t"
132*fb1b10abSAndroid Build Coastguard Worker         "ulw              %[tp2],         4(%[src])                       \n\t"
133*fb1b10abSAndroid Build Coastguard Worker 
134*fb1b10abSAndroid Build Coastguard Worker         /* even 1. pixel */
135*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac3                            \n\t"
136*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac3                            \n\t"
137*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac2                            \n\t"
138*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac2                            \n\t"
139*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbr    %[p1],          %[tp1]                          \n\t"
140*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbl    %[p2],          %[tp1]                          \n\t"
141*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbr    %[p3],          %[tp2]                          \n\t"
142*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbl    %[p4],          %[tp2]                          \n\t"
143*fb1b10abSAndroid Build Coastguard Worker         "ulw              %[tp3],         8(%[src])                       \n\t"
144*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac3,           %[p1],          %[filter45]     \n\t"
145*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp1],       $ac3,           31              \n\t"
146*fb1b10abSAndroid Build Coastguard Worker 
147*fb1b10abSAndroid Build Coastguard Worker         /* even 2. pixel */
148*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac2,           %[p2],          %[filter45]     \n\t"
149*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp3],       $ac2,           31              \n\t"
150*fb1b10abSAndroid Build Coastguard Worker 
151*fb1b10abSAndroid Build Coastguard Worker         /* even 3. pixel */
152*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[Temp2],       %[Temp1](%[cm])                 \n\t"
153*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac1                            \n\t"
154*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac1                            \n\t"
155*fb1b10abSAndroid Build Coastguard Worker         "balign           %[tp3],         %[tp2],         3              \n\t"
156*fb1b10abSAndroid Build Coastguard Worker         "balign           %[tp2],         %[tp1],         3              \n\t"
157*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac1,           %[p3],          %[filter45]     \n\t"
158*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[tp1],         %[Temp3](%[cm])                 \n\t"
159*fb1b10abSAndroid Build Coastguard Worker         "extp             %[p3],          $ac1,           31              \n\t"
160*fb1b10abSAndroid Build Coastguard Worker 
161*fb1b10abSAndroid Build Coastguard Worker         /* even 4. pixel */
162*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac2                            \n\t"
163*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac2                            \n\t"
164*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac3                            \n\t"
165*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac3                            \n\t"
166*fb1b10abSAndroid Build Coastguard Worker         "sb               %[Temp2],       0(%[dst_ptr])                   \n\t"
167*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_pitch_2]  \n\t"
168*fb1b10abSAndroid Build Coastguard Worker         "sb               %[tp1],         0(%[dst_ptr])                   \n\t"
169*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_pitch_2]  \n\t"
170*fb1b10abSAndroid Build Coastguard Worker 
171*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac2,           %[p4],          %[filter45]     \n\t"
172*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp3],       $ac2,           31              \n\t"
173*fb1b10abSAndroid Build Coastguard Worker 
174*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[Temp1],         %[p3](%[cm])                    "
175*fb1b10abSAndroid Build Coastguard Worker         "\n\t"
176*fb1b10abSAndroid Build Coastguard Worker 
177*fb1b10abSAndroid Build Coastguard Worker         /* odd 1. pixel */
178*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac1                            \n\t"
179*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac1                            \n\t"
180*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbr    %[p1],          %[tp2]                          \n\t"
181*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbl    %[p2],          %[tp2]                          \n\t"
182*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbr    %[p3],          %[tp3]                          \n\t"
183*fb1b10abSAndroid Build Coastguard Worker         "preceu.ph.qbl    %[p4],          %[tp3]                          \n\t"
184*fb1b10abSAndroid Build Coastguard Worker         "sb               %[Temp1],       0(%[dst_ptr])                   \n\t"
185*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_pitch_2]  \n\t"
186*fb1b10abSAndroid Build Coastguard Worker 
187*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac3,           %[p1],          %[filter45]     \n\t"
188*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp2],       $ac3,           31              \n\t"
189*fb1b10abSAndroid Build Coastguard Worker 
190*fb1b10abSAndroid Build Coastguard Worker         /* odd 2. pixel */
191*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[tp1],         %[Temp3](%[cm])                 \n\t"
192*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac3                            \n\t"
193*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac3                            \n\t"
194*fb1b10abSAndroid Build Coastguard Worker         "mtlo             %[vector4a],    $ac2                            \n\t"
195*fb1b10abSAndroid Build Coastguard Worker         "mthi             $zero,          $ac2                            \n\t"
196*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac1,           %[p2],          %[filter45]     \n\t"
197*fb1b10abSAndroid Build Coastguard Worker         "sb               %[tp1],         0(%[dst_ptr])                   \n\t"
198*fb1b10abSAndroid Build Coastguard Worker         "addu             %[dst_ptr],     %[dst_ptr],     %[dst_pitch_2]  \n\t"
199*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp3],       $ac1,           31              \n\t"
200*fb1b10abSAndroid Build Coastguard Worker 
201*fb1b10abSAndroid Build Coastguard Worker         /* odd 3. pixel */
202*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[tp3],         %[Temp2](%[cm])                 \n\t"
203*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac3,           %[p3],          %[filter45]     \n\t"
204*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp2],       $ac3,           31              \n\t"
205*fb1b10abSAndroid Build Coastguard Worker 
206*fb1b10abSAndroid Build Coastguard Worker         /* odd 4. pixel */
207*fb1b10abSAndroid Build Coastguard Worker         "sb               %[tp3],         0(%[odd_dst])                   \n\t"
208*fb1b10abSAndroid Build Coastguard Worker         "addu             %[odd_dst],     %[odd_dst],     %[dst_pitch_2]  \n\t"
209*fb1b10abSAndroid Build Coastguard Worker         "dpa.w.ph         $ac2,           %[p4],          %[filter45]     \n\t"
210*fb1b10abSAndroid Build Coastguard Worker         "extp             %[Temp1],       $ac2,           31              \n\t"
211*fb1b10abSAndroid Build Coastguard Worker 
212*fb1b10abSAndroid Build Coastguard Worker         /* clamp */
213*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[p4],          %[Temp3](%[cm])                 \n\t"
214*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[p2],          %[Temp2](%[cm])                 \n\t"
215*fb1b10abSAndroid Build Coastguard Worker         "lbux             %[p1],          %[Temp1](%[cm])                 \n\t"
216*fb1b10abSAndroid Build Coastguard Worker 
217*fb1b10abSAndroid Build Coastguard Worker         /* store bytes */
218*fb1b10abSAndroid Build Coastguard Worker         "sb               %[p4],          0(%[odd_dst])                   \n\t"
219*fb1b10abSAndroid Build Coastguard Worker         "addu             %[odd_dst],     %[odd_dst],     %[dst_pitch_2]  \n\t"
220*fb1b10abSAndroid Build Coastguard Worker 
221*fb1b10abSAndroid Build Coastguard Worker         "sb               %[p2],          0(%[odd_dst])                   \n\t"
222*fb1b10abSAndroid Build Coastguard Worker         "addu             %[odd_dst],     %[odd_dst],     %[dst_pitch_2]  \n\t"
223*fb1b10abSAndroid Build Coastguard Worker 
224*fb1b10abSAndroid Build Coastguard Worker         "sb               %[p1],          0(%[odd_dst])                   \n\t"
225*fb1b10abSAndroid Build Coastguard Worker 
226*fb1b10abSAndroid Build Coastguard Worker         : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [p1] "=&r"(p1),
227*fb1b10abSAndroid Build Coastguard Worker           [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1),
228*fb1b10abSAndroid Build Coastguard Worker           [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst_ptr] "+r"(dst_ptr),
229*fb1b10abSAndroid Build Coastguard Worker           [odd_dst] "+r"(odd_dst)
230*fb1b10abSAndroid Build Coastguard Worker         : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
231*fb1b10abSAndroid Build Coastguard Worker           [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
232*fb1b10abSAndroid Build Coastguard Worker 
233*fb1b10abSAndroid Build Coastguard Worker     /* Next row... */
234*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
235*fb1b10abSAndroid Build Coastguard Worker     dst += 1;
236*fb1b10abSAndroid Build Coastguard Worker   }
237*fb1b10abSAndroid Build Coastguard Worker }
238*fb1b10abSAndroid Build Coastguard Worker 
convolve_bi_horiz_16_transposed_dspr2(const uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,const int16_t * filter_x0,int32_t h,int32_t count)239*fb1b10abSAndroid Build Coastguard Worker static void convolve_bi_horiz_16_transposed_dspr2(
240*fb1b10abSAndroid Build Coastguard Worker     const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr,
241*fb1b10abSAndroid Build Coastguard Worker     int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) {
242*fb1b10abSAndroid Build Coastguard Worker   int32_t c, y;
243*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *src;
244*fb1b10abSAndroid Build Coastguard Worker   uint8_t *dst;
245*fb1b10abSAndroid Build Coastguard Worker   uint8_t *cm = vpx_ff_cropTbl;
246*fb1b10abSAndroid Build Coastguard Worker   uint32_t vector_64 = 64;
247*fb1b10abSAndroid Build Coastguard Worker   int32_t Temp1, Temp2, Temp3;
248*fb1b10abSAndroid Build Coastguard Worker   uint32_t qload1, qload2;
249*fb1b10abSAndroid Build Coastguard Worker   uint32_t p1, p2, p3, p4, p5;
250*fb1b10abSAndroid Build Coastguard Worker   uint32_t st1, st2, st3;
251*fb1b10abSAndroid Build Coastguard Worker   uint32_t dst_pitch_2 = (dst_stride << 1);
252*fb1b10abSAndroid Build Coastguard Worker   uint8_t *odd_dst;
253*fb1b10abSAndroid Build Coastguard Worker   const int16_t *filter = &filter_x0[3];
254*fb1b10abSAndroid Build Coastguard Worker   uint32_t filter45;
255*fb1b10abSAndroid Build Coastguard Worker 
256*fb1b10abSAndroid Build Coastguard Worker   filter45 = ((const int32_t *)filter)[0];
257*fb1b10abSAndroid Build Coastguard Worker 
258*fb1b10abSAndroid Build Coastguard Worker   for (y = h; y--;) {
259*fb1b10abSAndroid Build Coastguard Worker     /* prefetch data to cache memory */
260*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src_ptr + src_stride);
261*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src_ptr + src_stride + 32);
262*fb1b10abSAndroid Build Coastguard Worker 
263*fb1b10abSAndroid Build Coastguard Worker     src = src_ptr;
264*fb1b10abSAndroid Build Coastguard Worker     dst = dst_ptr;
265*fb1b10abSAndroid Build Coastguard Worker 
266*fb1b10abSAndroid Build Coastguard Worker     odd_dst = (dst + dst_stride);
267*fb1b10abSAndroid Build Coastguard Worker 
268*fb1b10abSAndroid Build Coastguard Worker     for (c = 0; c < count; c++) {
269*fb1b10abSAndroid Build Coastguard Worker       __asm__ __volatile__(
270*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        0(%[src])                       "
271*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
272*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        4(%[src])                       "
273*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
274*fb1b10abSAndroid Build Coastguard Worker 
275*fb1b10abSAndroid Build Coastguard Worker           /* even 1. pixel */
276*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
277*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
278*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
279*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
280*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
281*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
282*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
283*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
284*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload1]                       "
285*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
286*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p2],            %[qload1]                       "
287*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
288*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p3],            %[qload2]                       "
289*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
290*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p4],            %[qload2]                       "
291*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
292*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        8(%[src])                       "
293*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
294*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p1],          %[filter45]     "
295*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
296*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
297*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
298*fb1b10abSAndroid Build Coastguard Worker 
299*fb1b10abSAndroid Build Coastguard Worker           /* even 2. pixel */
300*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
301*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
302*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
303*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
304*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload1]                       "
305*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
306*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p5],            %[qload1]                       "
307*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
308*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        12(%[src])                      "
309*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
310*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p2],          %[filter45]     "
311*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
312*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
313*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
314*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
315*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
316*fb1b10abSAndroid Build Coastguard Worker 
317*fb1b10abSAndroid Build Coastguard Worker           /* even 3. pixel */
318*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
319*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
320*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
321*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
322*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p2],            %[qload2]                       "
323*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
324*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[dst])                       "
325*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
326*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]   "
327*fb1b10abSAndroid Build Coastguard Worker           "          \n\t"
328*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p3],          %[filter45]     "
329*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
330*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
331*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
332*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
333*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
334*fb1b10abSAndroid Build Coastguard Worker 
335*fb1b10abSAndroid Build Coastguard Worker           /* even 4. pixel */
336*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
337*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
338*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
339*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
340*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p3],            %[qload2]                       "
341*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
342*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[dst])                       "
343*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
344*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
345*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
346*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p4],          %[filter45]     "
347*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
348*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
349*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
350*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
351*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
352*fb1b10abSAndroid Build Coastguard Worker 
353*fb1b10abSAndroid Build Coastguard Worker           /* even 5. pixel */
354*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
355*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
356*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
357*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
358*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[dst])                       "
359*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
360*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
361*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
362*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p1],          %[filter45]     "
363*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
364*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
365*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
366*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
367*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
368*fb1b10abSAndroid Build Coastguard Worker 
369*fb1b10abSAndroid Build Coastguard Worker           /* even 6. pixel */
370*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
371*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
372*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
373*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
374*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[dst])                       "
375*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
376*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
377*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
378*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        20(%[src])                      "
379*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
380*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p5],          %[filter45]     "
381*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
382*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
383*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
384*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
385*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
386*fb1b10abSAndroid Build Coastguard Worker 
387*fb1b10abSAndroid Build Coastguard Worker           /* even 7. pixel */
388*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
389*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
390*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
391*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
392*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p5],            %[qload1]                       "
393*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
394*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[dst])                       "
395*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
396*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
397*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
398*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p2],          %[filter45]     "
399*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
400*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
401*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
402*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
403*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
404*fb1b10abSAndroid Build Coastguard Worker 
405*fb1b10abSAndroid Build Coastguard Worker           /* even 8. pixel */
406*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
407*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
408*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
409*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
410*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p3],          %[filter45]     "
411*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
412*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[dst])                       "
413*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
414*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
415*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
416*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
417*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
418*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
419*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
420*fb1b10abSAndroid Build Coastguard Worker 
421*fb1b10abSAndroid Build Coastguard Worker           /* ODD pixels */
422*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        1(%[src])                       "
423*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
424*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        5(%[src])                       "
425*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
426*fb1b10abSAndroid Build Coastguard Worker 
427*fb1b10abSAndroid Build Coastguard Worker           /* odd 1. pixel */
428*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
429*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
430*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
431*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
432*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload1]                       "
433*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
434*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p2],            %[qload1]                       "
435*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
436*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p3],            %[qload2]                       "
437*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
438*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p4],            %[qload2]                       "
439*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
440*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[dst])                       "
441*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
442*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
443*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
444*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        9(%[src])                       "
445*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
446*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p1],          %[filter45]     "
447*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
448*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
449*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
450*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
451*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
452*fb1b10abSAndroid Build Coastguard Worker 
453*fb1b10abSAndroid Build Coastguard Worker           /* odd 2. pixel */
454*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
455*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
456*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
457*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
458*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload2]                       "
459*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
460*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p5],            %[qload2]                       "
461*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
462*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[dst])                       "
463*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
464*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        13(%[src])                      "
465*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
466*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p2],          %[filter45]     "
467*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
468*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
469*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
470*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
471*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
472*fb1b10abSAndroid Build Coastguard Worker 
473*fb1b10abSAndroid Build Coastguard Worker           /* odd 3. pixel */
474*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
475*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
476*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
477*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
478*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p2],            %[qload1]                       "
479*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
480*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[odd_dst])                   "
481*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
482*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
483*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
484*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p3],          %[filter45]     "
485*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
486*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
487*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
488*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
489*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
490*fb1b10abSAndroid Build Coastguard Worker 
491*fb1b10abSAndroid Build Coastguard Worker           /* odd 4. pixel */
492*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
493*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
494*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
495*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
496*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p3],            %[qload1]                       "
497*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
498*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[odd_dst])                   "
499*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
500*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
501*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
502*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p4],          %[filter45]     "
503*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
504*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
505*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
506*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
507*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
508*fb1b10abSAndroid Build Coastguard Worker 
509*fb1b10abSAndroid Build Coastguard Worker           /* odd 5. pixel */
510*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
511*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
512*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
513*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
514*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[odd_dst])                   "
515*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
516*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
517*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
518*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p1],          %[filter45]     "
519*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
520*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
521*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
522*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
523*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
524*fb1b10abSAndroid Build Coastguard Worker 
525*fb1b10abSAndroid Build Coastguard Worker           /* odd 6. pixel */
526*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
527*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
528*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
529*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
530*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[odd_dst])                   "
531*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
532*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
533*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
534*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        21(%[src])                      "
535*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
536*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p5],          %[filter45]     "
537*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
538*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
539*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
540*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
541*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
542*fb1b10abSAndroid Build Coastguard Worker 
543*fb1b10abSAndroid Build Coastguard Worker           /* odd 7. pixel */
544*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
545*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
546*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
547*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
548*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p5],            %[qload1]                       "
549*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
550*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[odd_dst])                   "
551*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
552*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
553*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
554*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p2],          %[filter45]     "
555*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
556*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
557*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
558*fb1b10abSAndroid Build Coastguard Worker 
559*fb1b10abSAndroid Build Coastguard Worker           /* odd 8. pixel */
560*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p3],          %[filter45]     "
561*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
562*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
563*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
564*fb1b10abSAndroid Build Coastguard Worker 
565*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
566*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
567*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
568*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
569*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
570*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
571*fb1b10abSAndroid Build Coastguard Worker 
572*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[odd_dst])                   "
573*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
574*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
575*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
576*fb1b10abSAndroid Build Coastguard Worker 
577*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[odd_dst])                   "
578*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
579*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
580*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
581*fb1b10abSAndroid Build Coastguard Worker 
582*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[odd_dst])                   "
583*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
584*fb1b10abSAndroid Build Coastguard Worker 
585*fb1b10abSAndroid Build Coastguard Worker           : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5),
586*fb1b10abSAndroid Build Coastguard Worker             [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3),
587*fb1b10abSAndroid Build Coastguard Worker             [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4),
588*fb1b10abSAndroid Build Coastguard Worker             [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
589*fb1b10abSAndroid Build Coastguard Worker             [dst] "+r"(dst), [odd_dst] "+r"(odd_dst)
590*fb1b10abSAndroid Build Coastguard Worker           : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
591*fb1b10abSAndroid Build Coastguard Worker             [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
592*fb1b10abSAndroid Build Coastguard Worker 
593*fb1b10abSAndroid Build Coastguard Worker       src += 16;
594*fb1b10abSAndroid Build Coastguard Worker       dst = (dst_ptr + ((c + 1) * 16 * dst_stride));
595*fb1b10abSAndroid Build Coastguard Worker       odd_dst = (dst + dst_stride);
596*fb1b10abSAndroid Build Coastguard Worker     }
597*fb1b10abSAndroid Build Coastguard Worker 
598*fb1b10abSAndroid Build Coastguard Worker     /* Next row... */
599*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
600*fb1b10abSAndroid Build Coastguard Worker     dst_ptr += 1;
601*fb1b10abSAndroid Build Coastguard Worker   }
602*fb1b10abSAndroid Build Coastguard Worker }
603*fb1b10abSAndroid Build Coastguard Worker 
convolve_bi_horiz_64_transposed_dspr2(const uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,const int16_t * filter_x0,int32_t h)604*fb1b10abSAndroid Build Coastguard Worker static void convolve_bi_horiz_64_transposed_dspr2(
605*fb1b10abSAndroid Build Coastguard Worker     const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr,
606*fb1b10abSAndroid Build Coastguard Worker     int32_t dst_stride, const int16_t *filter_x0, int32_t h) {
607*fb1b10abSAndroid Build Coastguard Worker   int32_t c, y;
608*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *src;
609*fb1b10abSAndroid Build Coastguard Worker   uint8_t *dst;
610*fb1b10abSAndroid Build Coastguard Worker   uint8_t *cm = vpx_ff_cropTbl;
611*fb1b10abSAndroid Build Coastguard Worker   uint32_t vector_64 = 64;
612*fb1b10abSAndroid Build Coastguard Worker   int32_t Temp1, Temp2, Temp3;
613*fb1b10abSAndroid Build Coastguard Worker   uint32_t qload1, qload2;
614*fb1b10abSAndroid Build Coastguard Worker   uint32_t p1, p2, p3, p4, p5;
615*fb1b10abSAndroid Build Coastguard Worker   uint32_t st1, st2, st3;
616*fb1b10abSAndroid Build Coastguard Worker   uint32_t dst_pitch_2 = (dst_stride << 1);
617*fb1b10abSAndroid Build Coastguard Worker   uint8_t *odd_dst;
618*fb1b10abSAndroid Build Coastguard Worker   const int16_t *filter = &filter_x0[3];
619*fb1b10abSAndroid Build Coastguard Worker   uint32_t filter45;
620*fb1b10abSAndroid Build Coastguard Worker 
621*fb1b10abSAndroid Build Coastguard Worker   filter45 = ((const int32_t *)filter)[0];
622*fb1b10abSAndroid Build Coastguard Worker 
623*fb1b10abSAndroid Build Coastguard Worker   for (y = h; y--;) {
624*fb1b10abSAndroid Build Coastguard Worker     /* prefetch data to cache memory */
625*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src_ptr + src_stride);
626*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src_ptr + src_stride + 32);
627*fb1b10abSAndroid Build Coastguard Worker     prefetch_load(src_ptr + src_stride + 64);
628*fb1b10abSAndroid Build Coastguard Worker 
629*fb1b10abSAndroid Build Coastguard Worker     src = src_ptr;
630*fb1b10abSAndroid Build Coastguard Worker     dst = dst_ptr;
631*fb1b10abSAndroid Build Coastguard Worker 
632*fb1b10abSAndroid Build Coastguard Worker     odd_dst = (dst + dst_stride);
633*fb1b10abSAndroid Build Coastguard Worker 
634*fb1b10abSAndroid Build Coastguard Worker     for (c = 0; c < 4; c++) {
635*fb1b10abSAndroid Build Coastguard Worker       __asm__ __volatile__(
636*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        0(%[src])                       "
637*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
638*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        4(%[src])                       "
639*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
640*fb1b10abSAndroid Build Coastguard Worker 
641*fb1b10abSAndroid Build Coastguard Worker           /* even 1. pixel */
642*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
643*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
644*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
645*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
646*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
647*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
648*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
649*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
650*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload1]                       "
651*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
652*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p2],            %[qload1]                       "
653*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
654*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p3],            %[qload2]                       "
655*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
656*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p4],            %[qload2]                       "
657*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
658*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        8(%[src])                       "
659*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
660*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p1],          %[filter45]     "
661*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
662*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
663*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
664*fb1b10abSAndroid Build Coastguard Worker 
665*fb1b10abSAndroid Build Coastguard Worker           /* even 2. pixel */
666*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
667*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
668*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
669*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
670*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload1]                       "
671*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
672*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p5],            %[qload1]                       "
673*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
674*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        12(%[src])                      "
675*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
676*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p2],          %[filter45]     "
677*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
678*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
679*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
680*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
681*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
682*fb1b10abSAndroid Build Coastguard Worker 
683*fb1b10abSAndroid Build Coastguard Worker           /* even 3. pixel */
684*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
685*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
686*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
687*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
688*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p2],            %[qload2]                       "
689*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
690*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[dst])                       "
691*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 1 */
692*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]   "
693*fb1b10abSAndroid Build Coastguard Worker           "          \n\t"
694*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p3],          %[filter45]     "
695*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
696*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
697*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
698*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
699*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
700*fb1b10abSAndroid Build Coastguard Worker 
701*fb1b10abSAndroid Build Coastguard Worker           /* even 4. pixel */
702*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
703*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
704*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
705*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
706*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p3],            %[qload2]                       "
707*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
708*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[dst])                       "
709*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 2 */
710*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
711*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
712*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p4],          %[filter45]     "
713*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
714*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
715*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
716*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
717*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
718*fb1b10abSAndroid Build Coastguard Worker 
719*fb1b10abSAndroid Build Coastguard Worker           /* even 5. pixel */
720*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
721*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
722*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
723*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
724*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[dst])                       "
725*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 3 */
726*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
727*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
728*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p1],          %[filter45]     "
729*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
730*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
731*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
732*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
733*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
734*fb1b10abSAndroid Build Coastguard Worker 
735*fb1b10abSAndroid Build Coastguard Worker           /* even 6. pixel */
736*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
737*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
738*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
739*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
740*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[dst])                       "
741*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 4 */
742*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
743*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
744*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        20(%[src])                      "
745*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
746*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p5],          %[filter45]     "
747*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
748*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
749*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
750*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
751*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
752*fb1b10abSAndroid Build Coastguard Worker 
753*fb1b10abSAndroid Build Coastguard Worker           /* even 7. pixel */
754*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
755*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
756*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
757*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
758*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p5],            %[qload1]                       "
759*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
760*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[dst])                       "
761*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 5 */
762*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
763*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
764*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p2],          %[filter45]     "
765*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
766*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
767*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
768*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
769*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
770*fb1b10abSAndroid Build Coastguard Worker 
771*fb1b10abSAndroid Build Coastguard Worker           /* even 8. pixel */
772*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
773*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
774*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
775*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
776*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p3],          %[filter45]     "
777*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
778*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[dst])                       "
779*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 6 */
780*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
781*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
782*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
783*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
784*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
785*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
786*fb1b10abSAndroid Build Coastguard Worker 
787*fb1b10abSAndroid Build Coastguard Worker           /* ODD pixels */
788*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        1(%[src])                       "
789*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
790*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        5(%[src])                       "
791*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
792*fb1b10abSAndroid Build Coastguard Worker 
793*fb1b10abSAndroid Build Coastguard Worker           /* odd 1. pixel */
794*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
795*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
796*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
797*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
798*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload1]                       "
799*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
800*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p2],            %[qload1]                       "
801*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
802*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p3],            %[qload2]                       "
803*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
804*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p4],            %[qload2]                       "
805*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
806*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[dst])                       "
807*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 7 */
808*fb1b10abSAndroid Build Coastguard Worker           "addu             %[dst],           %[dst],         %[dst_pitch_2]  "
809*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
810*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload2],        9(%[src])                       "
811*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
812*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p1],          %[filter45]     "
813*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
814*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
815*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
816*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
817*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
818*fb1b10abSAndroid Build Coastguard Worker 
819*fb1b10abSAndroid Build Coastguard Worker           /* odd 2. pixel */
820*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
821*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
822*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
823*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
824*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p1],            %[qload2]                       "
825*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
826*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p5],            %[qload2]                       "
827*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
828*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[dst])                       "
829*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* even 8 */
830*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        13(%[src])                      "
831*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
832*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p2],          %[filter45]     "
833*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
834*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
835*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
836*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
837*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
838*fb1b10abSAndroid Build Coastguard Worker 
839*fb1b10abSAndroid Build Coastguard Worker           /* odd 3. pixel */
840*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
841*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
842*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
843*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
844*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p2],            %[qload1]                       "
845*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
846*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[odd_dst])                   "
847*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 1 */
848*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
849*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
850*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p3],          %[filter45]     "
851*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
852*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
853*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
854*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
855*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
856*fb1b10abSAndroid Build Coastguard Worker 
857*fb1b10abSAndroid Build Coastguard Worker           /* odd 4. pixel */
858*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
859*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
860*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
861*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
862*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbl    %[p3],            %[qload1]                       "
863*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
864*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[odd_dst])                   "
865*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 2 */
866*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
867*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
868*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p4],          %[filter45]     "
869*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
870*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
871*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
872*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
873*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
874*fb1b10abSAndroid Build Coastguard Worker 
875*fb1b10abSAndroid Build Coastguard Worker           /* odd 5. pixel */
876*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac2                            "
877*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
878*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac2                            "
879*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
880*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[odd_dst])                   "
881*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 3 */
882*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
883*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
884*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p1],          %[filter45]     "
885*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
886*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
887*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
888*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
889*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
890*fb1b10abSAndroid Build Coastguard Worker 
891*fb1b10abSAndroid Build Coastguard Worker           /* odd 6. pixel */
892*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac3                            "
893*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
894*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac3                            "
895*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
896*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[odd_dst])                   "
897*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 4 */
898*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
899*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
900*fb1b10abSAndroid Build Coastguard Worker           "ulw              %[qload1],        21(%[src])                      "
901*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
902*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac2,             %[p5],          %[filter45]     "
903*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
904*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp2],         $ac2,           31              "
905*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
906*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
907*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
908*fb1b10abSAndroid Build Coastguard Worker 
909*fb1b10abSAndroid Build Coastguard Worker           /* odd 7. pixel */
910*fb1b10abSAndroid Build Coastguard Worker           "mtlo             %[vector_64],     $ac1                            "
911*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
912*fb1b10abSAndroid Build Coastguard Worker           "mthi             $zero,            $ac1                            "
913*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
914*fb1b10abSAndroid Build Coastguard Worker           "preceu.ph.qbr    %[p5],            %[qload1]                       "
915*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
916*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[odd_dst])                   "
917*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 5 */
918*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
919*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
920*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac3,             %[p2],          %[filter45]     "
921*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
922*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp3],         $ac3,           31              "
923*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
924*fb1b10abSAndroid Build Coastguard Worker 
925*fb1b10abSAndroid Build Coastguard Worker           /* odd 8. pixel */
926*fb1b10abSAndroid Build Coastguard Worker           "dpa.w.ph         $ac1,             %[p3],          %[filter45]     "
927*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
928*fb1b10abSAndroid Build Coastguard Worker           "extp             %[Temp1],         $ac1,           31              "
929*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
930*fb1b10abSAndroid Build Coastguard Worker 
931*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st2],           %[Temp2](%[cm])                 "
932*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
933*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st3],           %[Temp3](%[cm])                 "
934*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
935*fb1b10abSAndroid Build Coastguard Worker           "lbux             %[st1],           %[Temp1](%[cm])                 "
936*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
937*fb1b10abSAndroid Build Coastguard Worker 
938*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st2],           0(%[odd_dst])                   "
939*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 6 */
940*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
941*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
942*fb1b10abSAndroid Build Coastguard Worker 
943*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st3],           0(%[odd_dst])                   "
944*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 7 */
945*fb1b10abSAndroid Build Coastguard Worker           "addu             %[odd_dst],       %[odd_dst],     %[dst_pitch_2]  "
946*fb1b10abSAndroid Build Coastguard Worker           "\n\t"
947*fb1b10abSAndroid Build Coastguard Worker 
948*fb1b10abSAndroid Build Coastguard Worker           "sb               %[st1],           0(%[odd_dst])                   "
949*fb1b10abSAndroid Build Coastguard Worker           "\n\t" /* odd 8 */
950*fb1b10abSAndroid Build Coastguard Worker 
951*fb1b10abSAndroid Build Coastguard Worker           : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5),
952*fb1b10abSAndroid Build Coastguard Worker             [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3),
953*fb1b10abSAndroid Build Coastguard Worker             [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4),
954*fb1b10abSAndroid Build Coastguard Worker             [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
955*fb1b10abSAndroid Build Coastguard Worker             [dst] "+r"(dst), [odd_dst] "+r"(odd_dst)
956*fb1b10abSAndroid Build Coastguard Worker           : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
957*fb1b10abSAndroid Build Coastguard Worker             [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
958*fb1b10abSAndroid Build Coastguard Worker 
959*fb1b10abSAndroid Build Coastguard Worker       src += 16;
960*fb1b10abSAndroid Build Coastguard Worker       dst = (dst_ptr + ((c + 1) * 16 * dst_stride));
961*fb1b10abSAndroid Build Coastguard Worker       odd_dst = (dst + dst_stride);
962*fb1b10abSAndroid Build Coastguard Worker     }
963*fb1b10abSAndroid Build Coastguard Worker 
964*fb1b10abSAndroid Build Coastguard Worker     /* Next row... */
965*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
966*fb1b10abSAndroid Build Coastguard Worker     dst_ptr += 1;
967*fb1b10abSAndroid Build Coastguard Worker   }
968*fb1b10abSAndroid Build Coastguard Worker }
969*fb1b10abSAndroid Build Coastguard Worker 
convolve_bi_horiz_transposed(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const int16_t * filter,int w,int h)970*fb1b10abSAndroid Build Coastguard Worker void convolve_bi_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride,
971*fb1b10abSAndroid Build Coastguard Worker                                   uint8_t *dst, ptrdiff_t dst_stride,
972*fb1b10abSAndroid Build Coastguard Worker                                   const int16_t *filter, int w, int h) {
973*fb1b10abSAndroid Build Coastguard Worker   int x, y;
974*fb1b10abSAndroid Build Coastguard Worker 
975*fb1b10abSAndroid Build Coastguard Worker   for (y = 0; y < h; ++y) {
976*fb1b10abSAndroid Build Coastguard Worker     for (x = 0; x < w; ++x) {
977*fb1b10abSAndroid Build Coastguard Worker       int sum = 0;
978*fb1b10abSAndroid Build Coastguard Worker 
979*fb1b10abSAndroid Build Coastguard Worker       sum += src[x] * filter[3];
980*fb1b10abSAndroid Build Coastguard Worker       sum += src[x + 1] * filter[4];
981*fb1b10abSAndroid Build Coastguard Worker 
982*fb1b10abSAndroid Build Coastguard Worker       dst[x * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
983*fb1b10abSAndroid Build Coastguard Worker     }
984*fb1b10abSAndroid Build Coastguard Worker 
985*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
986*fb1b10abSAndroid Build Coastguard Worker     dst += 1;
987*fb1b10abSAndroid Build Coastguard Worker   }
988*fb1b10abSAndroid Build Coastguard Worker }
989*fb1b10abSAndroid Build Coastguard Worker 
/*
 * Entry point for the transposed 2-tap horizontal convolution on DSPr2.
 *
 * Programs the DSP control register, issues two source prefetches and then
 * hands the work to the kernel matching the block width `w`:
 *   4        -> convolve_bi_horiz_4_transposed_dspr2
 *   8        -> convolve_bi_horiz_8_transposed_dspr2
 *   16, 32   -> convolve_bi_horiz_16_transposed_dspr2 (with w / 16 as count)
 *   64       -> convolve_bi_horiz_64_transposed_dspr2
 *   other    -> convolve_bi_horiz_transposed (plain C)
 */
void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter, int w,
                         int h) {
  const uint32_t extract_pos = 38;

  /* bit position for extract from acc */
  __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
                       :
                       : [pos] "r"(extract_pos));

  /* prefetch data to cache memory */
  prefetch_load(src);
  prefetch_load(src + 32);

  if (w == 4) {
    convolve_bi_horiz_4_transposed_dspr2(src, src_stride, dst, dst_stride,
                                         filter, h);
  } else if (w == 8) {
    convolve_bi_horiz_8_transposed_dspr2(src, src_stride, dst, dst_stride,
                                         filter, h);
  } else if (w == 16 || w == 32) {
    /* The 16-wide kernel is told how many 16-pixel groups each row has. */
    convolve_bi_horiz_16_transposed_dspr2(src, src_stride, dst, dst_stride,
                                          filter, h, (w / 16));
  } else if (w == 64) {
    /* NOTE(review): src + 32 was already prefetched above; this repeats the
     * same hint and is kept exactly as in the original. */
    prefetch_load(src + 32);
    convolve_bi_horiz_64_transposed_dspr2(src, src_stride, dst, dst_stride,
                                          filter, h);
  } else {
    convolve_bi_horiz_transposed(src, src_stride, dst, dst_stride, filter, w,
                                 h);
  }
}
1029*fb1b10abSAndroid Build Coastguard Worker #endif
1030