xref: /aosp_15_r20/external/libvpx/vpx_dsp/mips/itrans32_dspr2.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include <assert.h>
12*fb1b10abSAndroid Build Coastguard Worker #include <stdio.h>
13*fb1b10abSAndroid Build Coastguard Worker 
14*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
15*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/mips/inv_txfm_dspr2.h"
16*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/txfm_common.h"
17*fb1b10abSAndroid Build Coastguard Worker 
18*fb1b10abSAndroid Build Coastguard Worker #if HAVE_DSPR2
idct32_rows_dspr2(const int16_t * input,int16_t * output,uint32_t no_rows)19*fb1b10abSAndroid Build Coastguard Worker static void idct32_rows_dspr2(const int16_t *input, int16_t *output,
20*fb1b10abSAndroid Build Coastguard Worker                               uint32_t no_rows) {
21*fb1b10abSAndroid Build Coastguard Worker   int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
22*fb1b10abSAndroid Build Coastguard Worker   int step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
23*fb1b10abSAndroid Build Coastguard Worker   int step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
24*fb1b10abSAndroid Build Coastguard Worker   int step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27;
25*fb1b10abSAndroid Build Coastguard Worker   int step1_28, step1_29, step1_30, step1_31;
26*fb1b10abSAndroid Build Coastguard Worker   int step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;
27*fb1b10abSAndroid Build Coastguard Worker   int step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13;
28*fb1b10abSAndroid Build Coastguard Worker   int step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20;
29*fb1b10abSAndroid Build Coastguard Worker   int step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27;
30*fb1b10abSAndroid Build Coastguard Worker   int step2_28, step2_29, step2_30, step2_31;
31*fb1b10abSAndroid Build Coastguard Worker   int step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14;
32*fb1b10abSAndroid Build Coastguard Worker   int step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21;
33*fb1b10abSAndroid Build Coastguard Worker   int step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28;
34*fb1b10abSAndroid Build Coastguard Worker   int step3_29, step3_30, step3_31;
35*fb1b10abSAndroid Build Coastguard Worker   int temp0, temp1, temp2, temp3;
36*fb1b10abSAndroid Build Coastguard Worker   int load1, load2, load3, load4;
37*fb1b10abSAndroid Build Coastguard Worker   int result1, result2;
38*fb1b10abSAndroid Build Coastguard Worker   int i;
39*fb1b10abSAndroid Build Coastguard Worker   const int const_2_power_13 = 8192;
40*fb1b10abSAndroid Build Coastguard Worker   const int32_t *input_int;
41*fb1b10abSAndroid Build Coastguard Worker 
42*fb1b10abSAndroid Build Coastguard Worker   for (i = no_rows; i--;) {
43*fb1b10abSAndroid Build Coastguard Worker     input_int = (const int32_t *)input;
44*fb1b10abSAndroid Build Coastguard Worker 
45*fb1b10abSAndroid Build Coastguard Worker     if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] |
46*fb1b10abSAndroid Build Coastguard Worker           input_int[4] | input_int[5] | input_int[6] | input_int[7] |
47*fb1b10abSAndroid Build Coastguard Worker           input_int[8] | input_int[9] | input_int[10] | input_int[11] |
48*fb1b10abSAndroid Build Coastguard Worker           input_int[12] | input_int[13] | input_int[14] | input_int[15])) {
49*fb1b10abSAndroid Build Coastguard Worker       input += 32;
50*fb1b10abSAndroid Build Coastguard Worker 
51*fb1b10abSAndroid Build Coastguard Worker       __asm__ __volatile__(
52*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,     0(%[output])     \n\t"
53*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,    64(%[output])     \n\t"
54*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   128(%[output])     \n\t"
55*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   192(%[output])     \n\t"
56*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   256(%[output])     \n\t"
57*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   320(%[output])     \n\t"
58*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   384(%[output])     \n\t"
59*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   448(%[output])     \n\t"
60*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   512(%[output])     \n\t"
61*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   576(%[output])     \n\t"
62*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   640(%[output])     \n\t"
63*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   704(%[output])     \n\t"
64*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   768(%[output])     \n\t"
65*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   832(%[output])     \n\t"
66*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   896(%[output])     \n\t"
67*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,   960(%[output])     \n\t"
68*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1024(%[output])     \n\t"
69*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1088(%[output])     \n\t"
70*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1152(%[output])     \n\t"
71*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1216(%[output])     \n\t"
72*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1280(%[output])     \n\t"
73*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1344(%[output])     \n\t"
74*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1408(%[output])     \n\t"
75*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1472(%[output])     \n\t"
76*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1536(%[output])     \n\t"
77*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1600(%[output])     \n\t"
78*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1664(%[output])     \n\t"
79*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1728(%[output])     \n\t"
80*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1792(%[output])     \n\t"
81*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1856(%[output])     \n\t"
82*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1920(%[output])     \n\t"
83*fb1b10abSAndroid Build Coastguard Worker           "sh     $zero,  1984(%[output])     \n\t"
84*fb1b10abSAndroid Build Coastguard Worker 
85*fb1b10abSAndroid Build Coastguard Worker           :
86*fb1b10abSAndroid Build Coastguard Worker           : [output] "r"(output));
87*fb1b10abSAndroid Build Coastguard Worker 
88*fb1b10abSAndroid Build Coastguard Worker       output += 1;
89*fb1b10abSAndroid Build Coastguard Worker 
90*fb1b10abSAndroid Build Coastguard Worker       continue;
91*fb1b10abSAndroid Build Coastguard Worker     }
92*fb1b10abSAndroid Build Coastguard Worker 
93*fb1b10abSAndroid Build Coastguard Worker     /* prefetch row */
94*fb1b10abSAndroid Build Coastguard Worker     prefetch_load((const uint8_t *)(input + 32));
95*fb1b10abSAndroid Build Coastguard Worker     prefetch_load((const uint8_t *)(input + 48));
96*fb1b10abSAndroid Build Coastguard Worker 
97*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
98*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             2(%[input])                     \n\t"
99*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             62(%[input])                    \n\t"
100*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             34(%[input])                    \n\t"
101*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             30(%[input])                    \n\t"
102*fb1b10abSAndroid Build Coastguard Worker 
103*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
104*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
105*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
106*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
107*fb1b10abSAndroid Build Coastguard Worker 
108*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_31_64]  \n\t"
109*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_1_64]   \n\t"
110*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
111*fb1b10abSAndroid Build Coastguard Worker 
112*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_1_64]   \n\t"
113*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_31_64]  \n\t"
114*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
115*fb1b10abSAndroid Build Coastguard Worker 
116*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
117*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
118*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
119*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
120*fb1b10abSAndroid Build Coastguard Worker 
121*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_15_64]  \n\t"
122*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_17_64]  \n\t"
123*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
124*fb1b10abSAndroid Build Coastguard Worker 
125*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_17_64]  \n\t"
126*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_15_64]  \n\t"
127*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
128*fb1b10abSAndroid Build Coastguard Worker 
129*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
130*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
131*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
132*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
133*fb1b10abSAndroid Build Coastguard Worker 
134*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp3],       %[temp2]        \n\t"
135*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp0],       %[temp1]        \n\t"
136*fb1b10abSAndroid Build Coastguard Worker 
137*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_28_64]  \n\t"
138*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_4_64]   \n\t"
139*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_4_64]   \n\t"
140*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_28_64]  \n\t"
141*fb1b10abSAndroid Build Coastguard Worker 
142*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_17],          $ac1,           31              \n\t"
143*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_30],          $ac3,           31              \n\t"
144*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_16],          %[temp0],       %[temp1]        \n\t"
145*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_31],          %[temp2],       %[temp3]        \n\t"
146*fb1b10abSAndroid Build Coastguard Worker 
147*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
148*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
149*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
150*fb1b10abSAndroid Build Coastguard Worker           [step1_16] "=&r"(step1_16), [step1_17] "=&r"(step1_17),
151*fb1b10abSAndroid Build Coastguard Worker           [step1_30] "=&r"(step1_30), [step1_31] "=&r"(step1_31)
152*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
153*fb1b10abSAndroid Build Coastguard Worker           [cospi_31_64] "r"(cospi_31_64), [cospi_1_64] "r"(cospi_1_64),
154*fb1b10abSAndroid Build Coastguard Worker           [cospi_4_64] "r"(cospi_4_64), [cospi_17_64] "r"(cospi_17_64),
155*fb1b10abSAndroid Build Coastguard Worker           [cospi_15_64] "r"(cospi_15_64), [cospi_28_64] "r"(cospi_28_64));
156*fb1b10abSAndroid Build Coastguard Worker 
157*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
158*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             18(%[input])                    \n\t"
159*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             46(%[input])                    \n\t"
160*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             50(%[input])                    \n\t"
161*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             14(%[input])                    \n\t"
162*fb1b10abSAndroid Build Coastguard Worker 
163*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
164*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
165*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
166*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
167*fb1b10abSAndroid Build Coastguard Worker 
168*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_23_64]  \n\t"
169*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_9_64]   \n\t"
170*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
171*fb1b10abSAndroid Build Coastguard Worker 
172*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_9_64]   \n\t"
173*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_23_64]  \n\t"
174*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
175*fb1b10abSAndroid Build Coastguard Worker 
176*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
177*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
178*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
179*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
180*fb1b10abSAndroid Build Coastguard Worker 
181*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_7_64]   \n\t"
182*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_25_64]  \n\t"
183*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
184*fb1b10abSAndroid Build Coastguard Worker 
185*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_25_64]  \n\t"
186*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_7_64]   \n\t"
187*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
188*fb1b10abSAndroid Build Coastguard Worker 
189*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
190*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
191*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
192*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
193*fb1b10abSAndroid Build Coastguard Worker 
194*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp1],       %[temp0]        \n\t"
195*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp2],       %[temp3]        \n\t"
196*fb1b10abSAndroid Build Coastguard Worker 
197*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load1],       %[cospi_28_64]  \n\t"
198*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_4_64]   \n\t"
199*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac3,                 %[load1],       %[cospi_4_64]   \n\t"
200*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_28_64]  \n\t"
201*fb1b10abSAndroid Build Coastguard Worker 
202*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_18],          $ac1,           31              \n\t"
203*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_29],          $ac3,           31              \n\t"
204*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_19],          %[temp0],       %[temp1]        \n\t"
205*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_28],          %[temp2],       %[temp3]        \n\t"
206*fb1b10abSAndroid Build Coastguard Worker 
207*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
208*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
209*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
210*fb1b10abSAndroid Build Coastguard Worker           [step1_18] "=&r"(step1_18), [step1_19] "=&r"(step1_19),
211*fb1b10abSAndroid Build Coastguard Worker           [step1_28] "=&r"(step1_28), [step1_29] "=&r"(step1_29)
212*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
213*fb1b10abSAndroid Build Coastguard Worker           [cospi_23_64] "r"(cospi_23_64), [cospi_9_64] "r"(cospi_9_64),
214*fb1b10abSAndroid Build Coastguard Worker           [cospi_4_64] "r"(cospi_4_64), [cospi_7_64] "r"(cospi_7_64),
215*fb1b10abSAndroid Build Coastguard Worker           [cospi_25_64] "r"(cospi_25_64), [cospi_28_64] "r"(cospi_28_64));
216*fb1b10abSAndroid Build Coastguard Worker 
217*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
218*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             10(%[input])                    \n\t"
219*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             54(%[input])                    \n\t"
220*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             42(%[input])                    \n\t"
221*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             22(%[input])                    \n\t"
222*fb1b10abSAndroid Build Coastguard Worker 
223*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
224*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
225*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
226*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
227*fb1b10abSAndroid Build Coastguard Worker 
228*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_27_64]  \n\t"
229*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_5_64]   \n\t"
230*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
231*fb1b10abSAndroid Build Coastguard Worker 
232*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_5_64]   \n\t"
233*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_27_64]  \n\t"
234*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
235*fb1b10abSAndroid Build Coastguard Worker 
236*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
237*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
238*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
239*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
240*fb1b10abSAndroid Build Coastguard Worker 
241*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_11_64]  \n\t"
242*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_21_64]  \n\t"
243*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
244*fb1b10abSAndroid Build Coastguard Worker 
245*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_21_64]  \n\t"
246*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_11_64]  \n\t"
247*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
248*fb1b10abSAndroid Build Coastguard Worker 
249*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
250*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
251*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
252*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
253*fb1b10abSAndroid Build Coastguard Worker 
254*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp0],       %[temp1]        \n\t"
255*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp3],       %[temp2]        \n\t"
256*fb1b10abSAndroid Build Coastguard Worker 
257*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load2],       %[cospi_12_64]  \n\t"
258*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load1],       %[cospi_20_64]  \n\t"
259*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_12_64]  \n\t"
260*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_20_64]  \n\t"
261*fb1b10abSAndroid Build Coastguard Worker 
262*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_21],          $ac1,           31              \n\t"
263*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_26],          $ac3,           31              \n\t"
264*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_20],          %[temp0],       %[temp1]        \n\t"
265*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_27],          %[temp2],       %[temp3]        \n\t"
266*fb1b10abSAndroid Build Coastguard Worker 
267*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
268*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
269*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
270*fb1b10abSAndroid Build Coastguard Worker           [step1_20] "=&r"(step1_20), [step1_21] "=&r"(step1_21),
271*fb1b10abSAndroid Build Coastguard Worker           [step1_26] "=&r"(step1_26), [step1_27] "=&r"(step1_27)
272*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
273*fb1b10abSAndroid Build Coastguard Worker           [cospi_27_64] "r"(cospi_27_64), [cospi_5_64] "r"(cospi_5_64),
274*fb1b10abSAndroid Build Coastguard Worker           [cospi_11_64] "r"(cospi_11_64), [cospi_21_64] "r"(cospi_21_64),
275*fb1b10abSAndroid Build Coastguard Worker           [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64));
276*fb1b10abSAndroid Build Coastguard Worker 
277*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
278*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             26(%[input])                    \n\t"
279*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             38(%[input])                    \n\t"
280*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             58(%[input])                    \n\t"
281*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],              6(%[input])                    \n\t"
282*fb1b10abSAndroid Build Coastguard Worker 
283*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
284*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
285*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
286*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
287*fb1b10abSAndroid Build Coastguard Worker 
288*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_19_64]  \n\t"
289*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_13_64]  \n\t"
290*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
291*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_13_64]  \n\t"
292*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_19_64]  \n\t"
293*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
294*fb1b10abSAndroid Build Coastguard Worker 
295*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
296*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
297*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
298*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
299*fb1b10abSAndroid Build Coastguard Worker 
300*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_3_64]   \n\t"
301*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_29_64]  \n\t"
302*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
303*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_29_64]  \n\t"
304*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_3_64]   \n\t"
305*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
306*fb1b10abSAndroid Build Coastguard Worker 
307*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
308*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
309*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
310*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
311*fb1b10abSAndroid Build Coastguard Worker 
312*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp1],       %[temp0]        \n\t"
313*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp2],       %[temp3]        \n\t"
314*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load1],       %[cospi_12_64]  \n\t"
315*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_20_64]  \n\t"
316*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac3,                 %[load1],       %[cospi_20_64]  \n\t"
317*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_12_64]  \n\t"
318*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_22],          $ac1,           31              \n\t"
319*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_25],          $ac3,           31              \n\t"
320*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_23],          %[temp0],       %[temp1]        \n\t"
321*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_24],          %[temp2],       %[temp3]        \n\t"
322*fb1b10abSAndroid Build Coastguard Worker 
323*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
324*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
325*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
326*fb1b10abSAndroid Build Coastguard Worker           [step1_22] "=&r"(step1_22), [step1_23] "=&r"(step1_23),
327*fb1b10abSAndroid Build Coastguard Worker           [step1_24] "=&r"(step1_24), [step1_25] "=&r"(step1_25)
328*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
329*fb1b10abSAndroid Build Coastguard Worker           [cospi_19_64] "r"(cospi_19_64), [cospi_13_64] "r"(cospi_13_64),
330*fb1b10abSAndroid Build Coastguard Worker           [cospi_3_64] "r"(cospi_3_64), [cospi_29_64] "r"(cospi_29_64),
331*fb1b10abSAndroid Build Coastguard Worker           [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64));
332*fb1b10abSAndroid Build Coastguard Worker 
333*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
334*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],              4(%[input])                    \n\t"
335*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             60(%[input])                    \n\t"
336*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             36(%[input])                    \n\t"
337*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             28(%[input])                    \n\t"
338*fb1b10abSAndroid Build Coastguard Worker 
339*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
340*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
341*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
342*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
343*fb1b10abSAndroid Build Coastguard Worker 
344*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_30_64]  \n\t"
345*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_2_64]   \n\t"
346*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
347*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_2_64]   \n\t"
348*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_30_64]  \n\t"
349*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
350*fb1b10abSAndroid Build Coastguard Worker 
351*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
352*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
353*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
354*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
355*fb1b10abSAndroid Build Coastguard Worker 
356*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_14_64]  \n\t"
357*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_18_64]  \n\t"
358*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
359*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_18_64]  \n\t"
360*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_14_64]  \n\t"
361*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
362*fb1b10abSAndroid Build Coastguard Worker 
363*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
364*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
365*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
366*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
367*fb1b10abSAndroid Build Coastguard Worker 
368*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp0],       %[temp1]        \n\t"
369*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp3],       %[temp2]        \n\t"
370*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load1],       %[cospi_8_64]   \n\t"
371*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load2],       %[cospi_24_64]  \n\t"
372*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_24_64]  \n\t"
373*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_8_64]   \n\t"
374*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step2_9],           $ac1,           31              \n\t"
375*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step2_14],          $ac3,           31              \n\t"
376*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_8],           %[temp0],       %[temp1]        \n\t"
377*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_15],          %[temp2],       %[temp3]        \n\t"
378*fb1b10abSAndroid Build Coastguard Worker 
379*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
380*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
381*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step2_8] "=&r"(step2_8),
382*fb1b10abSAndroid Build Coastguard Worker           [step2_9] "=&r"(step2_9), [step2_14] "=&r"(step2_14),
383*fb1b10abSAndroid Build Coastguard Worker           [step2_15] "=&r"(step2_15)
384*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
385*fb1b10abSAndroid Build Coastguard Worker           [cospi_30_64] "r"(cospi_30_64), [cospi_2_64] "r"(cospi_2_64),
386*fb1b10abSAndroid Build Coastguard Worker           [cospi_14_64] "r"(cospi_14_64), [cospi_18_64] "r"(cospi_18_64),
387*fb1b10abSAndroid Build Coastguard Worker           [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64));
388*fb1b10abSAndroid Build Coastguard Worker 
389*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
390*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             20(%[input])                    \n\t"
391*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             44(%[input])                    \n\t"
392*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             52(%[input])                    \n\t"
393*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             12(%[input])                    \n\t"
394*fb1b10abSAndroid Build Coastguard Worker 
395*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
396*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
397*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
398*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
399*fb1b10abSAndroid Build Coastguard Worker 
400*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_22_64]  \n\t"
401*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_10_64]  \n\t"
402*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
403*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_10_64]  \n\t"
404*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_22_64]  \n\t"
405*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
406*fb1b10abSAndroid Build Coastguard Worker 
407*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
408*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
409*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
410*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
411*fb1b10abSAndroid Build Coastguard Worker 
412*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_6_64]   \n\t"
413*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_26_64]  \n\t"
414*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
415*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_26_64]  \n\t"
416*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_6_64]   \n\t"
417*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
418*fb1b10abSAndroid Build Coastguard Worker 
419*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
420*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
421*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
422*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
423*fb1b10abSAndroid Build Coastguard Worker 
424*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp1],       %[temp0]        \n\t"
425*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp2],       %[temp3]        \n\t"
426*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load1],       %[cospi_24_64]  \n\t"
427*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_8_64]   \n\t"
428*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_24_64]  \n\t"
429*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac3,                 %[load1],       %[cospi_8_64]   \n\t"
430*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step2_10],          $ac1,           31              \n\t"
431*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step2_13],          $ac3,           31              \n\t"
432*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_11],          %[temp0],       %[temp1]        \n\t"
433*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_12],          %[temp2],       %[temp3]        \n\t"
434*fb1b10abSAndroid Build Coastguard Worker 
435*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
436*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
437*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
438*fb1b10abSAndroid Build Coastguard Worker           [step2_10] "=&r"(step2_10), [step2_11] "=&r"(step2_11),
439*fb1b10abSAndroid Build Coastguard Worker           [step2_12] "=&r"(step2_12), [step2_13] "=&r"(step2_13)
440*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
441*fb1b10abSAndroid Build Coastguard Worker           [cospi_22_64] "r"(cospi_22_64), [cospi_10_64] "r"(cospi_10_64),
442*fb1b10abSAndroid Build Coastguard Worker           [cospi_6_64] "r"(cospi_6_64), [cospi_26_64] "r"(cospi_26_64),
443*fb1b10abSAndroid Build Coastguard Worker           [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64));
444*fb1b10abSAndroid Build Coastguard Worker 
445*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
446*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
447*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
448*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step2_14],    %[step2_13]     \n\t"
449*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[temp0],       %[step2_9]      \n\t"
450*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp0],             %[temp0],       %[step2_10]     \n\t"
451*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac0,                 %[temp0],       %[cospi_16_64]  \n\t"
452*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
453*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
454*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[step2_14],    %[step2_13]     \n\t"
455*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],             %[temp1],       %[step2_9]      \n\t"
456*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[temp1],       %[step2_10]     \n\t"
457*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp1],       %[cospi_16_64]  \n\t"
458*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
459*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
460*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step2_15],    %[step2_12]     \n\t"
461*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[temp0],       %[step2_8]      \n\t"
462*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp0],             %[temp0],       %[step2_11]     \n\t"
463*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[temp0],       %[cospi_16_64]  \n\t"
464*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
465*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
466*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[step2_15],    %[step2_12]     \n\t"
467*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],             %[temp1],       %[step2_8]      \n\t"
468*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[temp1],       %[step2_11]     \n\t"
469*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[temp1],       %[cospi_16_64]  \n\t"
470*fb1b10abSAndroid Build Coastguard Worker 
471*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_8],           %[step2_8],     %[step2_11]     \n\t"
472*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_9],           %[step2_9],     %[step2_10]     \n\t"
473*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_14],          %[step2_13],    %[step2_14]     \n\t"
474*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_15],          %[step2_12],    %[step2_15]     \n\t"
475*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_10],          $ac0,           31              \n\t"
476*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_13],          $ac1,           31              \n\t"
477*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_11],          $ac2,           31              \n\t"
478*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_12],          $ac3,           31              \n\t"
479*fb1b10abSAndroid Build Coastguard Worker 
480*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_8] "=&r"(step3_8),
481*fb1b10abSAndroid Build Coastguard Worker           [step3_9] "=&r"(step3_9), [step3_10] "=&r"(step3_10),
482*fb1b10abSAndroid Build Coastguard Worker           [step3_11] "=&r"(step3_11), [step3_12] "=&r"(step3_12),
483*fb1b10abSAndroid Build Coastguard Worker           [step3_13] "=&r"(step3_13), [step3_14] "=&r"(step3_14),
484*fb1b10abSAndroid Build Coastguard Worker           [step3_15] "=&r"(step3_15)
485*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step2_8] "r"(step2_8),
486*fb1b10abSAndroid Build Coastguard Worker           [step2_9] "r"(step2_9), [step2_10] "r"(step2_10),
487*fb1b10abSAndroid Build Coastguard Worker           [step2_11] "r"(step2_11), [step2_12] "r"(step2_12),
488*fb1b10abSAndroid Build Coastguard Worker           [step2_13] "r"(step2_13), [step2_14] "r"(step2_14),
489*fb1b10abSAndroid Build Coastguard Worker           [step2_15] "r"(step2_15), [cospi_16_64] "r"(cospi_16_64));
490*fb1b10abSAndroid Build Coastguard Worker 
491*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
492*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
493*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
494*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
495*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
496*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step1_17],    %[step1_18]     \n\t"
497*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[step1_30],    %[step1_29]     \n\t"
498*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_17],          %[step1_17],    %[step1_18]     \n\t"
499*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_30],          %[step1_30],    %[step1_29]     \n\t"
500*fb1b10abSAndroid Build Coastguard Worker 
501*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac0,                 %[temp0],       %[cospi_8_64]   \n\t"
502*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac0,                 %[temp1],       %[cospi_24_64]  \n\t"
503*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_18],          $ac0,           31              \n\t"
504*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp0],       %[cospi_24_64]  \n\t"
505*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp1],       %[cospi_8_64]   \n\t"
506*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_29],          $ac1,           31              \n\t"
507*fb1b10abSAndroid Build Coastguard Worker 
508*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
509*fb1b10abSAndroid Build Coastguard Worker           [step3_18] "=&r"(step3_18), [step3_29] "=&r"(step3_29),
510*fb1b10abSAndroid Build Coastguard Worker           [step3_17] "=&r"(step3_17), [step3_30] "=&r"(step3_30)
511*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step1_17] "r"(step1_17),
512*fb1b10abSAndroid Build Coastguard Worker           [step1_18] "r"(step1_18), [step1_30] "r"(step1_30),
513*fb1b10abSAndroid Build Coastguard Worker           [step1_29] "r"(step1_29), [cospi_24_64] "r"(cospi_24_64),
514*fb1b10abSAndroid Build Coastguard Worker           [cospi_8_64] "r"(cospi_8_64));
515*fb1b10abSAndroid Build Coastguard Worker 
516*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
517*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
518*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
519*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
520*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
521*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step1_16],    %[step1_19]     \n\t"
522*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[step1_31],    %[step1_28]     \n\t"
523*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_16],          %[step1_16],    %[step1_19]     \n\t"
524*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_31],          %[step1_31],    %[step1_28]     \n\t"
525*fb1b10abSAndroid Build Coastguard Worker 
526*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac0,                 %[temp0],       %[cospi_8_64]   \n\t"
527*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac0,                 %[temp1],       %[cospi_24_64]  \n\t"
528*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_19],          $ac0,           31              \n\t"
529*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp0],       %[cospi_24_64]  \n\t"
530*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp1],       %[cospi_8_64]   \n\t"
531*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_28],          $ac1,           31              \n\t"
532*fb1b10abSAndroid Build Coastguard Worker 
533*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
534*fb1b10abSAndroid Build Coastguard Worker           [step3_16] "=&r"(step3_16), [step3_31] "=&r"(step3_31),
535*fb1b10abSAndroid Build Coastguard Worker           [step3_19] "=&r"(step3_19), [step3_28] "=&r"(step3_28)
536*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step1_16] "r"(step1_16),
537*fb1b10abSAndroid Build Coastguard Worker           [step1_19] "r"(step1_19), [step1_31] "r"(step1_31),
538*fb1b10abSAndroid Build Coastguard Worker           [step1_28] "r"(step1_28), [cospi_24_64] "r"(cospi_24_64),
539*fb1b10abSAndroid Build Coastguard Worker           [cospi_8_64] "r"(cospi_8_64));
540*fb1b10abSAndroid Build Coastguard Worker 
541*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
542*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
543*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
544*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
545*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
546*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step1_23],    %[step1_20]     \n\t"
547*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[step1_24],    %[step1_27]     \n\t"
548*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_23],          %[step1_23],    %[step1_20]     \n\t"
549*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_24],          %[step1_24],    %[step1_27]     \n\t"
550*fb1b10abSAndroid Build Coastguard Worker 
551*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac0,                 %[temp0],       %[cospi_8_64]   \n\t"
552*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac0,                 %[temp1],       %[cospi_24_64]  \n\t"
553*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_27],          $ac0,           31              \n\t"
554*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[temp0],       %[cospi_24_64]  \n\t"
555*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[temp1],       %[cospi_8_64]   \n\t"
556*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_20],          $ac1,           31              \n\t"
557*fb1b10abSAndroid Build Coastguard Worker 
558*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
559*fb1b10abSAndroid Build Coastguard Worker           [step3_23] "=&r"(step3_23), [step3_24] "=&r"(step3_24),
560*fb1b10abSAndroid Build Coastguard Worker           [step3_20] "=&r"(step3_20), [step3_27] "=&r"(step3_27)
561*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step1_23] "r"(step1_23),
562*fb1b10abSAndroid Build Coastguard Worker           [step1_20] "r"(step1_20), [step1_24] "r"(step1_24),
563*fb1b10abSAndroid Build Coastguard Worker           [step1_27] "r"(step1_27), [cospi_24_64] "r"(cospi_24_64),
564*fb1b10abSAndroid Build Coastguard Worker           [cospi_8_64] "r"(cospi_8_64));
565*fb1b10abSAndroid Build Coastguard Worker 
566*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
567*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
568*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
569*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
570*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
571*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step1_22],    %[step1_21]     \n\t"
572*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp1],             %[step1_25],    %[step1_26]     \n\t"
573*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_22],          %[step1_22],    %[step1_21]     \n\t"
574*fb1b10abSAndroid Build Coastguard Worker         "add      %[step3_25],          %[step1_25],    %[step1_26]     \n\t"
575*fb1b10abSAndroid Build Coastguard Worker 
576*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac0,                 %[temp0],       %[cospi_24_64]  \n\t"
577*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac0,                 %[temp1],       %[cospi_8_64]   \n\t"
578*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_21],          $ac0,           31              \n\t"
579*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[temp0],       %[cospi_8_64]   \n\t"
580*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp1],       %[cospi_24_64]  \n\t"
581*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step3_26],          $ac1,           31              \n\t"
582*fb1b10abSAndroid Build Coastguard Worker 
583*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
584*fb1b10abSAndroid Build Coastguard Worker           [step3_22] "=&r"(step3_22), [step3_25] "=&r"(step3_25),
585*fb1b10abSAndroid Build Coastguard Worker           [step3_21] "=&r"(step3_21), [step3_26] "=&r"(step3_26)
586*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step1_22] "r"(step1_22),
587*fb1b10abSAndroid Build Coastguard Worker           [step1_21] "r"(step1_21), [step1_25] "r"(step1_25),
588*fb1b10abSAndroid Build Coastguard Worker           [step1_26] "r"(step1_26), [cospi_24_64] "r"(cospi_24_64),
589*fb1b10abSAndroid Build Coastguard Worker           [cospi_8_64] "r"(cospi_8_64));
590*fb1b10abSAndroid Build Coastguard Worker 
591*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
592*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_16],          %[step3_16],    %[step3_23]     \n\t"
593*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_17],          %[step3_17],    %[step3_22]     \n\t"
594*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_18],          %[step3_18],    %[step3_21]     \n\t"
595*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_19],          %[step3_19],    %[step3_20]     \n\t"
596*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_20],          %[step3_19],    %[step3_20]     \n\t"
597*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_21],          %[step3_18],    %[step3_21]     \n\t"
598*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_22],          %[step3_17],    %[step3_22]     \n\t"
599*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_23],          %[step3_16],    %[step3_23]     \n\t"
600*fb1b10abSAndroid Build Coastguard Worker 
601*fb1b10abSAndroid Build Coastguard Worker         : [step2_16] "=&r"(step2_16), [step2_17] "=&r"(step2_17),
602*fb1b10abSAndroid Build Coastguard Worker           [step2_18] "=&r"(step2_18), [step2_19] "=&r"(step2_19),
603*fb1b10abSAndroid Build Coastguard Worker           [step2_20] "=&r"(step2_20), [step2_21] "=&r"(step2_21),
604*fb1b10abSAndroid Build Coastguard Worker           [step2_22] "=&r"(step2_22), [step2_23] "=&r"(step2_23)
605*fb1b10abSAndroid Build Coastguard Worker         : [step3_16] "r"(step3_16), [step3_23] "r"(step3_23),
606*fb1b10abSAndroid Build Coastguard Worker           [step3_17] "r"(step3_17), [step3_22] "r"(step3_22),
607*fb1b10abSAndroid Build Coastguard Worker           [step3_18] "r"(step3_18), [step3_21] "r"(step3_21),
608*fb1b10abSAndroid Build Coastguard Worker           [step3_19] "r"(step3_19), [step3_20] "r"(step3_20));
609*fb1b10abSAndroid Build Coastguard Worker 
610*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
611*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_24],          %[step3_31],    %[step3_24]     \n\t"
612*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_25],          %[step3_30],    %[step3_25]     \n\t"
613*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_26],          %[step3_29],    %[step3_26]     \n\t"
614*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_27],          %[step3_28],    %[step3_27]     \n\t"
615*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_28],          %[step3_28],    %[step3_27]     \n\t"
616*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_29],          %[step3_29],    %[step3_26]     \n\t"
617*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_30],          %[step3_30],    %[step3_25]     \n\t"
618*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_31],          %[step3_31],    %[step3_24]     \n\t"
619*fb1b10abSAndroid Build Coastguard Worker 
620*fb1b10abSAndroid Build Coastguard Worker         : [step2_24] "=&r"(step2_24), [step2_28] "=&r"(step2_28),
621*fb1b10abSAndroid Build Coastguard Worker           [step2_25] "=&r"(step2_25), [step2_29] "=&r"(step2_29),
622*fb1b10abSAndroid Build Coastguard Worker           [step2_26] "=&r"(step2_26), [step2_30] "=&r"(step2_30),
623*fb1b10abSAndroid Build Coastguard Worker           [step2_27] "=&r"(step2_27), [step2_31] "=&r"(step2_31)
624*fb1b10abSAndroid Build Coastguard Worker         : [step3_31] "r"(step3_31), [step3_24] "r"(step3_24),
625*fb1b10abSAndroid Build Coastguard Worker           [step3_30] "r"(step3_30), [step3_25] "r"(step3_25),
626*fb1b10abSAndroid Build Coastguard Worker           [step3_29] "r"(step3_29), [step3_26] "r"(step3_26),
627*fb1b10abSAndroid Build Coastguard Worker           [step3_28] "r"(step3_28), [step3_27] "r"(step3_27));
628*fb1b10abSAndroid Build Coastguard Worker 
629*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
630*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             0(%[input])                     \n\t"
631*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             32(%[input])                    \n\t"
632*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             16(%[input])                    \n\t"
633*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             48(%[input])                    \n\t"
634*fb1b10abSAndroid Build Coastguard Worker 
635*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
636*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
637*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
638*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
639*fb1b10abSAndroid Build Coastguard Worker         "add      %[result1],           %[load1],       %[load2]        \n\t"
640*fb1b10abSAndroid Build Coastguard Worker         "sub      %[result2],           %[load1],       %[load2]        \n\t"
641*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[result1],     %[cospi_16_64]  \n\t"
642*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[result2],     %[cospi_16_64]  \n\t"
643*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
644*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
645*fb1b10abSAndroid Build Coastguard Worker 
646*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
647*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
648*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load3],       %[cospi_24_64]  \n\t"
649*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac3,                 %[load4],       %[cospi_8_64]   \n\t"
650*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac3,           31              \n\t"
651*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
652*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
653*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_8_64]   \n\t"
654*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_24_64]  \n\t"
655*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac1,           31              \n\t"
656*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_0],           %[temp0],       %[temp3]        \n\t"
657*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_1],           %[temp1],       %[temp2]        \n\t"
658*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_2],           %[temp1],       %[temp2]        \n\t"
659*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_3],           %[temp0],       %[temp3]        \n\t"
660*fb1b10abSAndroid Build Coastguard Worker 
661*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
662*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [result1] "=&r"(result1),
663*fb1b10abSAndroid Build Coastguard Worker           [result2] "=&r"(result2), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
664*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_0] "=&r"(step1_0),
665*fb1b10abSAndroid Build Coastguard Worker           [step1_1] "=&r"(step1_1), [step1_2] "=&r"(step1_2),
666*fb1b10abSAndroid Build Coastguard Worker           [step1_3] "=&r"(step1_3)
667*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
668*fb1b10abSAndroid Build Coastguard Worker           [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64),
669*fb1b10abSAndroid Build Coastguard Worker           [cospi_16_64] "r"(cospi_16_64));
670*fb1b10abSAndroid Build Coastguard Worker 
671*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
672*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load1],             8(%[input])                     \n\t"
673*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load2],             56(%[input])                    \n\t"
674*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load3],             40(%[input])                    \n\t"
675*fb1b10abSAndroid Build Coastguard Worker         "lh       %[load4],             24(%[input])                    \n\t"
676*fb1b10abSAndroid Build Coastguard Worker 
677*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
678*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
679*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
680*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
681*fb1b10abSAndroid Build Coastguard Worker 
682*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_28_64]  \n\t"
683*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac1,                 %[load2],       %[cospi_4_64]   \n\t"
684*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp0],             $ac1,           31              \n\t"
685*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load1],       %[cospi_4_64]   \n\t"
686*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_28_64]  \n\t"
687*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp3],             $ac3,           31              \n\t"
688*fb1b10abSAndroid Build Coastguard Worker 
689*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
690*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
691*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
692*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
693*fb1b10abSAndroid Build Coastguard Worker 
694*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[load3],       %[cospi_12_64]  \n\t"
695*fb1b10abSAndroid Build Coastguard Worker         "msub     $ac2,                 %[load4],       %[cospi_20_64]  \n\t"
696*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp1],             $ac2,           31              \n\t"
697*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load3],       %[cospi_20_64]  \n\t"
698*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load4],       %[cospi_12_64]  \n\t"
699*fb1b10abSAndroid Build Coastguard Worker         "extp     %[temp2],             $ac1,           31              \n\t"
700*fb1b10abSAndroid Build Coastguard Worker 
701*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
702*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
703*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
704*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
705*fb1b10abSAndroid Build Coastguard Worker 
706*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[temp3],       %[temp2]        \n\t"
707*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],             %[load1],       %[temp0]        \n\t"
708*fb1b10abSAndroid Build Coastguard Worker         "add      %[load1],             %[load1],       %[temp1]        \n\t"
709*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[temp0],       %[temp1]        \n\t"
710*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],             %[load2],       %[temp2]        \n\t"
711*fb1b10abSAndroid Build Coastguard Worker         "add      %[load2],             %[load2],       %[temp3]        \n\t"
712*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[load1],       %[cospi_16_64]  \n\t"
713*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[load2],       %[cospi_16_64]  \n\t"
714*fb1b10abSAndroid Build Coastguard Worker 
715*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_5],           $ac1,           31              \n\t"
716*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_6],           $ac3,           31              \n\t"
717*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_4],           %[temp0],       %[temp1]        \n\t"
718*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_7],           %[temp3],       %[temp2]        \n\t"
719*fb1b10abSAndroid Build Coastguard Worker 
720*fb1b10abSAndroid Build Coastguard Worker         : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
721*fb1b10abSAndroid Build Coastguard Worker           [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
722*fb1b10abSAndroid Build Coastguard Worker           [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_4] "=&r"(step1_4),
723*fb1b10abSAndroid Build Coastguard Worker           [step1_5] "=&r"(step1_5), [step1_6] "=&r"(step1_6),
724*fb1b10abSAndroid Build Coastguard Worker           [step1_7] "=&r"(step1_7)
725*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
726*fb1b10abSAndroid Build Coastguard Worker           [cospi_20_64] "r"(cospi_20_64), [cospi_12_64] "r"(cospi_12_64),
727*fb1b10abSAndroid Build Coastguard Worker           [cospi_4_64] "r"(cospi_4_64), [cospi_28_64] "r"(cospi_28_64),
728*fb1b10abSAndroid Build Coastguard Worker           [cospi_16_64] "r"(cospi_16_64));
729*fb1b10abSAndroid Build Coastguard Worker 
730*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
731*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_0],          %[step1_0],    %[step1_7]     \n\t"
732*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_1],          %[step1_1],    %[step1_6]     \n\t"
733*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_2],          %[step1_2],    %[step1_5]     \n\t"
734*fb1b10abSAndroid Build Coastguard Worker         "add      %[step2_3],          %[step1_3],    %[step1_4]     \n\t"
735*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_4],          %[step1_3],    %[step1_4]     \n\t"
736*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_5],          %[step1_2],    %[step1_5]     \n\t"
737*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_6],          %[step1_1],    %[step1_6]     \n\t"
738*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step2_7],          %[step1_0],    %[step1_7]     \n\t"
739*fb1b10abSAndroid Build Coastguard Worker 
740*fb1b10abSAndroid Build Coastguard Worker         : [step2_0] "=&r"(step2_0), [step2_4] "=&r"(step2_4),
741*fb1b10abSAndroid Build Coastguard Worker           [step2_1] "=&r"(step2_1), [step2_5] "=&r"(step2_5),
742*fb1b10abSAndroid Build Coastguard Worker           [step2_2] "=&r"(step2_2), [step2_6] "=&r"(step2_6),
743*fb1b10abSAndroid Build Coastguard Worker           [step2_3] "=&r"(step2_3), [step2_7] "=&r"(step2_7)
744*fb1b10abSAndroid Build Coastguard Worker         : [step1_0] "r"(step1_0), [step1_7] "r"(step1_7),
745*fb1b10abSAndroid Build Coastguard Worker           [step1_1] "r"(step1_1), [step1_6] "r"(step1_6),
746*fb1b10abSAndroid Build Coastguard Worker           [step1_2] "r"(step1_2), [step1_5] "r"(step1_5),
747*fb1b10abSAndroid Build Coastguard Worker           [step1_3] "r"(step1_3), [step1_4] "r"(step1_4));
748*fb1b10abSAndroid Build Coastguard Worker 
749*fb1b10abSAndroid Build Coastguard Worker     // stage 7
750*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
751*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_0],          %[step2_0],    %[step3_15]     \n\t"
752*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_1],          %[step2_1],    %[step3_14]     \n\t"
753*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_2],          %[step2_2],    %[step3_13]     \n\t"
754*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_3],          %[step2_3],    %[step3_12]     \n\t"
755*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_12],         %[step2_3],    %[step3_12]     \n\t"
756*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_13],         %[step2_2],    %[step3_13]     \n\t"
757*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_14],         %[step2_1],    %[step3_14]     \n\t"
758*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_15],         %[step2_0],    %[step3_15]     \n\t"
759*fb1b10abSAndroid Build Coastguard Worker 
760*fb1b10abSAndroid Build Coastguard Worker         : [step1_0] "=&r"(step1_0), [step1_12] "=&r"(step1_12),
761*fb1b10abSAndroid Build Coastguard Worker           [step1_1] "=&r"(step1_1), [step1_13] "=&r"(step1_13),
762*fb1b10abSAndroid Build Coastguard Worker           [step1_2] "=&r"(step1_2), [step1_14] "=&r"(step1_14),
763*fb1b10abSAndroid Build Coastguard Worker           [step1_3] "=&r"(step1_3), [step1_15] "=&r"(step1_15)
764*fb1b10abSAndroid Build Coastguard Worker         : [step2_0] "r"(step2_0), [step3_15] "r"(step3_15),
765*fb1b10abSAndroid Build Coastguard Worker           [step2_1] "r"(step2_1), [step3_14] "r"(step3_14),
766*fb1b10abSAndroid Build Coastguard Worker           [step2_2] "r"(step2_2), [step3_13] "r"(step3_13),
767*fb1b10abSAndroid Build Coastguard Worker           [step2_3] "r"(step2_3), [step3_12] "r"(step3_12));
768*fb1b10abSAndroid Build Coastguard Worker 
769*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
770*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_4],          %[step2_4],    %[step3_11]     \n\t"
771*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_5],          %[step2_5],    %[step3_10]     \n\t"
772*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_6],          %[step2_6],    %[step3_9]      \n\t"
773*fb1b10abSAndroid Build Coastguard Worker         "add      %[step1_7],          %[step2_7],    %[step3_8]      \n\t"
774*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_8],          %[step2_7],    %[step3_8]      \n\t"
775*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_9],          %[step2_6],    %[step3_9]      \n\t"
776*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_10],         %[step2_5],    %[step3_10]     \n\t"
777*fb1b10abSAndroid Build Coastguard Worker         "sub      %[step1_11],         %[step2_4],    %[step3_11]     \n\t"
778*fb1b10abSAndroid Build Coastguard Worker 
779*fb1b10abSAndroid Build Coastguard Worker         : [step1_4] "=&r"(step1_4), [step1_8] "=&r"(step1_8),
780*fb1b10abSAndroid Build Coastguard Worker           [step1_5] "=&r"(step1_5), [step1_9] "=&r"(step1_9),
781*fb1b10abSAndroid Build Coastguard Worker           [step1_6] "=&r"(step1_6), [step1_10] "=&r"(step1_10),
782*fb1b10abSAndroid Build Coastguard Worker           [step1_7] "=&r"(step1_7), [step1_11] "=&r"(step1_11)
783*fb1b10abSAndroid Build Coastguard Worker         : [step2_4] "r"(step2_4), [step3_11] "r"(step3_11),
784*fb1b10abSAndroid Build Coastguard Worker           [step2_5] "r"(step2_5), [step3_10] "r"(step3_10),
785*fb1b10abSAndroid Build Coastguard Worker           [step2_6] "r"(step2_6), [step3_9] "r"(step3_9),
786*fb1b10abSAndroid Build Coastguard Worker           [step2_7] "r"(step2_7), [step3_8] "r"(step3_8));
787*fb1b10abSAndroid Build Coastguard Worker 
788*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
789*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step2_27],    %[step2_20]     \n\t"
790*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],             %[step2_27],    %[step2_20]     \n\t"
791*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp2],             %[step2_26],    %[step2_21]     \n\t"
792*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp3],             %[step2_26],    %[step2_21]     \n\t"
793*fb1b10abSAndroid Build Coastguard Worker 
794*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
795*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
796*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
797*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
798*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
799*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
800*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
801*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
802*fb1b10abSAndroid Build Coastguard Worker 
803*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac0,                 %[temp0],       %[cospi_16_64]  \n\t"
804*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp1],       %[cospi_16_64]  \n\t"
805*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[temp2],       %[cospi_16_64]  \n\t"
806*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[temp3],       %[cospi_16_64]  \n\t"
807*fb1b10abSAndroid Build Coastguard Worker 
808*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_20],          $ac0,           31              \n\t"
809*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_27],          $ac1,           31              \n\t"
810*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_21],          $ac2,           31              \n\t"
811*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_26],          $ac3,           31              \n\t"
812*fb1b10abSAndroid Build Coastguard Worker 
813*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
814*fb1b10abSAndroid Build Coastguard Worker           [temp3] "=&r"(temp3), [step1_20] "=&r"(step1_20),
815*fb1b10abSAndroid Build Coastguard Worker           [step1_27] "=&r"(step1_27), [step1_21] "=&r"(step1_21),
816*fb1b10abSAndroid Build Coastguard Worker           [step1_26] "=&r"(step1_26)
817*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step2_20] "r"(step2_20),
818*fb1b10abSAndroid Build Coastguard Worker           [step2_27] "r"(step2_27), [step2_21] "r"(step2_21),
819*fb1b10abSAndroid Build Coastguard Worker           [step2_26] "r"(step2_26), [cospi_16_64] "r"(cospi_16_64));
820*fb1b10abSAndroid Build Coastguard Worker 
821*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
822*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp0],             %[step2_25],    %[step2_22]     \n\t"
823*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],             %[step2_25],    %[step2_22]     \n\t"
824*fb1b10abSAndroid Build Coastguard Worker         "sub      %[temp2],             %[step2_24],    %[step2_23]     \n\t"
825*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp3],             %[step2_24],    %[step2_23]     \n\t"
826*fb1b10abSAndroid Build Coastguard Worker 
827*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac0                            \n\t"
828*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac0                            \n\t"
829*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac1                            \n\t"
830*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac1                            \n\t"
831*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac2                            \n\t"
832*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac2                            \n\t"
833*fb1b10abSAndroid Build Coastguard Worker         "mtlo     %[const_2_power_13],  $ac3                            \n\t"
834*fb1b10abSAndroid Build Coastguard Worker         "mthi     $zero,                $ac3                            \n\t"
835*fb1b10abSAndroid Build Coastguard Worker 
836*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac0,                 %[temp0],       %[cospi_16_64]  \n\t"
837*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac1,                 %[temp1],       %[cospi_16_64]  \n\t"
838*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac2,                 %[temp2],       %[cospi_16_64]  \n\t"
839*fb1b10abSAndroid Build Coastguard Worker         "madd     $ac3,                 %[temp3],       %[cospi_16_64]  \n\t"
840*fb1b10abSAndroid Build Coastguard Worker 
841*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_22],          $ac0,           31              \n\t"
842*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_25],          $ac1,           31              \n\t"
843*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_23],          $ac2,           31              \n\t"
844*fb1b10abSAndroid Build Coastguard Worker         "extp     %[step1_24],          $ac3,           31              \n\t"
845*fb1b10abSAndroid Build Coastguard Worker 
846*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
847*fb1b10abSAndroid Build Coastguard Worker           [temp3] "=&r"(temp3), [step1_22] "=&r"(step1_22),
848*fb1b10abSAndroid Build Coastguard Worker           [step1_25] "=&r"(step1_25), [step1_23] "=&r"(step1_23),
849*fb1b10abSAndroid Build Coastguard Worker           [step1_24] "=&r"(step1_24)
850*fb1b10abSAndroid Build Coastguard Worker         : [const_2_power_13] "r"(const_2_power_13), [step2_22] "r"(step2_22),
851*fb1b10abSAndroid Build Coastguard Worker           [step2_25] "r"(step2_25), [step2_23] "r"(step2_23),
852*fb1b10abSAndroid Build Coastguard Worker           [step2_24] "r"(step2_24), [cospi_16_64] "r"(cospi_16_64));
853*fb1b10abSAndroid Build Coastguard Worker 
854*fb1b10abSAndroid Build Coastguard Worker     // final stage
855*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
856*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp0],            %[step1_0],    %[step2_31]     \n\t"
857*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],            %[step1_1],    %[step2_30]     \n\t"
858*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp2],            %[step1_2],    %[step2_29]     \n\t"
859*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp3],            %[step1_3],    %[step2_28]     \n\t"
860*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],            %[step1_3],    %[step2_28]     \n\t"
861*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],            %[step1_2],    %[step2_29]     \n\t"
862*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load3],            %[step1_1],    %[step2_30]     \n\t"
863*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load4],            %[step1_0],    %[step2_31]     \n\t"
864*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp0],          0(%[output])                     \n\t"
865*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp1],         64(%[output])                     \n\t"
866*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp2],        128(%[output])                     \n\t"
867*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp3],        192(%[output])                     \n\t"
868*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load1],       1792(%[output])                     \n\t"
869*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load2],       1856(%[output])                     \n\t"
870*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load3],       1920(%[output])                     \n\t"
871*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load4],       1984(%[output])                     \n\t"
872*fb1b10abSAndroid Build Coastguard Worker 
873*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
874*fb1b10abSAndroid Build Coastguard Worker           [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
875*fb1b10abSAndroid Build Coastguard Worker           [temp3] "=&r"(temp3), [load4] "=&r"(load4)
876*fb1b10abSAndroid Build Coastguard Worker         : [step1_0] "r"(step1_0), [step2_31] "r"(step2_31),
877*fb1b10abSAndroid Build Coastguard Worker           [step1_1] "r"(step1_1), [step2_30] "r"(step2_30),
878*fb1b10abSAndroid Build Coastguard Worker           [step1_2] "r"(step1_2), [step2_29] "r"(step2_29),
879*fb1b10abSAndroid Build Coastguard Worker           [step1_3] "r"(step1_3), [step2_28] "r"(step2_28),
880*fb1b10abSAndroid Build Coastguard Worker           [output] "r"(output));
881*fb1b10abSAndroid Build Coastguard Worker 
882*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
883*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp0],            %[step1_4],    %[step1_27]     \n\t"
884*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],            %[step1_5],    %[step1_26]     \n\t"
885*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp2],            %[step1_6],    %[step1_25]     \n\t"
886*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp3],            %[step1_7],    %[step1_24]     \n\t"
887*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],            %[step1_7],    %[step1_24]     \n\t"
888*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],            %[step1_6],    %[step1_25]     \n\t"
889*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load3],            %[step1_5],    %[step1_26]     \n\t"
890*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load4],            %[step1_4],    %[step1_27]     \n\t"
891*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp0],        256(%[output])                     \n\t"
892*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp1],        320(%[output])                     \n\t"
893*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp2],        384(%[output])                     \n\t"
894*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp3],        448(%[output])                     \n\t"
895*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load1],       1536(%[output])                     \n\t"
896*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load2],       1600(%[output])                     \n\t"
897*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load3],       1664(%[output])                     \n\t"
898*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load4],       1728(%[output])                     \n\t"
899*fb1b10abSAndroid Build Coastguard Worker 
900*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
901*fb1b10abSAndroid Build Coastguard Worker           [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
902*fb1b10abSAndroid Build Coastguard Worker           [temp3] "=&r"(temp3), [load4] "=&r"(load4)
903*fb1b10abSAndroid Build Coastguard Worker         : [step1_4] "r"(step1_4), [step1_27] "r"(step1_27),
904*fb1b10abSAndroid Build Coastguard Worker           [step1_5] "r"(step1_5), [step1_26] "r"(step1_26),
905*fb1b10abSAndroid Build Coastguard Worker           [step1_6] "r"(step1_6), [step1_25] "r"(step1_25),
906*fb1b10abSAndroid Build Coastguard Worker           [step1_7] "r"(step1_7), [step1_24] "r"(step1_24),
907*fb1b10abSAndroid Build Coastguard Worker           [output] "r"(output));
908*fb1b10abSAndroid Build Coastguard Worker 
909*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
910*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp0],            %[step1_8],     %[step1_23]     \n\t"
911*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],            %[step1_9],     %[step1_22]     \n\t"
912*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp2],            %[step1_10],    %[step1_21]     \n\t"
913*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp3],            %[step1_11],    %[step1_20]     \n\t"
914*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],            %[step1_11],    %[step1_20]     \n\t"
915*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],            %[step1_10],    %[step1_21]     \n\t"
916*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load3],            %[step1_9],     %[step1_22]     \n\t"
917*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load4],            %[step1_8],     %[step1_23]     \n\t"
918*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp0],        512(%[output])                      \n\t"
919*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp1],        576(%[output])                      \n\t"
920*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp2],        640(%[output])                      \n\t"
921*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp3],        704(%[output])                      \n\t"
922*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load1],       1280(%[output])                      \n\t"
923*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load2],       1344(%[output])                      \n\t"
924*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load3],       1408(%[output])                      \n\t"
925*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load4],       1472(%[output])                      \n\t"
926*fb1b10abSAndroid Build Coastguard Worker 
927*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
928*fb1b10abSAndroid Build Coastguard Worker           [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
929*fb1b10abSAndroid Build Coastguard Worker           [temp3] "=&r"(temp3), [load4] "=&r"(load4)
930*fb1b10abSAndroid Build Coastguard Worker         : [step1_8] "r"(step1_8), [step1_23] "r"(step1_23),
931*fb1b10abSAndroid Build Coastguard Worker           [step1_9] "r"(step1_9), [step1_22] "r"(step1_22),
932*fb1b10abSAndroid Build Coastguard Worker           [step1_10] "r"(step1_10), [step1_21] "r"(step1_21),
933*fb1b10abSAndroid Build Coastguard Worker           [step1_11] "r"(step1_11), [step1_20] "r"(step1_20),
934*fb1b10abSAndroid Build Coastguard Worker           [output] "r"(output));
935*fb1b10abSAndroid Build Coastguard Worker 
936*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
937*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp0],            %[step1_12],    %[step2_19]     \n\t"
938*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp1],            %[step1_13],    %[step2_18]     \n\t"
939*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp2],            %[step1_14],    %[step2_17]     \n\t"
940*fb1b10abSAndroid Build Coastguard Worker         "add      %[temp3],            %[step1_15],    %[step2_16]     \n\t"
941*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load1],            %[step1_15],    %[step2_16]     \n\t"
942*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load2],            %[step1_14],    %[step2_17]     \n\t"
943*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load3],            %[step1_13],    %[step2_18]     \n\t"
944*fb1b10abSAndroid Build Coastguard Worker         "sub      %[load4],            %[step1_12],    %[step2_19]     \n\t"
945*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp0],        768(%[output])                      \n\t"
946*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp1],        832(%[output])                      \n\t"
947*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp2],        896(%[output])                      \n\t"
948*fb1b10abSAndroid Build Coastguard Worker         "sh       %[temp3],        960(%[output])                      \n\t"
949*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load1],       1024(%[output])                      \n\t"
950*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load2],       1088(%[output])                      \n\t"
951*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load3],       1152(%[output])                      \n\t"
952*fb1b10abSAndroid Build Coastguard Worker         "sh       %[load4],       1216(%[output])                      \n\t"
953*fb1b10abSAndroid Build Coastguard Worker 
954*fb1b10abSAndroid Build Coastguard Worker         : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
955*fb1b10abSAndroid Build Coastguard Worker           [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
956*fb1b10abSAndroid Build Coastguard Worker           [temp3] "=&r"(temp3), [load4] "=&r"(load4)
957*fb1b10abSAndroid Build Coastguard Worker         : [step1_12] "r"(step1_12), [step2_19] "r"(step2_19),
958*fb1b10abSAndroid Build Coastguard Worker           [step1_13] "r"(step1_13), [step2_18] "r"(step2_18),
959*fb1b10abSAndroid Build Coastguard Worker           [step1_14] "r"(step1_14), [step2_17] "r"(step2_17),
960*fb1b10abSAndroid Build Coastguard Worker           [step1_15] "r"(step1_15), [step2_16] "r"(step2_16),
961*fb1b10abSAndroid Build Coastguard Worker           [output] "r"(output));
962*fb1b10abSAndroid Build Coastguard Worker 
963*fb1b10abSAndroid Build Coastguard Worker     input += 32;
964*fb1b10abSAndroid Build Coastguard Worker     output += 1;
965*fb1b10abSAndroid Build Coastguard Worker   }
966*fb1b10abSAndroid Build Coastguard Worker }
967*fb1b10abSAndroid Build Coastguard Worker 
vpx_idct32x32_1024_add_dspr2(const int16_t * input,uint8_t * dest,int stride)968*fb1b10abSAndroid Build Coastguard Worker void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
969*fb1b10abSAndroid Build Coastguard Worker                                   int stride) {
970*fb1b10abSAndroid Build Coastguard Worker   DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
971*fb1b10abSAndroid Build Coastguard Worker   int16_t *outptr = out;
972*fb1b10abSAndroid Build Coastguard Worker   uint32_t pos = 45;
973*fb1b10abSAndroid Build Coastguard Worker 
974*fb1b10abSAndroid Build Coastguard Worker   /* bit positon for extract from acc */
975*fb1b10abSAndroid Build Coastguard Worker   __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
976*fb1b10abSAndroid Build Coastguard Worker                        :
977*fb1b10abSAndroid Build Coastguard Worker                        : [pos] "r"(pos));
978*fb1b10abSAndroid Build Coastguard Worker 
979*fb1b10abSAndroid Build Coastguard Worker   // Rows
980*fb1b10abSAndroid Build Coastguard Worker   idct32_rows_dspr2(input, outptr, 32);
981*fb1b10abSAndroid Build Coastguard Worker 
982*fb1b10abSAndroid Build Coastguard Worker   // Columns
983*fb1b10abSAndroid Build Coastguard Worker   vpx_idct32_cols_add_blk_dspr2(out, dest, stride);
984*fb1b10abSAndroid Build Coastguard Worker }
985*fb1b10abSAndroid Build Coastguard Worker 
vpx_idct32x32_34_add_dspr2(const int16_t * input,uint8_t * dest,int stride)986*fb1b10abSAndroid Build Coastguard Worker void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
987*fb1b10abSAndroid Build Coastguard Worker                                 int stride) {
988*fb1b10abSAndroid Build Coastguard Worker   DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
989*fb1b10abSAndroid Build Coastguard Worker   int16_t *outptr = out;
990*fb1b10abSAndroid Build Coastguard Worker   uint32_t i;
991*fb1b10abSAndroid Build Coastguard Worker   uint32_t pos = 45;
992*fb1b10abSAndroid Build Coastguard Worker 
993*fb1b10abSAndroid Build Coastguard Worker   /* bit positon for extract from acc */
994*fb1b10abSAndroid Build Coastguard Worker   __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"
995*fb1b10abSAndroid Build Coastguard Worker                        :
996*fb1b10abSAndroid Build Coastguard Worker                        : [pos] "r"(pos));
997*fb1b10abSAndroid Build Coastguard Worker 
998*fb1b10abSAndroid Build Coastguard Worker   // Rows
999*fb1b10abSAndroid Build Coastguard Worker   idct32_rows_dspr2(input, outptr, 8);
1000*fb1b10abSAndroid Build Coastguard Worker 
1001*fb1b10abSAndroid Build Coastguard Worker   outptr += 8;
1002*fb1b10abSAndroid Build Coastguard Worker   __asm__ __volatile__(
1003*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,      0(%[outptr])     \n\t"
1004*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,      4(%[outptr])     \n\t"
1005*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,      8(%[outptr])     \n\t"
1006*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     12(%[outptr])     \n\t"
1007*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     16(%[outptr])     \n\t"
1008*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     20(%[outptr])     \n\t"
1009*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     24(%[outptr])     \n\t"
1010*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     28(%[outptr])     \n\t"
1011*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     32(%[outptr])     \n\t"
1012*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     36(%[outptr])     \n\t"
1013*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     40(%[outptr])     \n\t"
1014*fb1b10abSAndroid Build Coastguard Worker       "sw     $zero,     44(%[outptr])     \n\t"
1015*fb1b10abSAndroid Build Coastguard Worker 
1016*fb1b10abSAndroid Build Coastguard Worker       :
1017*fb1b10abSAndroid Build Coastguard Worker       : [outptr] "r"(outptr));
1018*fb1b10abSAndroid Build Coastguard Worker 
1019*fb1b10abSAndroid Build Coastguard Worker   for (i = 0; i < 31; ++i) {
1020*fb1b10abSAndroid Build Coastguard Worker     outptr += 32;
1021*fb1b10abSAndroid Build Coastguard Worker 
1022*fb1b10abSAndroid Build Coastguard Worker     __asm__ __volatile__(
1023*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,      0(%[outptr])     \n\t"
1024*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,      4(%[outptr])     \n\t"
1025*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,      8(%[outptr])     \n\t"
1026*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     12(%[outptr])     \n\t"
1027*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     16(%[outptr])     \n\t"
1028*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     20(%[outptr])     \n\t"
1029*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     24(%[outptr])     \n\t"
1030*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     28(%[outptr])     \n\t"
1031*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     32(%[outptr])     \n\t"
1032*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     36(%[outptr])     \n\t"
1033*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     40(%[outptr])     \n\t"
1034*fb1b10abSAndroid Build Coastguard Worker         "sw     $zero,     44(%[outptr])     \n\t"
1035*fb1b10abSAndroid Build Coastguard Worker 
1036*fb1b10abSAndroid Build Coastguard Worker         :
1037*fb1b10abSAndroid Build Coastguard Worker         : [outptr] "r"(outptr));
1038*fb1b10abSAndroid Build Coastguard Worker   }
1039*fb1b10abSAndroid Build Coastguard Worker 
1040*fb1b10abSAndroid Build Coastguard Worker   // Columns
1041*fb1b10abSAndroid Build Coastguard Worker   vpx_idct32_cols_add_blk_dspr2(out, dest, stride);
1042*fb1b10abSAndroid Build Coastguard Worker }
1043*fb1b10abSAndroid Build Coastguard Worker 
/*
 * 32x32 inverse transform for a block where only input[0] is used.
 *
 * A single offset a1 is derived from input[0] and then added to (or, when
 * negative, subtracted from) every byte of a 32-byte-wide, 32-row region of
 * dest using saturating quad-byte arithmetic.
 *
 * input  - coefficient block; only input[0] is read
 * dest   - destination bytes, updated in place; accessed with lw/sw, so each
 *          row is assumed to be four-byte aligned
 * stride - byte offset added to dest after each row
 */
void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
                               int stride) {
  int r, out;
  int32_t a1, magnitude;
  int32_t vector_a1;
  int32_t t1, t2, t3, t4;
  int32_t vector_1, vector_2, vector_3, vector_4;
  uint32_t pos = 45;

  /* bit position for extract from acc */
  __asm__ __volatile__("wrdsp      %[pos],     1           \n\t"

                       :
                       : [pos] "r"(pos));

  out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]);

  /* a1 = (out + 32) >> 6 */
  __asm__ __volatile__(
      "addi     %[out],    %[out],    32      \n\t"
      "sra      %[a1],     %[out],    6       \n\t"

      : [out] "+r"(out), [a1] "=r"(a1)
      :);

  /* replv.qb replicates only the low byte of its source, so the magnitude
   * has to fit in 8 bits.  Clamping it to 255 is exact: a saturating add of
   * 255 or more to an unsigned byte always yields 255, and a saturating
   * subtract of 255 or more always yields 0. */
  if (a1 < 0) {
    magnitude = (a1 < -255) ? 255 : -a1;
  } else {
    magnitude = (a1 > 255) ? 255 : a1;
  }

  __asm__ __volatile__("replv.qb       %[vector_a1],   %[magnitude]     \n\t"

                       : [vector_a1] "=&r"(vector_a1)
                       : [magnitude] "r"(magnitude));

  if (a1 < 0) {
    /* use quad-byte
     * input and output memory are four byte aligned */
    for (r = 32; r--;) {
      __asm__ __volatile__(
          "lw             %[t1],          0(%[dest])                      \n\t"
          "lw             %[t2],          4(%[dest])                      \n\t"
          "lw             %[t3],          8(%[dest])                      \n\t"
          "lw             %[t4],          12(%[dest])                     \n\t"
          "subu_s.qb      %[vector_1],    %[t1],          %[vector_a1]    \n\t"
          "subu_s.qb      %[vector_2],    %[t2],          %[vector_a1]    \n\t"
          "subu_s.qb      %[vector_3],    %[t3],          %[vector_a1]    \n\t"
          "subu_s.qb      %[vector_4],    %[t4],          %[vector_a1]    \n\t"
          "sw             %[vector_1],    0(%[dest])                      \n\t"
          "sw             %[vector_2],    4(%[dest])                      \n\t"
          "sw             %[vector_3],    8(%[dest])                      \n\t"
          "sw             %[vector_4],    12(%[dest])                     \n\t"

          "lw             %[t1],          16(%[dest])                     \n\t"
          "lw             %[t2],          20(%[dest])                     \n\t"
          "lw             %[t3],          24(%[dest])                     \n\t"
          "lw             %[t4],          28(%[dest])                     \n\t"
          "subu_s.qb      %[vector_1],    %[t1],          %[vector_a1]    \n\t"
          "subu_s.qb      %[vector_2],    %[t2],          %[vector_a1]    \n\t"
          "subu_s.qb      %[vector_3],    %[t3],          %[vector_a1]    \n\t"
          "subu_s.qb      %[vector_4],    %[t4],          %[vector_a1]    \n\t"
          "sw             %[vector_1],    16(%[dest])                     \n\t"
          "sw             %[vector_2],    20(%[dest])                     \n\t"
          "sw             %[vector_3],    24(%[dest])                     \n\t"
          "sw             %[vector_4],    28(%[dest])                     \n\t"

          "add            %[dest],        %[dest],        %[stride]       \n\t"

          : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4),
            [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2),
            [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4),
            [dest] "+&r"(dest)
          : [stride] "r"(stride), [vector_a1] "r"(vector_a1)
          /* dest is read and written through a register operand */
          : "memory");
    }
  } else {
    /* use quad-byte
     * input and output memory are four byte aligned */
    for (r = 32; r--;) {
      __asm__ __volatile__(
          "lw             %[t1],          0(%[dest])                      \n\t"
          "lw             %[t2],          4(%[dest])                      \n\t"
          "lw             %[t3],          8(%[dest])                      \n\t"
          "lw             %[t4],          12(%[dest])                     \n\t"
          "addu_s.qb      %[vector_1],    %[t1],          %[vector_a1]    \n\t"
          "addu_s.qb      %[vector_2],    %[t2],          %[vector_a1]    \n\t"
          "addu_s.qb      %[vector_3],    %[t3],          %[vector_a1]    \n\t"
          "addu_s.qb      %[vector_4],    %[t4],          %[vector_a1]    \n\t"
          "sw             %[vector_1],    0(%[dest])                      \n\t"
          "sw             %[vector_2],    4(%[dest])                      \n\t"
          "sw             %[vector_3],    8(%[dest])                      \n\t"
          "sw             %[vector_4],    12(%[dest])                     \n\t"

          "lw             %[t1],          16(%[dest])                     \n\t"
          "lw             %[t2],          20(%[dest])                     \n\t"
          "lw             %[t3],          24(%[dest])                     \n\t"
          "lw             %[t4],          28(%[dest])                     \n\t"
          "addu_s.qb      %[vector_1],    %[t1],          %[vector_a1]    \n\t"
          "addu_s.qb      %[vector_2],    %[t2],          %[vector_a1]    \n\t"
          "addu_s.qb      %[vector_3],    %[t3],          %[vector_a1]    \n\t"
          "addu_s.qb      %[vector_4],    %[t4],          %[vector_a1]    \n\t"
          "sw             %[vector_1],    16(%[dest])                     \n\t"
          "sw             %[vector_2],    20(%[dest])                     \n\t"
          "sw             %[vector_3],    24(%[dest])                     \n\t"
          "sw             %[vector_4],    28(%[dest])                     \n\t"

          "add            %[dest],        %[dest],        %[stride]       \n\t"

          : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4),
            [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2),
            [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4),
            [dest] "+&r"(dest)
          : [stride] "r"(stride), [vector_a1] "r"(vector_a1)
          /* dest is read and written through a register operand */
          : "memory");
    }
  }
}
1218*fb1b10abSAndroid Build Coastguard Worker #endif  // #if HAVE_DSPR2
1219