1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker *
4*fb1b10abSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker * that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker * tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker * in the file PATENTS. All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker * be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker */
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker #include <assert.h>
12*fb1b10abSAndroid Build Coastguard Worker #include <stdio.h>
13*fb1b10abSAndroid Build Coastguard Worker
14*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
15*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/mips/inv_txfm_dspr2.h"
16*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/txfm_common.h"
17*fb1b10abSAndroid Build Coastguard Worker
18*fb1b10abSAndroid Build Coastguard Worker #if HAVE_DSPR2
idct32_rows_dspr2(const int16_t * input,int16_t * output,uint32_t no_rows)19*fb1b10abSAndroid Build Coastguard Worker static void idct32_rows_dspr2(const int16_t *input, int16_t *output,
20*fb1b10abSAndroid Build Coastguard Worker uint32_t no_rows) {
21*fb1b10abSAndroid Build Coastguard Worker int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
22*fb1b10abSAndroid Build Coastguard Worker int step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
23*fb1b10abSAndroid Build Coastguard Worker int step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
24*fb1b10abSAndroid Build Coastguard Worker int step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27;
25*fb1b10abSAndroid Build Coastguard Worker int step1_28, step1_29, step1_30, step1_31;
26*fb1b10abSAndroid Build Coastguard Worker int step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;
27*fb1b10abSAndroid Build Coastguard Worker int step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13;
28*fb1b10abSAndroid Build Coastguard Worker int step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20;
29*fb1b10abSAndroid Build Coastguard Worker int step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27;
30*fb1b10abSAndroid Build Coastguard Worker int step2_28, step2_29, step2_30, step2_31;
31*fb1b10abSAndroid Build Coastguard Worker int step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14;
32*fb1b10abSAndroid Build Coastguard Worker int step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21;
33*fb1b10abSAndroid Build Coastguard Worker int step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28;
34*fb1b10abSAndroid Build Coastguard Worker int step3_29, step3_30, step3_31;
35*fb1b10abSAndroid Build Coastguard Worker int temp0, temp1, temp2, temp3;
36*fb1b10abSAndroid Build Coastguard Worker int load1, load2, load3, load4;
37*fb1b10abSAndroid Build Coastguard Worker int result1, result2;
38*fb1b10abSAndroid Build Coastguard Worker int i;
39*fb1b10abSAndroid Build Coastguard Worker const int const_2_power_13 = 8192;
40*fb1b10abSAndroid Build Coastguard Worker const int32_t *input_int;
41*fb1b10abSAndroid Build Coastguard Worker
42*fb1b10abSAndroid Build Coastguard Worker for (i = no_rows; i--;) {
43*fb1b10abSAndroid Build Coastguard Worker input_int = (const int32_t *)input;
44*fb1b10abSAndroid Build Coastguard Worker
45*fb1b10abSAndroid Build Coastguard Worker if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] |
46*fb1b10abSAndroid Build Coastguard Worker input_int[4] | input_int[5] | input_int[6] | input_int[7] |
47*fb1b10abSAndroid Build Coastguard Worker input_int[8] | input_int[9] | input_int[10] | input_int[11] |
48*fb1b10abSAndroid Build Coastguard Worker input_int[12] | input_int[13] | input_int[14] | input_int[15])) {
49*fb1b10abSAndroid Build Coastguard Worker input += 32;
50*fb1b10abSAndroid Build Coastguard Worker
51*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
52*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 0(%[output]) \n\t"
53*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 64(%[output]) \n\t"
54*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 128(%[output]) \n\t"
55*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 192(%[output]) \n\t"
56*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 256(%[output]) \n\t"
57*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 320(%[output]) \n\t"
58*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 384(%[output]) \n\t"
59*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 448(%[output]) \n\t"
60*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 512(%[output]) \n\t"
61*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 576(%[output]) \n\t"
62*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 640(%[output]) \n\t"
63*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 704(%[output]) \n\t"
64*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 768(%[output]) \n\t"
65*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 832(%[output]) \n\t"
66*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 896(%[output]) \n\t"
67*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 960(%[output]) \n\t"
68*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1024(%[output]) \n\t"
69*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1088(%[output]) \n\t"
70*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1152(%[output]) \n\t"
71*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1216(%[output]) \n\t"
72*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1280(%[output]) \n\t"
73*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1344(%[output]) \n\t"
74*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1408(%[output]) \n\t"
75*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1472(%[output]) \n\t"
76*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1536(%[output]) \n\t"
77*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1600(%[output]) \n\t"
78*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1664(%[output]) \n\t"
79*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1728(%[output]) \n\t"
80*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1792(%[output]) \n\t"
81*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1856(%[output]) \n\t"
82*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1920(%[output]) \n\t"
83*fb1b10abSAndroid Build Coastguard Worker "sh $zero, 1984(%[output]) \n\t"
84*fb1b10abSAndroid Build Coastguard Worker
85*fb1b10abSAndroid Build Coastguard Worker :
86*fb1b10abSAndroid Build Coastguard Worker : [output] "r"(output));
87*fb1b10abSAndroid Build Coastguard Worker
88*fb1b10abSAndroid Build Coastguard Worker output += 1;
89*fb1b10abSAndroid Build Coastguard Worker
90*fb1b10abSAndroid Build Coastguard Worker continue;
91*fb1b10abSAndroid Build Coastguard Worker }
92*fb1b10abSAndroid Build Coastguard Worker
93*fb1b10abSAndroid Build Coastguard Worker /* prefetch row */
94*fb1b10abSAndroid Build Coastguard Worker prefetch_load((const uint8_t *)(input + 32));
95*fb1b10abSAndroid Build Coastguard Worker prefetch_load((const uint8_t *)(input + 48));
96*fb1b10abSAndroid Build Coastguard Worker
97*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
98*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 2(%[input]) \n\t"
99*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 62(%[input]) \n\t"
100*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 34(%[input]) \n\t"
101*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 30(%[input]) \n\t"
102*fb1b10abSAndroid Build Coastguard Worker
103*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
104*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
105*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
106*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
107*fb1b10abSAndroid Build Coastguard Worker
108*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_31_64] \n\t"
109*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_1_64] \n\t"
110*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
111*fb1b10abSAndroid Build Coastguard Worker
112*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_1_64] \n\t"
113*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_31_64] \n\t"
114*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
115*fb1b10abSAndroid Build Coastguard Worker
116*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
117*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
118*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
119*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
120*fb1b10abSAndroid Build Coastguard Worker
121*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_15_64] \n\t"
122*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_17_64] \n\t"
123*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
124*fb1b10abSAndroid Build Coastguard Worker
125*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_17_64] \n\t"
126*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_15_64] \n\t"
127*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
128*fb1b10abSAndroid Build Coastguard Worker
129*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
130*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
131*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
132*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
133*fb1b10abSAndroid Build Coastguard Worker
134*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp3], %[temp2] \n\t"
135*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp0], %[temp1] \n\t"
136*fb1b10abSAndroid Build Coastguard Worker
137*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_28_64] \n\t"
138*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_4_64] \n\t"
139*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_4_64] \n\t"
140*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_28_64] \n\t"
141*fb1b10abSAndroid Build Coastguard Worker
142*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_17], $ac1, 31 \n\t"
143*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_30], $ac3, 31 \n\t"
144*fb1b10abSAndroid Build Coastguard Worker "add %[step1_16], %[temp0], %[temp1] \n\t"
145*fb1b10abSAndroid Build Coastguard Worker "add %[step1_31], %[temp2], %[temp3] \n\t"
146*fb1b10abSAndroid Build Coastguard Worker
147*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
148*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
149*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
150*fb1b10abSAndroid Build Coastguard Worker [step1_16] "=&r"(step1_16), [step1_17] "=&r"(step1_17),
151*fb1b10abSAndroid Build Coastguard Worker [step1_30] "=&r"(step1_30), [step1_31] "=&r"(step1_31)
152*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
153*fb1b10abSAndroid Build Coastguard Worker [cospi_31_64] "r"(cospi_31_64), [cospi_1_64] "r"(cospi_1_64),
154*fb1b10abSAndroid Build Coastguard Worker [cospi_4_64] "r"(cospi_4_64), [cospi_17_64] "r"(cospi_17_64),
155*fb1b10abSAndroid Build Coastguard Worker [cospi_15_64] "r"(cospi_15_64), [cospi_28_64] "r"(cospi_28_64));
156*fb1b10abSAndroid Build Coastguard Worker
157*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
158*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 18(%[input]) \n\t"
159*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 46(%[input]) \n\t"
160*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 50(%[input]) \n\t"
161*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 14(%[input]) \n\t"
162*fb1b10abSAndroid Build Coastguard Worker
163*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
164*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
165*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
166*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
167*fb1b10abSAndroid Build Coastguard Worker
168*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_23_64] \n\t"
169*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_9_64] \n\t"
170*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
171*fb1b10abSAndroid Build Coastguard Worker
172*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_9_64] \n\t"
173*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_23_64] \n\t"
174*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
175*fb1b10abSAndroid Build Coastguard Worker
176*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
177*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
178*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
179*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
180*fb1b10abSAndroid Build Coastguard Worker
181*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_7_64] \n\t"
182*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_25_64] \n\t"
183*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
184*fb1b10abSAndroid Build Coastguard Worker
185*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_25_64] \n\t"
186*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_7_64] \n\t"
187*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
188*fb1b10abSAndroid Build Coastguard Worker
189*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
190*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
191*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
192*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
193*fb1b10abSAndroid Build Coastguard Worker
194*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp1], %[temp0] \n\t"
195*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp2], %[temp3] \n\t"
196*fb1b10abSAndroid Build Coastguard Worker
197*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load1], %[cospi_28_64] \n\t"
198*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_4_64] \n\t"
199*fb1b10abSAndroid Build Coastguard Worker "msub $ac3, %[load1], %[cospi_4_64] \n\t"
200*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_28_64] \n\t"
201*fb1b10abSAndroid Build Coastguard Worker
202*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_18], $ac1, 31 \n\t"
203*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_29], $ac3, 31 \n\t"
204*fb1b10abSAndroid Build Coastguard Worker "add %[step1_19], %[temp0], %[temp1] \n\t"
205*fb1b10abSAndroid Build Coastguard Worker "add %[step1_28], %[temp2], %[temp3] \n\t"
206*fb1b10abSAndroid Build Coastguard Worker
207*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
208*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
209*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
210*fb1b10abSAndroid Build Coastguard Worker [step1_18] "=&r"(step1_18), [step1_19] "=&r"(step1_19),
211*fb1b10abSAndroid Build Coastguard Worker [step1_28] "=&r"(step1_28), [step1_29] "=&r"(step1_29)
212*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
213*fb1b10abSAndroid Build Coastguard Worker [cospi_23_64] "r"(cospi_23_64), [cospi_9_64] "r"(cospi_9_64),
214*fb1b10abSAndroid Build Coastguard Worker [cospi_4_64] "r"(cospi_4_64), [cospi_7_64] "r"(cospi_7_64),
215*fb1b10abSAndroid Build Coastguard Worker [cospi_25_64] "r"(cospi_25_64), [cospi_28_64] "r"(cospi_28_64));
216*fb1b10abSAndroid Build Coastguard Worker
217*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
218*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 10(%[input]) \n\t"
219*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 54(%[input]) \n\t"
220*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 42(%[input]) \n\t"
221*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 22(%[input]) \n\t"
222*fb1b10abSAndroid Build Coastguard Worker
223*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
224*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
225*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
226*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
227*fb1b10abSAndroid Build Coastguard Worker
228*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_27_64] \n\t"
229*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_5_64] \n\t"
230*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
231*fb1b10abSAndroid Build Coastguard Worker
232*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_5_64] \n\t"
233*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_27_64] \n\t"
234*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
235*fb1b10abSAndroid Build Coastguard Worker
236*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
237*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
238*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
239*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
240*fb1b10abSAndroid Build Coastguard Worker
241*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_11_64] \n\t"
242*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_21_64] \n\t"
243*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
244*fb1b10abSAndroid Build Coastguard Worker
245*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_21_64] \n\t"
246*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_11_64] \n\t"
247*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
248*fb1b10abSAndroid Build Coastguard Worker
249*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
250*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
251*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
252*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
253*fb1b10abSAndroid Build Coastguard Worker
254*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp0], %[temp1] \n\t"
255*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp3], %[temp2] \n\t"
256*fb1b10abSAndroid Build Coastguard Worker
257*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load2], %[cospi_12_64] \n\t"
258*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load1], %[cospi_20_64] \n\t"
259*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_12_64] \n\t"
260*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_20_64] \n\t"
261*fb1b10abSAndroid Build Coastguard Worker
262*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_21], $ac1, 31 \n\t"
263*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_26], $ac3, 31 \n\t"
264*fb1b10abSAndroid Build Coastguard Worker "add %[step1_20], %[temp0], %[temp1] \n\t"
265*fb1b10abSAndroid Build Coastguard Worker "add %[step1_27], %[temp2], %[temp3] \n\t"
266*fb1b10abSAndroid Build Coastguard Worker
267*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
268*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
269*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
270*fb1b10abSAndroid Build Coastguard Worker [step1_20] "=&r"(step1_20), [step1_21] "=&r"(step1_21),
271*fb1b10abSAndroid Build Coastguard Worker [step1_26] "=&r"(step1_26), [step1_27] "=&r"(step1_27)
272*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
273*fb1b10abSAndroid Build Coastguard Worker [cospi_27_64] "r"(cospi_27_64), [cospi_5_64] "r"(cospi_5_64),
274*fb1b10abSAndroid Build Coastguard Worker [cospi_11_64] "r"(cospi_11_64), [cospi_21_64] "r"(cospi_21_64),
275*fb1b10abSAndroid Build Coastguard Worker [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64));
276*fb1b10abSAndroid Build Coastguard Worker
277*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
278*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 26(%[input]) \n\t"
279*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 38(%[input]) \n\t"
280*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 58(%[input]) \n\t"
281*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 6(%[input]) \n\t"
282*fb1b10abSAndroid Build Coastguard Worker
283*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
284*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
285*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
286*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
287*fb1b10abSAndroid Build Coastguard Worker
288*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_19_64] \n\t"
289*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_13_64] \n\t"
290*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
291*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_13_64] \n\t"
292*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_19_64] \n\t"
293*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
294*fb1b10abSAndroid Build Coastguard Worker
295*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
296*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
297*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
298*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
299*fb1b10abSAndroid Build Coastguard Worker
300*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_3_64] \n\t"
301*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_29_64] \n\t"
302*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
303*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_29_64] \n\t"
304*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_3_64] \n\t"
305*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
306*fb1b10abSAndroid Build Coastguard Worker
307*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
308*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
309*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
310*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
311*fb1b10abSAndroid Build Coastguard Worker
312*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp1], %[temp0] \n\t"
313*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp2], %[temp3] \n\t"
314*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load1], %[cospi_12_64] \n\t"
315*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_20_64] \n\t"
316*fb1b10abSAndroid Build Coastguard Worker "msub $ac3, %[load1], %[cospi_20_64] \n\t"
317*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_12_64] \n\t"
318*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_22], $ac1, 31 \n\t"
319*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_25], $ac3, 31 \n\t"
320*fb1b10abSAndroid Build Coastguard Worker "add %[step1_23], %[temp0], %[temp1] \n\t"
321*fb1b10abSAndroid Build Coastguard Worker "add %[step1_24], %[temp2], %[temp3] \n\t"
322*fb1b10abSAndroid Build Coastguard Worker
323*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
324*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
325*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
326*fb1b10abSAndroid Build Coastguard Worker [step1_22] "=&r"(step1_22), [step1_23] "=&r"(step1_23),
327*fb1b10abSAndroid Build Coastguard Worker [step1_24] "=&r"(step1_24), [step1_25] "=&r"(step1_25)
328*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
329*fb1b10abSAndroid Build Coastguard Worker [cospi_19_64] "r"(cospi_19_64), [cospi_13_64] "r"(cospi_13_64),
330*fb1b10abSAndroid Build Coastguard Worker [cospi_3_64] "r"(cospi_3_64), [cospi_29_64] "r"(cospi_29_64),
331*fb1b10abSAndroid Build Coastguard Worker [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64));
332*fb1b10abSAndroid Build Coastguard Worker
333*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
334*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 4(%[input]) \n\t"
335*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 60(%[input]) \n\t"
336*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 36(%[input]) \n\t"
337*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 28(%[input]) \n\t"
338*fb1b10abSAndroid Build Coastguard Worker
339*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
340*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
341*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
342*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
343*fb1b10abSAndroid Build Coastguard Worker
344*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_30_64] \n\t"
345*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_2_64] \n\t"
346*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
347*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_2_64] \n\t"
348*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_30_64] \n\t"
349*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
350*fb1b10abSAndroid Build Coastguard Worker
351*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
352*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
353*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
354*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
355*fb1b10abSAndroid Build Coastguard Worker
356*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_14_64] \n\t"
357*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_18_64] \n\t"
358*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
359*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_18_64] \n\t"
360*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_14_64] \n\t"
361*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
362*fb1b10abSAndroid Build Coastguard Worker
363*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
364*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
365*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
366*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
367*fb1b10abSAndroid Build Coastguard Worker
368*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp0], %[temp1] \n\t"
369*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp3], %[temp2] \n\t"
370*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load1], %[cospi_8_64] \n\t"
371*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load2], %[cospi_24_64] \n\t"
372*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_24_64] \n\t"
373*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_8_64] \n\t"
374*fb1b10abSAndroid Build Coastguard Worker "extp %[step2_9], $ac1, 31 \n\t"
375*fb1b10abSAndroid Build Coastguard Worker "extp %[step2_14], $ac3, 31 \n\t"
376*fb1b10abSAndroid Build Coastguard Worker "add %[step2_8], %[temp0], %[temp1] \n\t"
377*fb1b10abSAndroid Build Coastguard Worker "add %[step2_15], %[temp2], %[temp3] \n\t"
378*fb1b10abSAndroid Build Coastguard Worker
379*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
380*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
381*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step2_8] "=&r"(step2_8),
382*fb1b10abSAndroid Build Coastguard Worker [step2_9] "=&r"(step2_9), [step2_14] "=&r"(step2_14),
383*fb1b10abSAndroid Build Coastguard Worker [step2_15] "=&r"(step2_15)
384*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
385*fb1b10abSAndroid Build Coastguard Worker [cospi_30_64] "r"(cospi_30_64), [cospi_2_64] "r"(cospi_2_64),
386*fb1b10abSAndroid Build Coastguard Worker [cospi_14_64] "r"(cospi_14_64), [cospi_18_64] "r"(cospi_18_64),
387*fb1b10abSAndroid Build Coastguard Worker [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64));
388*fb1b10abSAndroid Build Coastguard Worker
389*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
390*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 20(%[input]) \n\t"
391*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 44(%[input]) \n\t"
392*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 52(%[input]) \n\t"
393*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 12(%[input]) \n\t"
394*fb1b10abSAndroid Build Coastguard Worker
395*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
396*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
397*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
398*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
399*fb1b10abSAndroid Build Coastguard Worker
400*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_22_64] \n\t"
401*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_10_64] \n\t"
402*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
403*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_10_64] \n\t"
404*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_22_64] \n\t"
405*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
406*fb1b10abSAndroid Build Coastguard Worker
407*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
408*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
409*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
410*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
411*fb1b10abSAndroid Build Coastguard Worker
412*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_6_64] \n\t"
413*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_26_64] \n\t"
414*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
415*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_26_64] \n\t"
416*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_6_64] \n\t"
417*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
418*fb1b10abSAndroid Build Coastguard Worker
419*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
420*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
421*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
422*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
423*fb1b10abSAndroid Build Coastguard Worker
424*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp1], %[temp0] \n\t"
425*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp2], %[temp3] \n\t"
426*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load1], %[cospi_24_64] \n\t"
427*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_8_64] \n\t"
428*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_24_64] \n\t"
429*fb1b10abSAndroid Build Coastguard Worker "msub $ac3, %[load1], %[cospi_8_64] \n\t"
430*fb1b10abSAndroid Build Coastguard Worker "extp %[step2_10], $ac1, 31 \n\t"
431*fb1b10abSAndroid Build Coastguard Worker "extp %[step2_13], $ac3, 31 \n\t"
432*fb1b10abSAndroid Build Coastguard Worker "add %[step2_11], %[temp0], %[temp1] \n\t"
433*fb1b10abSAndroid Build Coastguard Worker "add %[step2_12], %[temp2], %[temp3] \n\t"
434*fb1b10abSAndroid Build Coastguard Worker
435*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
436*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
437*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
438*fb1b10abSAndroid Build Coastguard Worker [step2_10] "=&r"(step2_10), [step2_11] "=&r"(step2_11),
439*fb1b10abSAndroid Build Coastguard Worker [step2_12] "=&r"(step2_12), [step2_13] "=&r"(step2_13)
440*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
441*fb1b10abSAndroid Build Coastguard Worker [cospi_22_64] "r"(cospi_22_64), [cospi_10_64] "r"(cospi_10_64),
442*fb1b10abSAndroid Build Coastguard Worker [cospi_6_64] "r"(cospi_6_64), [cospi_26_64] "r"(cospi_26_64),
443*fb1b10abSAndroid Build Coastguard Worker [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64));
444*fb1b10abSAndroid Build Coastguard Worker
445*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
446*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
447*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
448*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step2_14], %[step2_13] \n\t"
449*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[temp0], %[step2_9] \n\t"
450*fb1b10abSAndroid Build Coastguard Worker "add %[temp0], %[temp0], %[step2_10] \n\t"
451*fb1b10abSAndroid Build Coastguard Worker "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
452*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
453*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
454*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[step2_14], %[step2_13] \n\t"
455*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[temp1], %[step2_9] \n\t"
456*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[temp1], %[step2_10] \n\t"
457*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp1], %[cospi_16_64] \n\t"
458*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
459*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
460*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step2_15], %[step2_12] \n\t"
461*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[temp0], %[step2_8] \n\t"
462*fb1b10abSAndroid Build Coastguard Worker "add %[temp0], %[temp0], %[step2_11] \n\t"
463*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[temp0], %[cospi_16_64] \n\t"
464*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
465*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
466*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[step2_15], %[step2_12] \n\t"
467*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[temp1], %[step2_8] \n\t"
468*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[temp1], %[step2_11] \n\t"
469*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[temp1], %[cospi_16_64] \n\t"
470*fb1b10abSAndroid Build Coastguard Worker
471*fb1b10abSAndroid Build Coastguard Worker "add %[step3_8], %[step2_8], %[step2_11] \n\t"
472*fb1b10abSAndroid Build Coastguard Worker "add %[step3_9], %[step2_9], %[step2_10] \n\t"
473*fb1b10abSAndroid Build Coastguard Worker "add %[step3_14], %[step2_13], %[step2_14] \n\t"
474*fb1b10abSAndroid Build Coastguard Worker "add %[step3_15], %[step2_12], %[step2_15] \n\t"
475*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_10], $ac0, 31 \n\t"
476*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_13], $ac1, 31 \n\t"
477*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_11], $ac2, 31 \n\t"
478*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_12], $ac3, 31 \n\t"
479*fb1b10abSAndroid Build Coastguard Worker
480*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_8] "=&r"(step3_8),
481*fb1b10abSAndroid Build Coastguard Worker [step3_9] "=&r"(step3_9), [step3_10] "=&r"(step3_10),
482*fb1b10abSAndroid Build Coastguard Worker [step3_11] "=&r"(step3_11), [step3_12] "=&r"(step3_12),
483*fb1b10abSAndroid Build Coastguard Worker [step3_13] "=&r"(step3_13), [step3_14] "=&r"(step3_14),
484*fb1b10abSAndroid Build Coastguard Worker [step3_15] "=&r"(step3_15)
485*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step2_8] "r"(step2_8),
486*fb1b10abSAndroid Build Coastguard Worker [step2_9] "r"(step2_9), [step2_10] "r"(step2_10),
487*fb1b10abSAndroid Build Coastguard Worker [step2_11] "r"(step2_11), [step2_12] "r"(step2_12),
488*fb1b10abSAndroid Build Coastguard Worker [step2_13] "r"(step2_13), [step2_14] "r"(step2_14),
489*fb1b10abSAndroid Build Coastguard Worker [step2_15] "r"(step2_15), [cospi_16_64] "r"(cospi_16_64));
490*fb1b10abSAndroid Build Coastguard Worker
491*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
492*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
493*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
494*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
495*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
496*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step1_17], %[step1_18] \n\t"
497*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[step1_30], %[step1_29] \n\t"
498*fb1b10abSAndroid Build Coastguard Worker "add %[step3_17], %[step1_17], %[step1_18] \n\t"
499*fb1b10abSAndroid Build Coastguard Worker "add %[step3_30], %[step1_30], %[step1_29] \n\t"
500*fb1b10abSAndroid Build Coastguard Worker
501*fb1b10abSAndroid Build Coastguard Worker "msub $ac0, %[temp0], %[cospi_8_64] \n\t"
502*fb1b10abSAndroid Build Coastguard Worker "madd $ac0, %[temp1], %[cospi_24_64] \n\t"
503*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_18], $ac0, 31 \n\t"
504*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp0], %[cospi_24_64] \n\t"
505*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp1], %[cospi_8_64] \n\t"
506*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_29], $ac1, 31 \n\t"
507*fb1b10abSAndroid Build Coastguard Worker
508*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
509*fb1b10abSAndroid Build Coastguard Worker [step3_18] "=&r"(step3_18), [step3_29] "=&r"(step3_29),
510*fb1b10abSAndroid Build Coastguard Worker [step3_17] "=&r"(step3_17), [step3_30] "=&r"(step3_30)
511*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step1_17] "r"(step1_17),
512*fb1b10abSAndroid Build Coastguard Worker [step1_18] "r"(step1_18), [step1_30] "r"(step1_30),
513*fb1b10abSAndroid Build Coastguard Worker [step1_29] "r"(step1_29), [cospi_24_64] "r"(cospi_24_64),
514*fb1b10abSAndroid Build Coastguard Worker [cospi_8_64] "r"(cospi_8_64));
515*fb1b10abSAndroid Build Coastguard Worker
516*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
517*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
518*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
519*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
520*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
521*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step1_16], %[step1_19] \n\t"
522*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[step1_31], %[step1_28] \n\t"
523*fb1b10abSAndroid Build Coastguard Worker "add %[step3_16], %[step1_16], %[step1_19] \n\t"
524*fb1b10abSAndroid Build Coastguard Worker "add %[step3_31], %[step1_31], %[step1_28] \n\t"
525*fb1b10abSAndroid Build Coastguard Worker
526*fb1b10abSAndroid Build Coastguard Worker "msub $ac0, %[temp0], %[cospi_8_64] \n\t"
527*fb1b10abSAndroid Build Coastguard Worker "madd $ac0, %[temp1], %[cospi_24_64] \n\t"
528*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_19], $ac0, 31 \n\t"
529*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp0], %[cospi_24_64] \n\t"
530*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp1], %[cospi_8_64] \n\t"
531*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_28], $ac1, 31 \n\t"
532*fb1b10abSAndroid Build Coastguard Worker
533*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
534*fb1b10abSAndroid Build Coastguard Worker [step3_16] "=&r"(step3_16), [step3_31] "=&r"(step3_31),
535*fb1b10abSAndroid Build Coastguard Worker [step3_19] "=&r"(step3_19), [step3_28] "=&r"(step3_28)
536*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step1_16] "r"(step1_16),
537*fb1b10abSAndroid Build Coastguard Worker [step1_19] "r"(step1_19), [step1_31] "r"(step1_31),
538*fb1b10abSAndroid Build Coastguard Worker [step1_28] "r"(step1_28), [cospi_24_64] "r"(cospi_24_64),
539*fb1b10abSAndroid Build Coastguard Worker [cospi_8_64] "r"(cospi_8_64));
540*fb1b10abSAndroid Build Coastguard Worker
541*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
542*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
543*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
544*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
545*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
546*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step1_23], %[step1_20] \n\t"
547*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[step1_24], %[step1_27] \n\t"
548*fb1b10abSAndroid Build Coastguard Worker "add %[step3_23], %[step1_23], %[step1_20] \n\t"
549*fb1b10abSAndroid Build Coastguard Worker "add %[step3_24], %[step1_24], %[step1_27] \n\t"
550*fb1b10abSAndroid Build Coastguard Worker
551*fb1b10abSAndroid Build Coastguard Worker "msub $ac0, %[temp0], %[cospi_8_64] \n\t"
552*fb1b10abSAndroid Build Coastguard Worker "madd $ac0, %[temp1], %[cospi_24_64] \n\t"
553*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_27], $ac0, 31 \n\t"
554*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[temp0], %[cospi_24_64] \n\t"
555*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[temp1], %[cospi_8_64] \n\t"
556*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_20], $ac1, 31 \n\t"
557*fb1b10abSAndroid Build Coastguard Worker
558*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
559*fb1b10abSAndroid Build Coastguard Worker [step3_23] "=&r"(step3_23), [step3_24] "=&r"(step3_24),
560*fb1b10abSAndroid Build Coastguard Worker [step3_20] "=&r"(step3_20), [step3_27] "=&r"(step3_27)
561*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step1_23] "r"(step1_23),
562*fb1b10abSAndroid Build Coastguard Worker [step1_20] "r"(step1_20), [step1_24] "r"(step1_24),
563*fb1b10abSAndroid Build Coastguard Worker [step1_27] "r"(step1_27), [cospi_24_64] "r"(cospi_24_64),
564*fb1b10abSAndroid Build Coastguard Worker [cospi_8_64] "r"(cospi_8_64));
565*fb1b10abSAndroid Build Coastguard Worker
566*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
567*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
568*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
569*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
570*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
571*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step1_22], %[step1_21] \n\t"
572*fb1b10abSAndroid Build Coastguard Worker "sub %[temp1], %[step1_25], %[step1_26] \n\t"
573*fb1b10abSAndroid Build Coastguard Worker "add %[step3_22], %[step1_22], %[step1_21] \n\t"
574*fb1b10abSAndroid Build Coastguard Worker "add %[step3_25], %[step1_25], %[step1_26] \n\t"
575*fb1b10abSAndroid Build Coastguard Worker
576*fb1b10abSAndroid Build Coastguard Worker "msub $ac0, %[temp0], %[cospi_24_64] \n\t"
577*fb1b10abSAndroid Build Coastguard Worker "msub $ac0, %[temp1], %[cospi_8_64] \n\t"
578*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_21], $ac0, 31 \n\t"
579*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[temp0], %[cospi_8_64] \n\t"
580*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp1], %[cospi_24_64] \n\t"
581*fb1b10abSAndroid Build Coastguard Worker "extp %[step3_26], $ac1, 31 \n\t"
582*fb1b10abSAndroid Build Coastguard Worker
583*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
584*fb1b10abSAndroid Build Coastguard Worker [step3_22] "=&r"(step3_22), [step3_25] "=&r"(step3_25),
585*fb1b10abSAndroid Build Coastguard Worker [step3_21] "=&r"(step3_21), [step3_26] "=&r"(step3_26)
586*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step1_22] "r"(step1_22),
587*fb1b10abSAndroid Build Coastguard Worker [step1_21] "r"(step1_21), [step1_25] "r"(step1_25),
588*fb1b10abSAndroid Build Coastguard Worker [step1_26] "r"(step1_26), [cospi_24_64] "r"(cospi_24_64),
589*fb1b10abSAndroid Build Coastguard Worker [cospi_8_64] "r"(cospi_8_64));
590*fb1b10abSAndroid Build Coastguard Worker
591*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
592*fb1b10abSAndroid Build Coastguard Worker "add %[step2_16], %[step3_16], %[step3_23] \n\t"
593*fb1b10abSAndroid Build Coastguard Worker "add %[step2_17], %[step3_17], %[step3_22] \n\t"
594*fb1b10abSAndroid Build Coastguard Worker "add %[step2_18], %[step3_18], %[step3_21] \n\t"
595*fb1b10abSAndroid Build Coastguard Worker "add %[step2_19], %[step3_19], %[step3_20] \n\t"
596*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_20], %[step3_19], %[step3_20] \n\t"
597*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_21], %[step3_18], %[step3_21] \n\t"
598*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_22], %[step3_17], %[step3_22] \n\t"
599*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_23], %[step3_16], %[step3_23] \n\t"
600*fb1b10abSAndroid Build Coastguard Worker
601*fb1b10abSAndroid Build Coastguard Worker : [step2_16] "=&r"(step2_16), [step2_17] "=&r"(step2_17),
602*fb1b10abSAndroid Build Coastguard Worker [step2_18] "=&r"(step2_18), [step2_19] "=&r"(step2_19),
603*fb1b10abSAndroid Build Coastguard Worker [step2_20] "=&r"(step2_20), [step2_21] "=&r"(step2_21),
604*fb1b10abSAndroid Build Coastguard Worker [step2_22] "=&r"(step2_22), [step2_23] "=&r"(step2_23)
605*fb1b10abSAndroid Build Coastguard Worker : [step3_16] "r"(step3_16), [step3_23] "r"(step3_23),
606*fb1b10abSAndroid Build Coastguard Worker [step3_17] "r"(step3_17), [step3_22] "r"(step3_22),
607*fb1b10abSAndroid Build Coastguard Worker [step3_18] "r"(step3_18), [step3_21] "r"(step3_21),
608*fb1b10abSAndroid Build Coastguard Worker [step3_19] "r"(step3_19), [step3_20] "r"(step3_20));
609*fb1b10abSAndroid Build Coastguard Worker
610*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
611*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_24], %[step3_31], %[step3_24] \n\t"
612*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_25], %[step3_30], %[step3_25] \n\t"
613*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_26], %[step3_29], %[step3_26] \n\t"
614*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_27], %[step3_28], %[step3_27] \n\t"
615*fb1b10abSAndroid Build Coastguard Worker "add %[step2_28], %[step3_28], %[step3_27] \n\t"
616*fb1b10abSAndroid Build Coastguard Worker "add %[step2_29], %[step3_29], %[step3_26] \n\t"
617*fb1b10abSAndroid Build Coastguard Worker "add %[step2_30], %[step3_30], %[step3_25] \n\t"
618*fb1b10abSAndroid Build Coastguard Worker "add %[step2_31], %[step3_31], %[step3_24] \n\t"
619*fb1b10abSAndroid Build Coastguard Worker
620*fb1b10abSAndroid Build Coastguard Worker : [step2_24] "=&r"(step2_24), [step2_28] "=&r"(step2_28),
621*fb1b10abSAndroid Build Coastguard Worker [step2_25] "=&r"(step2_25), [step2_29] "=&r"(step2_29),
622*fb1b10abSAndroid Build Coastguard Worker [step2_26] "=&r"(step2_26), [step2_30] "=&r"(step2_30),
623*fb1b10abSAndroid Build Coastguard Worker [step2_27] "=&r"(step2_27), [step2_31] "=&r"(step2_31)
624*fb1b10abSAndroid Build Coastguard Worker : [step3_31] "r"(step3_31), [step3_24] "r"(step3_24),
625*fb1b10abSAndroid Build Coastguard Worker [step3_30] "r"(step3_30), [step3_25] "r"(step3_25),
626*fb1b10abSAndroid Build Coastguard Worker [step3_29] "r"(step3_29), [step3_26] "r"(step3_26),
627*fb1b10abSAndroid Build Coastguard Worker [step3_28] "r"(step3_28), [step3_27] "r"(step3_27));
628*fb1b10abSAndroid Build Coastguard Worker
629*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
630*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 0(%[input]) \n\t"
631*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 32(%[input]) \n\t"
632*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 16(%[input]) \n\t"
633*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 48(%[input]) \n\t"
634*fb1b10abSAndroid Build Coastguard Worker
635*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
636*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
637*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
638*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
639*fb1b10abSAndroid Build Coastguard Worker "add %[result1], %[load1], %[load2] \n\t"
640*fb1b10abSAndroid Build Coastguard Worker "sub %[result2], %[load1], %[load2] \n\t"
641*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[result1], %[cospi_16_64] \n\t"
642*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[result2], %[cospi_16_64] \n\t"
643*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
644*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
645*fb1b10abSAndroid Build Coastguard Worker
646*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
647*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
648*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load3], %[cospi_24_64] \n\t"
649*fb1b10abSAndroid Build Coastguard Worker "msub $ac3, %[load4], %[cospi_8_64] \n\t"
650*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac3, 31 \n\t"
651*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
652*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
653*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_8_64] \n\t"
654*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_24_64] \n\t"
655*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac1, 31 \n\t"
656*fb1b10abSAndroid Build Coastguard Worker "add %[step1_0], %[temp0], %[temp3] \n\t"
657*fb1b10abSAndroid Build Coastguard Worker "add %[step1_1], %[temp1], %[temp2] \n\t"
658*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_2], %[temp1], %[temp2] \n\t"
659*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_3], %[temp0], %[temp3] \n\t"
660*fb1b10abSAndroid Build Coastguard Worker
661*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
662*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [result1] "=&r"(result1),
663*fb1b10abSAndroid Build Coastguard Worker [result2] "=&r"(result2), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
664*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_0] "=&r"(step1_0),
665*fb1b10abSAndroid Build Coastguard Worker [step1_1] "=&r"(step1_1), [step1_2] "=&r"(step1_2),
666*fb1b10abSAndroid Build Coastguard Worker [step1_3] "=&r"(step1_3)
667*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
668*fb1b10abSAndroid Build Coastguard Worker [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64),
669*fb1b10abSAndroid Build Coastguard Worker [cospi_16_64] "r"(cospi_16_64));
670*fb1b10abSAndroid Build Coastguard Worker
671*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
672*fb1b10abSAndroid Build Coastguard Worker "lh %[load1], 8(%[input]) \n\t"
673*fb1b10abSAndroid Build Coastguard Worker "lh %[load2], 56(%[input]) \n\t"
674*fb1b10abSAndroid Build Coastguard Worker "lh %[load3], 40(%[input]) \n\t"
675*fb1b10abSAndroid Build Coastguard Worker "lh %[load4], 24(%[input]) \n\t"
676*fb1b10abSAndroid Build Coastguard Worker
677*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
678*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
679*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
680*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
681*fb1b10abSAndroid Build Coastguard Worker
682*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_28_64] \n\t"
683*fb1b10abSAndroid Build Coastguard Worker "msub $ac1, %[load2], %[cospi_4_64] \n\t"
684*fb1b10abSAndroid Build Coastguard Worker "extp %[temp0], $ac1, 31 \n\t"
685*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load1], %[cospi_4_64] \n\t"
686*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_28_64] \n\t"
687*fb1b10abSAndroid Build Coastguard Worker "extp %[temp3], $ac3, 31 \n\t"
688*fb1b10abSAndroid Build Coastguard Worker
689*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
690*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
691*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
692*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
693*fb1b10abSAndroid Build Coastguard Worker
694*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[load3], %[cospi_12_64] \n\t"
695*fb1b10abSAndroid Build Coastguard Worker "msub $ac2, %[load4], %[cospi_20_64] \n\t"
696*fb1b10abSAndroid Build Coastguard Worker "extp %[temp1], $ac2, 31 \n\t"
697*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load3], %[cospi_20_64] \n\t"
698*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load4], %[cospi_12_64] \n\t"
699*fb1b10abSAndroid Build Coastguard Worker "extp %[temp2], $ac1, 31 \n\t"
700*fb1b10abSAndroid Build Coastguard Worker
701*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
702*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
703*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
704*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
705*fb1b10abSAndroid Build Coastguard Worker
706*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[temp3], %[temp2] \n\t"
707*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[load1], %[temp0] \n\t"
708*fb1b10abSAndroid Build Coastguard Worker "add %[load1], %[load1], %[temp1] \n\t"
709*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[temp0], %[temp1] \n\t"
710*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[load2], %[temp2] \n\t"
711*fb1b10abSAndroid Build Coastguard Worker "add %[load2], %[load2], %[temp3] \n\t"
712*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[load1], %[cospi_16_64] \n\t"
713*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[load2], %[cospi_16_64] \n\t"
714*fb1b10abSAndroid Build Coastguard Worker
715*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_5], $ac1, 31 \n\t"
716*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_6], $ac3, 31 \n\t"
717*fb1b10abSAndroid Build Coastguard Worker "add %[step1_4], %[temp0], %[temp1] \n\t"
718*fb1b10abSAndroid Build Coastguard Worker "add %[step1_7], %[temp3], %[temp2] \n\t"
719*fb1b10abSAndroid Build Coastguard Worker
720*fb1b10abSAndroid Build Coastguard Worker : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
721*fb1b10abSAndroid Build Coastguard Worker [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
722*fb1b10abSAndroid Build Coastguard Worker [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_4] "=&r"(step1_4),
723*fb1b10abSAndroid Build Coastguard Worker [step1_5] "=&r"(step1_5), [step1_6] "=&r"(step1_6),
724*fb1b10abSAndroid Build Coastguard Worker [step1_7] "=&r"(step1_7)
725*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input),
726*fb1b10abSAndroid Build Coastguard Worker [cospi_20_64] "r"(cospi_20_64), [cospi_12_64] "r"(cospi_12_64),
727*fb1b10abSAndroid Build Coastguard Worker [cospi_4_64] "r"(cospi_4_64), [cospi_28_64] "r"(cospi_28_64),
728*fb1b10abSAndroid Build Coastguard Worker [cospi_16_64] "r"(cospi_16_64));
729*fb1b10abSAndroid Build Coastguard Worker
730*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
731*fb1b10abSAndroid Build Coastguard Worker "add %[step2_0], %[step1_0], %[step1_7] \n\t"
732*fb1b10abSAndroid Build Coastguard Worker "add %[step2_1], %[step1_1], %[step1_6] \n\t"
733*fb1b10abSAndroid Build Coastguard Worker "add %[step2_2], %[step1_2], %[step1_5] \n\t"
734*fb1b10abSAndroid Build Coastguard Worker "add %[step2_3], %[step1_3], %[step1_4] \n\t"
735*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_4], %[step1_3], %[step1_4] \n\t"
736*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_5], %[step1_2], %[step1_5] \n\t"
737*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_6], %[step1_1], %[step1_6] \n\t"
738*fb1b10abSAndroid Build Coastguard Worker "sub %[step2_7], %[step1_0], %[step1_7] \n\t"
739*fb1b10abSAndroid Build Coastguard Worker
740*fb1b10abSAndroid Build Coastguard Worker : [step2_0] "=&r"(step2_0), [step2_4] "=&r"(step2_4),
741*fb1b10abSAndroid Build Coastguard Worker [step2_1] "=&r"(step2_1), [step2_5] "=&r"(step2_5),
742*fb1b10abSAndroid Build Coastguard Worker [step2_2] "=&r"(step2_2), [step2_6] "=&r"(step2_6),
743*fb1b10abSAndroid Build Coastguard Worker [step2_3] "=&r"(step2_3), [step2_7] "=&r"(step2_7)
744*fb1b10abSAndroid Build Coastguard Worker : [step1_0] "r"(step1_0), [step1_7] "r"(step1_7),
745*fb1b10abSAndroid Build Coastguard Worker [step1_1] "r"(step1_1), [step1_6] "r"(step1_6),
746*fb1b10abSAndroid Build Coastguard Worker [step1_2] "r"(step1_2), [step1_5] "r"(step1_5),
747*fb1b10abSAndroid Build Coastguard Worker [step1_3] "r"(step1_3), [step1_4] "r"(step1_4));
748*fb1b10abSAndroid Build Coastguard Worker
749*fb1b10abSAndroid Build Coastguard Worker // stage 7
750*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
751*fb1b10abSAndroid Build Coastguard Worker "add %[step1_0], %[step2_0], %[step3_15] \n\t"
752*fb1b10abSAndroid Build Coastguard Worker "add %[step1_1], %[step2_1], %[step3_14] \n\t"
753*fb1b10abSAndroid Build Coastguard Worker "add %[step1_2], %[step2_2], %[step3_13] \n\t"
754*fb1b10abSAndroid Build Coastguard Worker "add %[step1_3], %[step2_3], %[step3_12] \n\t"
755*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_12], %[step2_3], %[step3_12] \n\t"
756*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_13], %[step2_2], %[step3_13] \n\t"
757*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_14], %[step2_1], %[step3_14] \n\t"
758*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_15], %[step2_0], %[step3_15] \n\t"
759*fb1b10abSAndroid Build Coastguard Worker
760*fb1b10abSAndroid Build Coastguard Worker : [step1_0] "=&r"(step1_0), [step1_12] "=&r"(step1_12),
761*fb1b10abSAndroid Build Coastguard Worker [step1_1] "=&r"(step1_1), [step1_13] "=&r"(step1_13),
762*fb1b10abSAndroid Build Coastguard Worker [step1_2] "=&r"(step1_2), [step1_14] "=&r"(step1_14),
763*fb1b10abSAndroid Build Coastguard Worker [step1_3] "=&r"(step1_3), [step1_15] "=&r"(step1_15)
764*fb1b10abSAndroid Build Coastguard Worker : [step2_0] "r"(step2_0), [step3_15] "r"(step3_15),
765*fb1b10abSAndroid Build Coastguard Worker [step2_1] "r"(step2_1), [step3_14] "r"(step3_14),
766*fb1b10abSAndroid Build Coastguard Worker [step2_2] "r"(step2_2), [step3_13] "r"(step3_13),
767*fb1b10abSAndroid Build Coastguard Worker [step2_3] "r"(step2_3), [step3_12] "r"(step3_12));
768*fb1b10abSAndroid Build Coastguard Worker
769*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
770*fb1b10abSAndroid Build Coastguard Worker "add %[step1_4], %[step2_4], %[step3_11] \n\t"
771*fb1b10abSAndroid Build Coastguard Worker "add %[step1_5], %[step2_5], %[step3_10] \n\t"
772*fb1b10abSAndroid Build Coastguard Worker "add %[step1_6], %[step2_6], %[step3_9] \n\t"
773*fb1b10abSAndroid Build Coastguard Worker "add %[step1_7], %[step2_7], %[step3_8] \n\t"
774*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_8], %[step2_7], %[step3_8] \n\t"
775*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_9], %[step2_6], %[step3_9] \n\t"
776*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_10], %[step2_5], %[step3_10] \n\t"
777*fb1b10abSAndroid Build Coastguard Worker "sub %[step1_11], %[step2_4], %[step3_11] \n\t"
778*fb1b10abSAndroid Build Coastguard Worker
779*fb1b10abSAndroid Build Coastguard Worker : [step1_4] "=&r"(step1_4), [step1_8] "=&r"(step1_8),
780*fb1b10abSAndroid Build Coastguard Worker [step1_5] "=&r"(step1_5), [step1_9] "=&r"(step1_9),
781*fb1b10abSAndroid Build Coastguard Worker [step1_6] "=&r"(step1_6), [step1_10] "=&r"(step1_10),
782*fb1b10abSAndroid Build Coastguard Worker [step1_7] "=&r"(step1_7), [step1_11] "=&r"(step1_11)
783*fb1b10abSAndroid Build Coastguard Worker : [step2_4] "r"(step2_4), [step3_11] "r"(step3_11),
784*fb1b10abSAndroid Build Coastguard Worker [step2_5] "r"(step2_5), [step3_10] "r"(step3_10),
785*fb1b10abSAndroid Build Coastguard Worker [step2_6] "r"(step2_6), [step3_9] "r"(step3_9),
786*fb1b10abSAndroid Build Coastguard Worker [step2_7] "r"(step2_7), [step3_8] "r"(step3_8));
787*fb1b10abSAndroid Build Coastguard Worker
788*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
789*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step2_27], %[step2_20] \n\t"
790*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[step2_27], %[step2_20] \n\t"
791*fb1b10abSAndroid Build Coastguard Worker "sub %[temp2], %[step2_26], %[step2_21] \n\t"
792*fb1b10abSAndroid Build Coastguard Worker "add %[temp3], %[step2_26], %[step2_21] \n\t"
793*fb1b10abSAndroid Build Coastguard Worker
794*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
795*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
796*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
797*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
798*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
799*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
800*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
801*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
802*fb1b10abSAndroid Build Coastguard Worker
803*fb1b10abSAndroid Build Coastguard Worker "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
804*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp1], %[cospi_16_64] \n\t"
805*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[temp2], %[cospi_16_64] \n\t"
806*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[temp3], %[cospi_16_64] \n\t"
807*fb1b10abSAndroid Build Coastguard Worker
808*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_20], $ac0, 31 \n\t"
809*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_27], $ac1, 31 \n\t"
810*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_21], $ac2, 31 \n\t"
811*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_26], $ac3, 31 \n\t"
812*fb1b10abSAndroid Build Coastguard Worker
813*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
814*fb1b10abSAndroid Build Coastguard Worker [temp3] "=&r"(temp3), [step1_20] "=&r"(step1_20),
815*fb1b10abSAndroid Build Coastguard Worker [step1_27] "=&r"(step1_27), [step1_21] "=&r"(step1_21),
816*fb1b10abSAndroid Build Coastguard Worker [step1_26] "=&r"(step1_26)
817*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step2_20] "r"(step2_20),
818*fb1b10abSAndroid Build Coastguard Worker [step2_27] "r"(step2_27), [step2_21] "r"(step2_21),
819*fb1b10abSAndroid Build Coastguard Worker [step2_26] "r"(step2_26), [cospi_16_64] "r"(cospi_16_64));
820*fb1b10abSAndroid Build Coastguard Worker
821*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
822*fb1b10abSAndroid Build Coastguard Worker "sub %[temp0], %[step2_25], %[step2_22] \n\t"
823*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[step2_25], %[step2_22] \n\t"
824*fb1b10abSAndroid Build Coastguard Worker "sub %[temp2], %[step2_24], %[step2_23] \n\t"
825*fb1b10abSAndroid Build Coastguard Worker "add %[temp3], %[step2_24], %[step2_23] \n\t"
826*fb1b10abSAndroid Build Coastguard Worker
827*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac0 \n\t"
828*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac0 \n\t"
829*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac1 \n\t"
830*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac1 \n\t"
831*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac2 \n\t"
832*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac2 \n\t"
833*fb1b10abSAndroid Build Coastguard Worker "mtlo %[const_2_power_13], $ac3 \n\t"
834*fb1b10abSAndroid Build Coastguard Worker "mthi $zero, $ac3 \n\t"
835*fb1b10abSAndroid Build Coastguard Worker
836*fb1b10abSAndroid Build Coastguard Worker "madd $ac0, %[temp0], %[cospi_16_64] \n\t"
837*fb1b10abSAndroid Build Coastguard Worker "madd $ac1, %[temp1], %[cospi_16_64] \n\t"
838*fb1b10abSAndroid Build Coastguard Worker "madd $ac2, %[temp2], %[cospi_16_64] \n\t"
839*fb1b10abSAndroid Build Coastguard Worker "madd $ac3, %[temp3], %[cospi_16_64] \n\t"
840*fb1b10abSAndroid Build Coastguard Worker
841*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_22], $ac0, 31 \n\t"
842*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_25], $ac1, 31 \n\t"
843*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_23], $ac2, 31 \n\t"
844*fb1b10abSAndroid Build Coastguard Worker "extp %[step1_24], $ac3, 31 \n\t"
845*fb1b10abSAndroid Build Coastguard Worker
846*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
847*fb1b10abSAndroid Build Coastguard Worker [temp3] "=&r"(temp3), [step1_22] "=&r"(step1_22),
848*fb1b10abSAndroid Build Coastguard Worker [step1_25] "=&r"(step1_25), [step1_23] "=&r"(step1_23),
849*fb1b10abSAndroid Build Coastguard Worker [step1_24] "=&r"(step1_24)
850*fb1b10abSAndroid Build Coastguard Worker : [const_2_power_13] "r"(const_2_power_13), [step2_22] "r"(step2_22),
851*fb1b10abSAndroid Build Coastguard Worker [step2_25] "r"(step2_25), [step2_23] "r"(step2_23),
852*fb1b10abSAndroid Build Coastguard Worker [step2_24] "r"(step2_24), [cospi_16_64] "r"(cospi_16_64));
853*fb1b10abSAndroid Build Coastguard Worker
854*fb1b10abSAndroid Build Coastguard Worker // final stage
855*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
856*fb1b10abSAndroid Build Coastguard Worker "add %[temp0], %[step1_0], %[step2_31] \n\t"
857*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[step1_1], %[step2_30] \n\t"
858*fb1b10abSAndroid Build Coastguard Worker "add %[temp2], %[step1_2], %[step2_29] \n\t"
859*fb1b10abSAndroid Build Coastguard Worker "add %[temp3], %[step1_3], %[step2_28] \n\t"
860*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[step1_3], %[step2_28] \n\t"
861*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[step1_2], %[step2_29] \n\t"
862*fb1b10abSAndroid Build Coastguard Worker "sub %[load3], %[step1_1], %[step2_30] \n\t"
863*fb1b10abSAndroid Build Coastguard Worker "sub %[load4], %[step1_0], %[step2_31] \n\t"
864*fb1b10abSAndroid Build Coastguard Worker "sh %[temp0], 0(%[output]) \n\t"
865*fb1b10abSAndroid Build Coastguard Worker "sh %[temp1], 64(%[output]) \n\t"
866*fb1b10abSAndroid Build Coastguard Worker "sh %[temp2], 128(%[output]) \n\t"
867*fb1b10abSAndroid Build Coastguard Worker "sh %[temp3], 192(%[output]) \n\t"
868*fb1b10abSAndroid Build Coastguard Worker "sh %[load1], 1792(%[output]) \n\t"
869*fb1b10abSAndroid Build Coastguard Worker "sh %[load2], 1856(%[output]) \n\t"
870*fb1b10abSAndroid Build Coastguard Worker "sh %[load3], 1920(%[output]) \n\t"
871*fb1b10abSAndroid Build Coastguard Worker "sh %[load4], 1984(%[output]) \n\t"
872*fb1b10abSAndroid Build Coastguard Worker
873*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
874*fb1b10abSAndroid Build Coastguard Worker [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
875*fb1b10abSAndroid Build Coastguard Worker [temp3] "=&r"(temp3), [load4] "=&r"(load4)
876*fb1b10abSAndroid Build Coastguard Worker : [step1_0] "r"(step1_0), [step2_31] "r"(step2_31),
877*fb1b10abSAndroid Build Coastguard Worker [step1_1] "r"(step1_1), [step2_30] "r"(step2_30),
878*fb1b10abSAndroid Build Coastguard Worker [step1_2] "r"(step1_2), [step2_29] "r"(step2_29),
879*fb1b10abSAndroid Build Coastguard Worker [step1_3] "r"(step1_3), [step2_28] "r"(step2_28),
880*fb1b10abSAndroid Build Coastguard Worker [output] "r"(output));
881*fb1b10abSAndroid Build Coastguard Worker
882*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
883*fb1b10abSAndroid Build Coastguard Worker "add %[temp0], %[step1_4], %[step1_27] \n\t"
884*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[step1_5], %[step1_26] \n\t"
885*fb1b10abSAndroid Build Coastguard Worker "add %[temp2], %[step1_6], %[step1_25] \n\t"
886*fb1b10abSAndroid Build Coastguard Worker "add %[temp3], %[step1_7], %[step1_24] \n\t"
887*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[step1_7], %[step1_24] \n\t"
888*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[step1_6], %[step1_25] \n\t"
889*fb1b10abSAndroid Build Coastguard Worker "sub %[load3], %[step1_5], %[step1_26] \n\t"
890*fb1b10abSAndroid Build Coastguard Worker "sub %[load4], %[step1_4], %[step1_27] \n\t"
891*fb1b10abSAndroid Build Coastguard Worker "sh %[temp0], 256(%[output]) \n\t"
892*fb1b10abSAndroid Build Coastguard Worker "sh %[temp1], 320(%[output]) \n\t"
893*fb1b10abSAndroid Build Coastguard Worker "sh %[temp2], 384(%[output]) \n\t"
894*fb1b10abSAndroid Build Coastguard Worker "sh %[temp3], 448(%[output]) \n\t"
895*fb1b10abSAndroid Build Coastguard Worker "sh %[load1], 1536(%[output]) \n\t"
896*fb1b10abSAndroid Build Coastguard Worker "sh %[load2], 1600(%[output]) \n\t"
897*fb1b10abSAndroid Build Coastguard Worker "sh %[load3], 1664(%[output]) \n\t"
898*fb1b10abSAndroid Build Coastguard Worker "sh %[load4], 1728(%[output]) \n\t"
899*fb1b10abSAndroid Build Coastguard Worker
900*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
901*fb1b10abSAndroid Build Coastguard Worker [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
902*fb1b10abSAndroid Build Coastguard Worker [temp3] "=&r"(temp3), [load4] "=&r"(load4)
903*fb1b10abSAndroid Build Coastguard Worker : [step1_4] "r"(step1_4), [step1_27] "r"(step1_27),
904*fb1b10abSAndroid Build Coastguard Worker [step1_5] "r"(step1_5), [step1_26] "r"(step1_26),
905*fb1b10abSAndroid Build Coastguard Worker [step1_6] "r"(step1_6), [step1_25] "r"(step1_25),
906*fb1b10abSAndroid Build Coastguard Worker [step1_7] "r"(step1_7), [step1_24] "r"(step1_24),
907*fb1b10abSAndroid Build Coastguard Worker [output] "r"(output));
908*fb1b10abSAndroid Build Coastguard Worker
909*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
910*fb1b10abSAndroid Build Coastguard Worker "add %[temp0], %[step1_8], %[step1_23] \n\t"
911*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[step1_9], %[step1_22] \n\t"
912*fb1b10abSAndroid Build Coastguard Worker "add %[temp2], %[step1_10], %[step1_21] \n\t"
913*fb1b10abSAndroid Build Coastguard Worker "add %[temp3], %[step1_11], %[step1_20] \n\t"
914*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[step1_11], %[step1_20] \n\t"
915*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[step1_10], %[step1_21] \n\t"
916*fb1b10abSAndroid Build Coastguard Worker "sub %[load3], %[step1_9], %[step1_22] \n\t"
917*fb1b10abSAndroid Build Coastguard Worker "sub %[load4], %[step1_8], %[step1_23] \n\t"
918*fb1b10abSAndroid Build Coastguard Worker "sh %[temp0], 512(%[output]) \n\t"
919*fb1b10abSAndroid Build Coastguard Worker "sh %[temp1], 576(%[output]) \n\t"
920*fb1b10abSAndroid Build Coastguard Worker "sh %[temp2], 640(%[output]) \n\t"
921*fb1b10abSAndroid Build Coastguard Worker "sh %[temp3], 704(%[output]) \n\t"
922*fb1b10abSAndroid Build Coastguard Worker "sh %[load1], 1280(%[output]) \n\t"
923*fb1b10abSAndroid Build Coastguard Worker "sh %[load2], 1344(%[output]) \n\t"
924*fb1b10abSAndroid Build Coastguard Worker "sh %[load3], 1408(%[output]) \n\t"
925*fb1b10abSAndroid Build Coastguard Worker "sh %[load4], 1472(%[output]) \n\t"
926*fb1b10abSAndroid Build Coastguard Worker
927*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
928*fb1b10abSAndroid Build Coastguard Worker [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
929*fb1b10abSAndroid Build Coastguard Worker [temp3] "=&r"(temp3), [load4] "=&r"(load4)
930*fb1b10abSAndroid Build Coastguard Worker : [step1_8] "r"(step1_8), [step1_23] "r"(step1_23),
931*fb1b10abSAndroid Build Coastguard Worker [step1_9] "r"(step1_9), [step1_22] "r"(step1_22),
932*fb1b10abSAndroid Build Coastguard Worker [step1_10] "r"(step1_10), [step1_21] "r"(step1_21),
933*fb1b10abSAndroid Build Coastguard Worker [step1_11] "r"(step1_11), [step1_20] "r"(step1_20),
934*fb1b10abSAndroid Build Coastguard Worker [output] "r"(output));
935*fb1b10abSAndroid Build Coastguard Worker
936*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
937*fb1b10abSAndroid Build Coastguard Worker "add %[temp0], %[step1_12], %[step2_19] \n\t"
938*fb1b10abSAndroid Build Coastguard Worker "add %[temp1], %[step1_13], %[step2_18] \n\t"
939*fb1b10abSAndroid Build Coastguard Worker "add %[temp2], %[step1_14], %[step2_17] \n\t"
940*fb1b10abSAndroid Build Coastguard Worker "add %[temp3], %[step1_15], %[step2_16] \n\t"
941*fb1b10abSAndroid Build Coastguard Worker "sub %[load1], %[step1_15], %[step2_16] \n\t"
942*fb1b10abSAndroid Build Coastguard Worker "sub %[load2], %[step1_14], %[step2_17] \n\t"
943*fb1b10abSAndroid Build Coastguard Worker "sub %[load3], %[step1_13], %[step2_18] \n\t"
944*fb1b10abSAndroid Build Coastguard Worker "sub %[load4], %[step1_12], %[step2_19] \n\t"
945*fb1b10abSAndroid Build Coastguard Worker "sh %[temp0], 768(%[output]) \n\t"
946*fb1b10abSAndroid Build Coastguard Worker "sh %[temp1], 832(%[output]) \n\t"
947*fb1b10abSAndroid Build Coastguard Worker "sh %[temp2], 896(%[output]) \n\t"
948*fb1b10abSAndroid Build Coastguard Worker "sh %[temp3], 960(%[output]) \n\t"
949*fb1b10abSAndroid Build Coastguard Worker "sh %[load1], 1024(%[output]) \n\t"
950*fb1b10abSAndroid Build Coastguard Worker "sh %[load2], 1088(%[output]) \n\t"
951*fb1b10abSAndroid Build Coastguard Worker "sh %[load3], 1152(%[output]) \n\t"
952*fb1b10abSAndroid Build Coastguard Worker "sh %[load4], 1216(%[output]) \n\t"
953*fb1b10abSAndroid Build Coastguard Worker
954*fb1b10abSAndroid Build Coastguard Worker : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1),
955*fb1b10abSAndroid Build Coastguard Worker [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3),
956*fb1b10abSAndroid Build Coastguard Worker [temp3] "=&r"(temp3), [load4] "=&r"(load4)
957*fb1b10abSAndroid Build Coastguard Worker : [step1_12] "r"(step1_12), [step2_19] "r"(step2_19),
958*fb1b10abSAndroid Build Coastguard Worker [step1_13] "r"(step1_13), [step2_18] "r"(step2_18),
959*fb1b10abSAndroid Build Coastguard Worker [step1_14] "r"(step1_14), [step2_17] "r"(step2_17),
960*fb1b10abSAndroid Build Coastguard Worker [step1_15] "r"(step1_15), [step2_16] "r"(step2_16),
961*fb1b10abSAndroid Build Coastguard Worker [output] "r"(output));
962*fb1b10abSAndroid Build Coastguard Worker
963*fb1b10abSAndroid Build Coastguard Worker input += 32;
964*fb1b10abSAndroid Build Coastguard Worker output += 1;
965*fb1b10abSAndroid Build Coastguard Worker }
966*fb1b10abSAndroid Build Coastguard Worker }
967*fb1b10abSAndroid Build Coastguard Worker
vpx_idct32x32_1024_add_dspr2(const int16_t * input,uint8_t * dest,int stride)968*fb1b10abSAndroid Build Coastguard Worker void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
969*fb1b10abSAndroid Build Coastguard Worker int stride) {
970*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
971*fb1b10abSAndroid Build Coastguard Worker int16_t *outptr = out;
972*fb1b10abSAndroid Build Coastguard Worker uint32_t pos = 45;
973*fb1b10abSAndroid Build Coastguard Worker
974*fb1b10abSAndroid Build Coastguard Worker /* bit positon for extract from acc */
975*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
976*fb1b10abSAndroid Build Coastguard Worker :
977*fb1b10abSAndroid Build Coastguard Worker : [pos] "r"(pos));
978*fb1b10abSAndroid Build Coastguard Worker
979*fb1b10abSAndroid Build Coastguard Worker // Rows
980*fb1b10abSAndroid Build Coastguard Worker idct32_rows_dspr2(input, outptr, 32);
981*fb1b10abSAndroid Build Coastguard Worker
982*fb1b10abSAndroid Build Coastguard Worker // Columns
983*fb1b10abSAndroid Build Coastguard Worker vpx_idct32_cols_add_blk_dspr2(out, dest, stride);
984*fb1b10abSAndroid Build Coastguard Worker }
985*fb1b10abSAndroid Build Coastguard Worker
vpx_idct32x32_34_add_dspr2(const int16_t * input,uint8_t * dest,int stride)986*fb1b10abSAndroid Build Coastguard Worker void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
987*fb1b10abSAndroid Build Coastguard Worker int stride) {
988*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
989*fb1b10abSAndroid Build Coastguard Worker int16_t *outptr = out;
990*fb1b10abSAndroid Build Coastguard Worker uint32_t i;
991*fb1b10abSAndroid Build Coastguard Worker uint32_t pos = 45;
992*fb1b10abSAndroid Build Coastguard Worker
993*fb1b10abSAndroid Build Coastguard Worker /* bit positon for extract from acc */
994*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
995*fb1b10abSAndroid Build Coastguard Worker :
996*fb1b10abSAndroid Build Coastguard Worker : [pos] "r"(pos));
997*fb1b10abSAndroid Build Coastguard Worker
998*fb1b10abSAndroid Build Coastguard Worker // Rows
999*fb1b10abSAndroid Build Coastguard Worker idct32_rows_dspr2(input, outptr, 8);
1000*fb1b10abSAndroid Build Coastguard Worker
1001*fb1b10abSAndroid Build Coastguard Worker outptr += 8;
1002*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
1003*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 0(%[outptr]) \n\t"
1004*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 4(%[outptr]) \n\t"
1005*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 8(%[outptr]) \n\t"
1006*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 12(%[outptr]) \n\t"
1007*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 16(%[outptr]) \n\t"
1008*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 20(%[outptr]) \n\t"
1009*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 24(%[outptr]) \n\t"
1010*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 28(%[outptr]) \n\t"
1011*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 32(%[outptr]) \n\t"
1012*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 36(%[outptr]) \n\t"
1013*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 40(%[outptr]) \n\t"
1014*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 44(%[outptr]) \n\t"
1015*fb1b10abSAndroid Build Coastguard Worker
1016*fb1b10abSAndroid Build Coastguard Worker :
1017*fb1b10abSAndroid Build Coastguard Worker : [outptr] "r"(outptr));
1018*fb1b10abSAndroid Build Coastguard Worker
1019*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 31; ++i) {
1020*fb1b10abSAndroid Build Coastguard Worker outptr += 32;
1021*fb1b10abSAndroid Build Coastguard Worker
1022*fb1b10abSAndroid Build Coastguard Worker __asm__ __volatile__(
1023*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 0(%[outptr]) \n\t"
1024*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 4(%[outptr]) \n\t"
1025*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 8(%[outptr]) \n\t"
1026*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 12(%[outptr]) \n\t"
1027*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 16(%[outptr]) \n\t"
1028*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 20(%[outptr]) \n\t"
1029*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 24(%[outptr]) \n\t"
1030*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 28(%[outptr]) \n\t"
1031*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 32(%[outptr]) \n\t"
1032*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 36(%[outptr]) \n\t"
1033*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 40(%[outptr]) \n\t"
1034*fb1b10abSAndroid Build Coastguard Worker "sw $zero, 44(%[outptr]) \n\t"
1035*fb1b10abSAndroid Build Coastguard Worker
1036*fb1b10abSAndroid Build Coastguard Worker :
1037*fb1b10abSAndroid Build Coastguard Worker : [outptr] "r"(outptr));
1038*fb1b10abSAndroid Build Coastguard Worker }
1039*fb1b10abSAndroid Build Coastguard Worker
1040*fb1b10abSAndroid Build Coastguard Worker // Columns
1041*fb1b10abSAndroid Build Coastguard Worker vpx_idct32_cols_add_blk_dspr2(out, dest, stride);
1042*fb1b10abSAndroid Build Coastguard Worker }
1043*fb1b10abSAndroid Build Coastguard Worker
/* 32x32 inverse transform + add, DC-only variant: derives a single value a1
 * from input[0] and adds it, with unsigned byte saturation, to every pixel of
 * a 32-row by 32-byte area starting at dest.
 *
 * input  - coefficient block; only input[0] is read.
 * dest   - destination pixels. Rows are accessed with lw/sw, so dest and
 *          stride are assumed to keep every row four-byte aligned.
 * stride - byte distance between consecutive destination rows.
 */
void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
                               int stride) {
  int r, out;
  int32_t a1, magnitude;
  int32_t vector_a1;
  int32_t t1, t2, t3, t4;
  int32_t vector_1, vector_2, vector_3, vector_4;
  uint32_t pos = 45;

  /* bit position for extract from acc */
  __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));

  out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]);

  /* a1 = (out + 32) >> 6, i.e. divide by 64 with rounding. */
  __asm__ __volatile__(
      "addi     %[out],     %[out],     32      \n\t"
      "sra      %[a1],      %[out],     6       \n\t"

      : [out] "+r"(out), [a1] "=r"(a1)
      :);

  /* replv.qb replicates only the low byte of its source register, so a
   * magnitude above 255 would be truncated (256 would become 0). For 8-bit
   * pixels, saturating add/subtract of 255 already yields 255/0 for every
   * pixel, which is exactly what any larger magnitude must produce, so clamp
   * the magnitude to 255 before replicating it. */
  magnitude = (a1 < 0) ? -a1 : a1;
  if (magnitude > 255) magnitude = 255;

  __asm__ __volatile__("replv.qb    %[vector_a1],   %[magnitude]    \n\t"

                       : [vector_a1] "=r"(vector_a1)
                       : [magnitude] "r"(magnitude));

  if (a1 < 0) {
    /* Negative DC: saturating subtract of |a1| from each pixel.
     * use quad-byte
     * input and output memory are four byte aligned */
    for (r = 32; r--;) {
      __asm__ __volatile__(
          "lw           %[t1],          0(%[dest])                  \n\t"
          "lw           %[t2],          4(%[dest])                  \n\t"
          "lw           %[t3],          8(%[dest])                  \n\t"
          "lw           %[t4],          12(%[dest])                 \n\t"
          "subu_s.qb    %[vector_1],    %[t1],      %[vector_a1]    \n\t"
          "subu_s.qb    %[vector_2],    %[t2],      %[vector_a1]    \n\t"
          "subu_s.qb    %[vector_3],    %[t3],      %[vector_a1]    \n\t"
          "subu_s.qb    %[vector_4],    %[t4],      %[vector_a1]    \n\t"
          "sw           %[vector_1],    0(%[dest])                  \n\t"
          "sw           %[vector_2],    4(%[dest])                  \n\t"
          "sw           %[vector_3],    8(%[dest])                  \n\t"
          "sw           %[vector_4],    12(%[dest])                 \n\t"

          "lw           %[t1],          16(%[dest])                 \n\t"
          "lw           %[t2],          20(%[dest])                 \n\t"
          "lw           %[t3],          24(%[dest])                 \n\t"
          "lw           %[t4],          28(%[dest])                 \n\t"
          "subu_s.qb    %[vector_1],    %[t1],      %[vector_a1]    \n\t"
          "subu_s.qb    %[vector_2],    %[t2],      %[vector_a1]    \n\t"
          "subu_s.qb    %[vector_3],    %[t3],      %[vector_a1]    \n\t"
          "subu_s.qb    %[vector_4],    %[t4],      %[vector_a1]    \n\t"
          "sw           %[vector_1],    16(%[dest])                 \n\t"
          "sw           %[vector_2],    20(%[dest])                 \n\t"
          "sw           %[vector_3],    24(%[dest])                 \n\t"
          "sw           %[vector_4],    28(%[dest])                 \n\t"

          /* advance to the next destination row */
          "add          %[dest],        %[dest],    %[stride]       \n\t"

          : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4),
            [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2),
            [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4),
            [dest] "+&r"(dest)
          : [stride] "r"(stride), [vector_a1] "r"(vector_a1)
          /* dest is read and written through a register operand */
          : "memory");
    }
  } else {
    /* Non-negative DC: saturating add of a1 to each pixel.
     * use quad-byte
     * input and output memory are four byte aligned */
    for (r = 32; r--;) {
      __asm__ __volatile__(
          "lw           %[t1],          0(%[dest])                  \n\t"
          "lw           %[t2],          4(%[dest])                  \n\t"
          "lw           %[t3],          8(%[dest])                  \n\t"
          "lw           %[t4],          12(%[dest])                 \n\t"
          "addu_s.qb    %[vector_1],    %[t1],      %[vector_a1]    \n\t"
          "addu_s.qb    %[vector_2],    %[t2],      %[vector_a1]    \n\t"
          "addu_s.qb    %[vector_3],    %[t3],      %[vector_a1]    \n\t"
          "addu_s.qb    %[vector_4],    %[t4],      %[vector_a1]    \n\t"
          "sw           %[vector_1],    0(%[dest])                  \n\t"
          "sw           %[vector_2],    4(%[dest])                  \n\t"
          "sw           %[vector_3],    8(%[dest])                  \n\t"
          "sw           %[vector_4],    12(%[dest])                 \n\t"

          "lw           %[t1],          16(%[dest])                 \n\t"
          "lw           %[t2],          20(%[dest])                 \n\t"
          "lw           %[t3],          24(%[dest])                 \n\t"
          "lw           %[t4],          28(%[dest])                 \n\t"
          "addu_s.qb    %[vector_1],    %[t1],      %[vector_a1]    \n\t"
          "addu_s.qb    %[vector_2],    %[t2],      %[vector_a1]    \n\t"
          "addu_s.qb    %[vector_3],    %[t3],      %[vector_a1]    \n\t"
          "addu_s.qb    %[vector_4],    %[t4],      %[vector_a1]    \n\t"
          "sw           %[vector_1],    16(%[dest])                 \n\t"
          "sw           %[vector_2],    20(%[dest])                 \n\t"
          "sw           %[vector_3],    24(%[dest])                 \n\t"
          "sw           %[vector_4],    28(%[dest])                 \n\t"

          /* advance to the next destination row */
          "add          %[dest],        %[dest],    %[stride]       \n\t"

          : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4),
            [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2),
            [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4),
            [dest] "+&r"(dest)
          : [stride] "r"(stride), [vector_a1] "r"(vector_a1)
          /* dest is read and written through a register operand */
          : "memory");
    }
  }
}
1218*fb1b10abSAndroid Build Coastguard Worker #endif // #if HAVE_DSPR2
1219