1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <stdlib.h>
12
13 #include "./vpx_dsp_rtcd.h"
14 #include "vpx/vpx_integer.h"
15 #include "vpx_dsp/mips/common_dspr2.h"
16 #include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
17 #include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
18 #include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
19 #include "vpx_mem/vpx_mem.h"
20
21 #if HAVE_DSPR2
mb_lpf_horizontal_edge(unsigned char * s,int pitch,const uint8_t * blimit,const uint8_t * limit,const uint8_t * thresh,int count)22 static void mb_lpf_horizontal_edge(unsigned char *s, int pitch,
23 const uint8_t *blimit, const uint8_t *limit,
24 const uint8_t *thresh, int count) {
25 uint32_t mask;
26 uint32_t hev, flat, flat2;
27 uint8_t i;
28 uint8_t *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0;
29 uint8_t *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7;
30 uint32_t thresh_vec, flimit_vec, limit_vec;
31 uint32_t uflimit, ulimit, uthresh;
32 uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
33 uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
34 uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
35 uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
36 uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
37 uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
38 uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
39 uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
40
41 uflimit = *blimit;
42 ulimit = *limit;
43 uthresh = *thresh;
44
45 /* create quad-byte */
46 __asm__ __volatile__(
47 "replv.qb %[thresh_vec], %[uthresh] \n\t"
48 "replv.qb %[flimit_vec], %[uflimit] \n\t"
49 "replv.qb %[limit_vec], %[ulimit] \n\t"
50
51 : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
52 [limit_vec] "=r"(limit_vec)
53 : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
54
55 /* prefetch data for store */
56 prefetch_store(s);
57
58 for (i = 0; i < (2 * count); i++) {
59 sp7 = s - (pitch << 3);
60 sp6 = sp7 + pitch;
61 sp5 = sp6 + pitch;
62 sp4 = sp5 + pitch;
63 sp3 = sp4 + pitch;
64 sp2 = sp3 + pitch;
65 sp1 = sp2 + pitch;
66 sp0 = sp1 + pitch;
67 sq0 = s;
68 sq1 = s + pitch;
69 sq2 = sq1 + pitch;
70 sq3 = sq2 + pitch;
71 sq4 = sq3 + pitch;
72 sq5 = sq4 + pitch;
73 sq6 = sq5 + pitch;
74 sq7 = sq6 + pitch;
75
76 __asm__ __volatile__(
77 "lw %[p7], (%[sp7]) \n\t"
78 "lw %[p6], (%[sp6]) \n\t"
79 "lw %[p5], (%[sp5]) \n\t"
80 "lw %[p4], (%[sp4]) \n\t"
81 "lw %[p3], (%[sp3]) \n\t"
82 "lw %[p2], (%[sp2]) \n\t"
83 "lw %[p1], (%[sp1]) \n\t"
84 "lw %[p0], (%[sp0]) \n\t"
85
86 : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
87 [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4)
88 : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0),
89 [sp4] "r"(sp4), [sp5] "r"(sp5), [sp6] "r"(sp6), [sp7] "r"(sp7));
90
91 __asm__ __volatile__(
92 "lw %[q0], (%[sq0]) \n\t"
93 "lw %[q1], (%[sq1]) \n\t"
94 "lw %[q2], (%[sq2]) \n\t"
95 "lw %[q3], (%[sq3]) \n\t"
96 "lw %[q4], (%[sq4]) \n\t"
97 "lw %[q5], (%[sq5]) \n\t"
98 "lw %[q6], (%[sq6]) \n\t"
99 "lw %[q7], (%[sq7]) \n\t"
100
101 : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0),
102 [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4)
103 : [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0),
104 [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6), [sq7] "r"(sq7));
105
106 filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
107 p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
108
109 flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
110
111 /* f0 */
112 if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
113 ((flat2 != 0) && (flat == 0) && (mask != 0))) {
114 filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
115
116 __asm__ __volatile__(
117 "sw %[p1_f0], (%[sp1]) \n\t"
118 "sw %[p0_f0], (%[sp0]) \n\t"
119 "sw %[q0_f0], (%[sq0]) \n\t"
120 "sw %[q1_f0], (%[sq1]) \n\t"
121
122 :
123 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
124 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
125 [sq1] "r"(sq1));
126 } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
127 (mask == 0xFFFFFFFF)) {
128 /* f2 */
129 PACK_LEFT_0TO3()
130 PACK_LEFT_4TO7()
131 wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
132 &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
133 &q6_l, &q7_l);
134
135 PACK_RIGHT_0TO3()
136 PACK_RIGHT_4TO7()
137 wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
138 &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
139 &q6_r, &q7_r);
140
141 COMBINE_LEFT_RIGHT_0TO2()
142 COMBINE_LEFT_RIGHT_3TO6()
143
144 __asm__ __volatile__(
145 "sw %[p6], (%[sp6]) \n\t"
146 "sw %[p5], (%[sp5]) \n\t"
147 "sw %[p4], (%[sp4]) \n\t"
148 "sw %[p3], (%[sp3]) \n\t"
149 "sw %[p2], (%[sp2]) \n\t"
150 "sw %[p1], (%[sp1]) \n\t"
151 "sw %[p0], (%[sp0]) \n\t"
152
153 :
154 : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3),
155 [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [sp6] "r"(sp6),
156 [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2),
157 [sp1] "r"(sp1), [sp0] "r"(sp0));
158
159 __asm__ __volatile__(
160 "sw %[q6], (%[sq6]) \n\t"
161 "sw %[q5], (%[sq5]) \n\t"
162 "sw %[q4], (%[sq4]) \n\t"
163 "sw %[q3], (%[sq3]) \n\t"
164 "sw %[q2], (%[sq2]) \n\t"
165 "sw %[q1], (%[sq1]) \n\t"
166 "sw %[q0], (%[sq0]) \n\t"
167
168 :
169 : [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3),
170 [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [sq6] "r"(sq6),
171 [sq5] "r"(sq5), [sq4] "r"(sq4), [sq3] "r"(sq3), [sq2] "r"(sq2),
172 [sq1] "r"(sq1), [sq0] "r"(sq0));
173 } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
174 /* f1 */
175 /* left 2 element operation */
176 PACK_LEFT_0TO3()
177 mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
178
179 /* right 2 element operation */
180 PACK_RIGHT_0TO3()
181 mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
182
183 COMBINE_LEFT_RIGHT_0TO2()
184
185 __asm__ __volatile__(
186 "sw %[p2], (%[sp2]) \n\t"
187 "sw %[p1], (%[sp1]) \n\t"
188 "sw %[p0], (%[sp0]) \n\t"
189 "sw %[q0], (%[sq0]) \n\t"
190 "sw %[q1], (%[sq1]) \n\t"
191 "sw %[q2], (%[sq2]) \n\t"
192
193 :
194 : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0),
195 [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1),
196 [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2));
197 } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
198 /* f0+f1 */
199 filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
200
201 /* left 2 element operation */
202 PACK_LEFT_0TO3()
203 mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
204
205 /* right 2 element operation */
206 PACK_RIGHT_0TO3()
207 mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
208
209 if (mask & flat & 0x000000FF) {
210 __asm__ __volatile__(
211 "sb %[p2_r], (%[sp2]) \n\t"
212 "sb %[p1_r], (%[sp1]) \n\t"
213 "sb %[p0_r], (%[sp0]) \n\t"
214 "sb %[q0_r], (%[sq0]) \n\t"
215 "sb %[q1_r], (%[sq1]) \n\t"
216 "sb %[q2_r], (%[sq2]) \n\t"
217
218 :
219 : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
220 [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
221 [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
222 [sq1] "r"(sq1), [sq2] "r"(sq2));
223 } else if (mask & 0x000000FF) {
224 __asm__ __volatile__(
225 "sb %[p1_f0], (%[sp1]) \n\t"
226 "sb %[p0_f0], (%[sp0]) \n\t"
227 "sb %[q0_f0], (%[sq0]) \n\t"
228 "sb %[q1_f0], (%[sq1]) \n\t"
229
230 :
231 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
232 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
233 [sq0] "r"(sq0), [sq1] "r"(sq1));
234 }
235
236 __asm__ __volatile__(
237 "srl %[p2_r], %[p2_r], 16 \n\t"
238 "srl %[p1_r], %[p1_r], 16 \n\t"
239 "srl %[p0_r], %[p0_r], 16 \n\t"
240 "srl %[q0_r], %[q0_r], 16 \n\t"
241 "srl %[q1_r], %[q1_r], 16 \n\t"
242 "srl %[q2_r], %[q2_r], 16 \n\t"
243 "srl %[p1_f0], %[p1_f0], 8 \n\t"
244 "srl %[p0_f0], %[p0_f0], 8 \n\t"
245 "srl %[q0_f0], %[q0_f0], 8 \n\t"
246 "srl %[q1_f0], %[q1_f0], 8 \n\t"
247
248 : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
249 [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
250 [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
251 [q1_f0] "+r"(q1_f0)
252 :);
253
254 if (mask & flat & 0x0000FF00) {
255 __asm__ __volatile__(
256 "sb %[p2_r], +1(%[sp2]) \n\t"
257 "sb %[p1_r], +1(%[sp1]) \n\t"
258 "sb %[p0_r], +1(%[sp0]) \n\t"
259 "sb %[q0_r], +1(%[sq0]) \n\t"
260 "sb %[q1_r], +1(%[sq1]) \n\t"
261 "sb %[q2_r], +1(%[sq2]) \n\t"
262
263 :
264 : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
265 [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
266 [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
267 [sq1] "r"(sq1), [sq2] "r"(sq2));
268 } else if (mask & 0x0000FF00) {
269 __asm__ __volatile__(
270 "sb %[p1_f0], +1(%[sp1]) \n\t"
271 "sb %[p0_f0], +1(%[sp0]) \n\t"
272 "sb %[q0_f0], +1(%[sq0]) \n\t"
273 "sb %[q1_f0], +1(%[sq1]) \n\t"
274
275 :
276 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
277 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
278 [sq0] "r"(sq0), [sq1] "r"(sq1));
279 }
280
281 __asm__ __volatile__(
282 "srl %[p1_f0], %[p1_f0], 8 \n\t"
283 "srl %[p0_f0], %[p0_f0], 8 \n\t"
284 "srl %[q0_f0], %[q0_f0], 8 \n\t"
285 "srl %[q1_f0], %[q1_f0], 8 \n\t"
286
287 : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
288 [q1_f0] "+r"(q1_f0)
289 :);
290
291 if (mask & flat & 0x00FF0000) {
292 __asm__ __volatile__(
293 "sb %[p2_l], +2(%[sp2]) \n\t"
294 "sb %[p1_l], +2(%[sp1]) \n\t"
295 "sb %[p0_l], +2(%[sp0]) \n\t"
296 "sb %[q0_l], +2(%[sq0]) \n\t"
297 "sb %[q1_l], +2(%[sq1]) \n\t"
298 "sb %[q2_l], +2(%[sq2]) \n\t"
299
300 :
301 : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
302 [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
303 [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
304 [sq1] "r"(sq1), [sq2] "r"(sq2));
305 } else if (mask & 0x00FF0000) {
306 __asm__ __volatile__(
307 "sb %[p1_f0], +2(%[sp1]) \n\t"
308 "sb %[p0_f0], +2(%[sp0]) \n\t"
309 "sb %[q0_f0], +2(%[sq0]) \n\t"
310 "sb %[q1_f0], +2(%[sq1]) \n\t"
311
312 :
313 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
314 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
315 [sq0] "r"(sq0), [sq1] "r"(sq1));
316 }
317
318 __asm__ __volatile__(
319 "srl %[p2_l], %[p2_l], 16 \n\t"
320 "srl %[p1_l], %[p1_l], 16 \n\t"
321 "srl %[p0_l], %[p0_l], 16 \n\t"
322 "srl %[q0_l], %[q0_l], 16 \n\t"
323 "srl %[q1_l], %[q1_l], 16 \n\t"
324 "srl %[q2_l], %[q2_l], 16 \n\t"
325 "srl %[p1_f0], %[p1_f0], 8 \n\t"
326 "srl %[p0_f0], %[p0_f0], 8 \n\t"
327 "srl %[q0_f0], %[q0_f0], 8 \n\t"
328 "srl %[q1_f0], %[q1_f0], 8 \n\t"
329
330 : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
331 [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
332 [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
333 [q1_f0] "+r"(q1_f0)
334 :);
335
336 if (mask & flat & 0xFF000000) {
337 __asm__ __volatile__(
338 "sb %[p2_l], +3(%[sp2]) \n\t"
339 "sb %[p1_l], +3(%[sp1]) \n\t"
340 "sb %[p0_l], +3(%[sp0]) \n\t"
341 "sb %[q0_l], +3(%[sq0]) \n\t"
342 "sb %[q1_l], +3(%[sq1]) \n\t"
343 "sb %[q2_l], +3(%[sq2]) \n\t"
344
345 :
346 : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
347 [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
348 [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
349 [sq1] "r"(sq1), [sq2] "r"(sq2));
350 } else if (mask & 0xFF000000) {
351 __asm__ __volatile__(
352 "sb %[p1_f0], +3(%[sp1]) \n\t"
353 "sb %[p0_f0], +3(%[sp0]) \n\t"
354 "sb %[q0_f0], +3(%[sq0]) \n\t"
355 "sb %[q1_f0], +3(%[sq1]) \n\t"
356
357 :
358 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
359 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
360 [sq0] "r"(sq0), [sq1] "r"(sq1));
361 }
362 } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
363 /* f0 + f1 + f2 */
364 /* f0 function */
365 filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
366
367 /* f1 function */
368 /* left 2 element operation */
369 PACK_LEFT_0TO3()
370 mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1,
371 &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1);
372
373 /* right 2 element operation */
374 PACK_RIGHT_0TO3()
375 mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1,
376 &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1);
377
378 /* f2 function */
379 PACK_LEFT_4TO7()
380 wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
381 &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
382 &q6_l, &q7_l);
383
384 PACK_RIGHT_4TO7()
385 wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
386 &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
387 &q6_r, &q7_r);
388
389 if (mask & flat & flat2 & 0x000000FF) {
390 __asm__ __volatile__(
391 "sb %[p6_r], (%[sp6]) \n\t"
392 "sb %[p5_r], (%[sp5]) \n\t"
393 "sb %[p4_r], (%[sp4]) \n\t"
394 "sb %[p3_r], (%[sp3]) \n\t"
395 "sb %[p2_r], (%[sp2]) \n\t"
396 "sb %[p1_r], (%[sp1]) \n\t"
397 "sb %[p0_r], (%[sp0]) \n\t"
398
399 :
400 : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
401 [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
402 [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3),
403 [sp2] "r"(sp2), [sp1] "r"(sp1), [p0_r] "r"(p0_r), [sp0] "r"(sp0));
404
405 __asm__ __volatile__(
406 "sb %[q0_r], (%[sq0]) \n\t"
407 "sb %[q1_r], (%[sq1]) \n\t"
408 "sb %[q2_r], (%[sq2]) \n\t"
409 "sb %[q3_r], (%[sq3]) \n\t"
410 "sb %[q4_r], (%[sq4]) \n\t"
411 "sb %[q5_r], (%[sq5]) \n\t"
412 "sb %[q6_r], (%[sq6]) \n\t"
413
414 :
415 : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
416 [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
417 [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
418 [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
419 } else if (mask & flat & 0x000000FF) {
420 __asm__ __volatile__(
421 "sb %[p2_r_f1], (%[sp2]) \n\t"
422 "sb %[p1_r_f1], (%[sp1]) \n\t"
423 "sb %[p0_r_f1], (%[sp0]) \n\t"
424 "sb %[q0_r_f1], (%[sq0]) \n\t"
425 "sb %[q1_r_f1], (%[sq1]) \n\t"
426 "sb %[q2_r_f1], (%[sq2]) \n\t"
427
428 :
429 : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
430 [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
431 [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2),
432 [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
433 [sq2] "r"(sq2));
434 } else if (mask & 0x000000FF) {
435 __asm__ __volatile__(
436 "sb %[p1_f0], (%[sp1]) \n\t"
437 "sb %[p0_f0], (%[sp0]) \n\t"
438 "sb %[q0_f0], (%[sq0]) \n\t"
439 "sb %[q1_f0], (%[sq1]) \n\t"
440
441 :
442 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
443 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
444 [sq0] "r"(sq0), [sq1] "r"(sq1));
445 }
446
447 __asm__ __volatile__(
448 "srl %[p6_r], %[p6_r], 16 \n\t"
449 "srl %[p5_r], %[p5_r], 16 \n\t"
450 "srl %[p4_r], %[p4_r], 16 \n\t"
451 "srl %[p3_r], %[p3_r], 16 \n\t"
452 "srl %[p2_r], %[p2_r], 16 \n\t"
453 "srl %[p1_r], %[p1_r], 16 \n\t"
454 "srl %[p0_r], %[p0_r], 16 \n\t"
455 "srl %[q0_r], %[q0_r], 16 \n\t"
456 "srl %[q1_r], %[q1_r], 16 \n\t"
457 "srl %[q2_r], %[q2_r], 16 \n\t"
458 "srl %[q3_r], %[q3_r], 16 \n\t"
459 "srl %[q4_r], %[q4_r], 16 \n\t"
460 "srl %[q5_r], %[q5_r], 16 \n\t"
461 "srl %[q6_r], %[q6_r], 16 \n\t"
462
463 : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
464 [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r),
465 [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r), [p4_r] "+r"(p4_r),
466 [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r),
467 [q6_r] "+r"(q6_r), [p0_r] "+r"(p0_r)
468 :);
469
470 __asm__ __volatile__(
471 "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t"
472 "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t"
473 "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t"
474 "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t"
475 "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t"
476 "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t"
477 "srl %[p1_f0], %[p1_f0], 8 \n\t"
478 "srl %[p0_f0], %[p0_f0], 8 \n\t"
479 "srl %[q0_f0], %[q0_f0], 8 \n\t"
480 "srl %[q1_f0], %[q1_f0], 8 \n\t"
481
482 : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1),
483 [p0_r_f1] "+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1),
484 [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1),
485 [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
486 [q1_f0] "+r"(q1_f0)
487 :);
488
489 if (mask & flat & flat2 & 0x0000FF00) {
490 __asm__ __volatile__(
491 "sb %[p6_r], +1(%[sp6]) \n\t"
492 "sb %[p5_r], +1(%[sp5]) \n\t"
493 "sb %[p4_r], +1(%[sp4]) \n\t"
494 "sb %[p3_r], +1(%[sp3]) \n\t"
495 "sb %[p2_r], +1(%[sp2]) \n\t"
496 "sb %[p1_r], +1(%[sp1]) \n\t"
497 "sb %[p0_r], +1(%[sp0]) \n\t"
498
499 :
500 : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
501 [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
502 [p0_r] "r"(p0_r), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
503 [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
504
505 __asm__ __volatile__(
506 "sb %[q0_r], +1(%[sq0]) \n\t"
507 "sb %[q1_r], +1(%[sq1]) \n\t"
508 "sb %[q2_r], +1(%[sq2]) \n\t"
509 "sb %[q3_r], +1(%[sq3]) \n\t"
510 "sb %[q4_r], +1(%[sq4]) \n\t"
511 "sb %[q5_r], +1(%[sq5]) \n\t"
512 "sb %[q6_r], +1(%[sq6]) \n\t"
513
514 :
515 : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
516 [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
517 [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
518 [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
519 } else if (mask & flat & 0x0000FF00) {
520 __asm__ __volatile__(
521 "sb %[p2_r_f1], +1(%[sp2]) \n\t"
522 "sb %[p1_r_f1], +1(%[sp1]) \n\t"
523 "sb %[p0_r_f1], +1(%[sp0]) \n\t"
524 "sb %[q0_r_f1], +1(%[sq0]) \n\t"
525 "sb %[q1_r_f1], +1(%[sq1]) \n\t"
526 "sb %[q2_r_f1], +1(%[sq2]) \n\t"
527
528 :
529 : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
530 [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
531 [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2),
532 [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
533 [sq2] "r"(sq2));
534 } else if (mask & 0x0000FF00) {
535 __asm__ __volatile__(
536 "sb %[p1_f0], +1(%[sp1]) \n\t"
537 "sb %[p0_f0], +1(%[sp0]) \n\t"
538 "sb %[q0_f0], +1(%[sq0]) \n\t"
539 "sb %[q1_f0], +1(%[sq1]) \n\t"
540
541 :
542 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
543 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
544 [sq0] "r"(sq0), [sq1] "r"(sq1));
545 }
546
547 __asm__ __volatile__(
548 "srl %[p1_f0], %[p1_f0], 8 \n\t"
549 "srl %[p0_f0], %[p0_f0], 8 \n\t"
550 "srl %[q0_f0], %[q0_f0], 8 \n\t"
551 "srl %[q1_f0], %[q1_f0], 8 \n\t"
552
553 : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
554 [q1_f0] "+r"(q1_f0)
555 :);
556
557 if (mask & flat & flat2 & 0x00FF0000) {
558 __asm__ __volatile__(
559 "sb %[p6_l], +2(%[sp6]) \n\t"
560 "sb %[p5_l], +2(%[sp5]) \n\t"
561 "sb %[p4_l], +2(%[sp4]) \n\t"
562 "sb %[p3_l], +2(%[sp3]) \n\t"
563 "sb %[p2_l], +2(%[sp2]) \n\t"
564 "sb %[p1_l], +2(%[sp1]) \n\t"
565 "sb %[p0_l], +2(%[sp0]) \n\t"
566
567 :
568 : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
569 [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
570 [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
571 [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
572
573 __asm__ __volatile__(
574 "sb %[q0_l], +2(%[sq0]) \n\t"
575 "sb %[q1_l], +2(%[sq1]) \n\t"
576 "sb %[q2_l], +2(%[sq2]) \n\t"
577 "sb %[q3_l], +2(%[sq3]) \n\t"
578 "sb %[q4_l], +2(%[sq4]) \n\t"
579 "sb %[q5_l], +2(%[sq5]) \n\t"
580 "sb %[q6_l], +2(%[sq6]) \n\t"
581
582 :
583 : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
584 [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
585 [q6_l] "r"(q6_l), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
586 [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
587 } else if (mask & flat & 0x00FF0000) {
588 __asm__ __volatile__(
589 "sb %[p2_l_f1], +2(%[sp2]) \n\t"
590 "sb %[p1_l_f1], +2(%[sp1]) \n\t"
591 "sb %[p0_l_f1], +2(%[sp0]) \n\t"
592 "sb %[q0_l_f1], +2(%[sq0]) \n\t"
593 "sb %[q1_l_f1], +2(%[sq1]) \n\t"
594 "sb %[q2_l_f1], +2(%[sq2]) \n\t"
595
596 :
597 : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
598 [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
599 [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2),
600 [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
601 [sq2] "r"(sq2));
602 } else if (mask & 0x00FF0000) {
603 __asm__ __volatile__(
604 "sb %[p1_f0], +2(%[sp1]) \n\t"
605 "sb %[p0_f0], +2(%[sp0]) \n\t"
606 "sb %[q0_f0], +2(%[sq0]) \n\t"
607 "sb %[q1_f0], +2(%[sq1]) \n\t"
608
609 :
610 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
611 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
612 [sq0] "r"(sq0), [sq1] "r"(sq1));
613 }
614
615 __asm__ __volatile__(
616 "srl %[p6_l], %[p6_l], 16 \n\t"
617 "srl %[p5_l], %[p5_l], 16 \n\t"
618 "srl %[p4_l], %[p4_l], 16 \n\t"
619 "srl %[p3_l], %[p3_l], 16 \n\t"
620 "srl %[p2_l], %[p2_l], 16 \n\t"
621 "srl %[p1_l], %[p1_l], 16 \n\t"
622 "srl %[p0_l], %[p0_l], 16 \n\t"
623 "srl %[q0_l], %[q0_l], 16 \n\t"
624 "srl %[q1_l], %[q1_l], 16 \n\t"
625 "srl %[q2_l], %[q2_l], 16 \n\t"
626 "srl %[q3_l], %[q3_l], 16 \n\t"
627 "srl %[q4_l], %[q4_l], 16 \n\t"
628 "srl %[q5_l], %[q5_l], 16 \n\t"
629 "srl %[q6_l], %[q6_l], 16 \n\t"
630
631 : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
632 [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l),
633 [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l),
634 [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l),
635 [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l)
636 :);
637
638 __asm__ __volatile__(
639 "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t"
640 "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t"
641 "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t"
642 "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t"
643 "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t"
644 "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t"
645 "srl %[p1_f0], %[p1_f0], 8 \n\t"
646 "srl %[p0_f0], %[p0_f0], 8 \n\t"
647 "srl %[q0_f0], %[q0_f0], 8 \n\t"
648 "srl %[q1_f0], %[q1_f0], 8 \n\t"
649
650 : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1),
651 [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1),
652 [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1),
653 [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
654 [q1_f0] "+r"(q1_f0)
655 :);
656
657 if (mask & flat & flat2 & 0xFF000000) {
658 __asm__ __volatile__(
659 "sb %[p6_l], +3(%[sp6]) \n\t"
660 "sb %[p5_l], +3(%[sp5]) \n\t"
661 "sb %[p4_l], +3(%[sp4]) \n\t"
662 "sb %[p3_l], +3(%[sp3]) \n\t"
663 "sb %[p2_l], +3(%[sp2]) \n\t"
664 "sb %[p1_l], +3(%[sp1]) \n\t"
665 "sb %[p0_l], +3(%[sp0]) \n\t"
666
667 :
668 : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
669 [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
670 [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
671 [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
672
673 __asm__ __volatile__(
674 "sb %[q0_l], +3(%[sq0]) \n\t"
675 "sb %[q1_l], +3(%[sq1]) \n\t"
676 "sb %[q2_l], +3(%[sq2]) \n\t"
677 "sb %[q3_l], +3(%[sq3]) \n\t"
678 "sb %[q4_l], +3(%[sq4]) \n\t"
679 "sb %[q5_l], +3(%[sq5]) \n\t"
680 "sb %[q6_l], +3(%[sq6]) \n\t"
681
682 :
683 : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
684 [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
685 [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3),
686 [sq4] "r"(sq4), [sq5] "r"(sq5), [q6_l] "r"(q6_l), [sq6] "r"(sq6));
687 } else if (mask & flat & 0xFF000000) {
688 __asm__ __volatile__(
689 "sb %[p2_l_f1], +3(%[sp2]) \n\t"
690 "sb %[p1_l_f1], +3(%[sp1]) \n\t"
691 "sb %[p0_l_f1], +3(%[sp0]) \n\t"
692 "sb %[q0_l_f1], +3(%[sq0]) \n\t"
693 "sb %[q1_l_f1], +3(%[sq1]) \n\t"
694 "sb %[q2_l_f1], +3(%[sq2]) \n\t"
695
696 :
697 : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
698 [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
699 [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2),
700 [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
701 [sq2] "r"(sq2));
702 } else if (mask & 0xFF000000) {
703 __asm__ __volatile__(
704 "sb %[p1_f0], +3(%[sp1]) \n\t"
705 "sb %[p0_f0], +3(%[sp0]) \n\t"
706 "sb %[q0_f0], +3(%[sq0]) \n\t"
707 "sb %[q1_f0], +3(%[sq1]) \n\t"
708
709 :
710 : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
711 [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
712 [sq0] "r"(sq0), [sq1] "r"(sq1));
713 }
714 }
715
716 s = s + 4;
717 }
718 }
719
vpx_lpf_horizontal_16_dspr2(unsigned char * s,int pitch,const uint8_t * blimit,const uint8_t * limit,const uint8_t * thresh)720 void vpx_lpf_horizontal_16_dspr2(unsigned char *s, int pitch,
721 const uint8_t *blimit, const uint8_t *limit,
722 const uint8_t *thresh) {
723 mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
724 }
725
vpx_lpf_horizontal_16_dual_dspr2(unsigned char * s,int pitch,const uint8_t * blimit,const uint8_t * limit,const uint8_t * thresh)726 void vpx_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch,
727 const uint8_t *blimit,
728 const uint8_t *limit,
729 const uint8_t *thresh) {
730 mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
731 }
732 #endif // #if HAVE_DSPR2
733