xref: /aosp_15_r20/external/libvpx/vp8/encoder/mips/mmi/vp8_quantize_mmi.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "vpx_mem/vpx_mem.h"
12 #include "vpx_ports/asmdefs_mmi.h"
13 #include "vp8/encoder/onyx_int.h"
14 #include "vp8/encoder/quantize.h"
15 #include "vp8/common/quant_common.h"
16 
/*
 * Quantize the single coefficient at index `rc` and, when the quantized
 * value is non-zero, record `i` as the new end-of-block position.
 *
 * The macro operates on locals of the invoking function, which must
 * declare: z, sz, x, y, zbin, eob, coeff_ptr, zbin_ptr, zbin_boost_ptr,
 * zbin_oq_value, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
 * dqcoeff_ptr, dequant_ptr and b (for b->zrun_zbin_boost).
 *
 * zbin_boost_ptr is advanced on every invocation and rewound to
 * b->zrun_zbin_boost whenever a non-zero value is produced.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by ';' is exactly one statement and is safe inside unbraced if/else.
 */
#define REGULAR_SELECT_EOB(i, rc)                                          \
  do {                                                                     \
    z = coeff_ptr[(rc)];                                                   \
    sz = (z >> 31); /* 0 for z >= 0, -1 for z < 0 */                       \
    x = (z ^ sz) - sz; /* |z| */                                           \
    zbin = zbin_ptr[(rc)] + *(zbin_boost_ptr++) + zbin_oq_value;           \
    if (x >= zbin) {                                                       \
      x += round_ptr[(rc)];                                                \
      y = ((((x * quant_ptr[(rc)]) >> 16) + x) * quant_shift_ptr[(rc)]) >> \
          16;                                                              \
      if (y) {                                                             \
        x = (y ^ sz) - sz; /* restore the sign of z */                     \
        qcoeff_ptr[(rc)] = x;                                              \
        dqcoeff_ptr[(rc)] = x * dequant_ptr[(rc)];                         \
        eob = (i);                                                         \
        zbin_boost_ptr = b->zrun_zbin_boost;                               \
      }                                                                    \
    }                                                                      \
  } while (0)
33 
/*
 * Fast quantizer for one 16-coefficient block, implemented with Loongson
 * MMI inline assembly.
 *
 * For every coefficient z (read from b->coeff) the assembly computes
 *   sz = z >> 15;  x = (z ^ sz) - sz;
 *   y  = ((x + b->round[k]) * b->quant_fast[k]) >> 16;   (pmulhuh)
 *   q  = (y ^ sz) - sz;
 * and stores q to d->qcoeff[k] and the low 16 bits of q * d->dequant[k]
 * to d->dqcoeff[k].  Unlike REGULAR_SELECT_EOB there is no zero-bin test.
 *
 * The end-of-block value written to *d->eob is the maximum, over all lanes
 * with y != 0, of the matching vp8_default_inv_zig_zag entry (0 when every
 * lane quantizes to zero).
 * NOTE(review): this presumes the table holds 1-based scan positions;
 * confirm against its definition.
 *
 * Each gsldlc1/gsldrc1 (gssdlc1/gssdrc1) pair moves 8 bytes, i.e. four
 * int16 lanes, so the 32 bytes of each array are handled as two passes of
 * two registers.
 */
void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
  const int16_t *coeff_ptr = b->coeff;
  const int16_t *round_ptr = b->round;
  const int16_t *quant_ptr = b->quant_fast;
  int16_t *qcoeff_ptr = d->qcoeff;
  int16_t *dqcoeff_ptr = d->dqcoeff;
  const int16_t *dequant_ptr = d->dequant;
  const int16_t *inv_zig_zag = vp8_default_inv_zig_zag;

  double ftmp[12]; /* FP scratch registers ftmp0..ftmp11 */
  uint64_t tmp[1]; /* integer scratch used to build immediates */
  int64_t eob = 0; /* written by the asm with a 64-bit store */
  double ones;     /* all-ones mask */

  __asm__ volatile(
      // loop 0 ~ 7
      // ftmp0 = 0, ones = all bits set, ftmp9 = 15 (sign-extraction shift).
      "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
      "pcmpeqh    %[ones],    %[ones],        %[ones]         \n\t"
      "gsldlc1    %[ftmp1],   0x07(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp1],   0x00(%[coeff_ptr])              \n\t"
      "dli        %[tmp0],    0x0f                            \n\t"
      "dmtc1      %[tmp0],    %[ftmp9]                        \n\t"
      "gsldlc1    %[ftmp2],   0x0f(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp2],   0x08(%[coeff_ptr])              \n\t"

      // ftmp3/ftmp4 = sign masks, ftmp1/ftmp2 = absolute values.
      "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
      "pxor       %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
      "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
      "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
      "pxor       %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
      "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"

      // y = ((x + round) * quant) >> 16, kept in ftmp5/ftmp6.
      "gsldlc1    %[ftmp5],   0x07(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp5],   0x00(%[round_ptr])              \n\t"
      "gsldlc1    %[ftmp6],   0x0f(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp6],   0x08(%[round_ptr])              \n\t"
      "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "gsldlc1    %[ftmp7],   0x07(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp7],   0x00(%[quant_ptr])              \n\t"
      "gsldlc1    %[ftmp8],   0x0f(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp8],   0x08(%[quant_ptr])              \n\t"
      "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"

      // Re-apply the sign and store qcoeff[0..7].
      "pxor       %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
      "pxor       %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
      "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
      "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
      "gssdlc1    %[ftmp7],   0x07(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp7],   0x00(%[qcoeff_ptr])             \n\t"
      "gssdlc1    %[ftmp8],   0x0f(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp8],   0x08(%[qcoeff_ptr])             \n\t"

      // Keep the inv_zig_zag entry only in lanes where y != 0, then take
      // the lane-wise maximum of the two registers into ftmp10.
      "gsldlc1    %[ftmp1],   0x07(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp1],   0x00(%[inv_zig_zag])            \n\t"
      "gsldlc1    %[ftmp2],   0x0f(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp2],   0x08(%[inv_zig_zag])            \n\t"
      "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
      "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
      "pxor       %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
      "pxor       %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
      "pand       %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "pand       %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "pmaxsh     %[ftmp10],  %[ftmp5],       %[ftmp6]        \n\t"

      // dqcoeff[0..7] = low 16 bits of dequant * qcoeff.
      "gsldlc1    %[ftmp5],   0x07(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp5],   0x00(%[dequant_ptr])            \n\t"
      "gsldlc1    %[ftmp6],   0x0f(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp6],   0x08(%[dequant_ptr])            \n\t"
      "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
      "gssdlc1    %[ftmp5],   0x07(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp5],   0x00(%[dqcoeff_ptr])            \n\t"
      "gssdlc1    %[ftmp6],   0x0f(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp6],   0x08(%[dqcoeff_ptr])            \n\t"

      // loop 8 ~ 15
      // Same sequence on byte offsets 0x10..0x1f; ftmp9 still holds 15 and
      // the per-lane position maximum goes to ftmp11.
      "gsldlc1    %[ftmp1],   0x17(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp1],   0x10(%[coeff_ptr])              \n\t"
      "gsldlc1    %[ftmp2],   0x1f(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp2],   0x18(%[coeff_ptr])              \n\t"

      "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
      "pxor       %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
      "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
      "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
      "pxor       %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
      "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"

      "gsldlc1    %[ftmp5],   0x17(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp5],   0x10(%[round_ptr])              \n\t"
      "gsldlc1    %[ftmp6],   0x1f(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp6],   0x18(%[round_ptr])              \n\t"
      "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "gsldlc1    %[ftmp7],   0x17(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp7],   0x10(%[quant_ptr])              \n\t"
      "gsldlc1    %[ftmp8],   0x1f(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp8],   0x18(%[quant_ptr])              \n\t"
      "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"

      "pxor       %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
      "pxor       %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
      "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
      "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
      "gssdlc1    %[ftmp7],   0x17(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp7],   0x10(%[qcoeff_ptr])             \n\t"
      "gssdlc1    %[ftmp8],   0x1f(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp8],   0x18(%[qcoeff_ptr])             \n\t"

      "gsldlc1    %[ftmp1],   0x17(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp1],   0x10(%[inv_zig_zag])            \n\t"
      "gsldlc1    %[ftmp2],   0x1f(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp2],   0x18(%[inv_zig_zag])            \n\t"
      "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
      "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
      "pxor       %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
      "pxor       %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
      "pand       %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "pand       %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "pmaxsh     %[ftmp11],  %[ftmp5],       %[ftmp6]        \n\t"

      "gsldlc1    %[ftmp5],   0x17(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp5],   0x10(%[dequant_ptr])            \n\t"
      "gsldlc1    %[ftmp6],   0x1f(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp6],   0x18(%[dequant_ptr])            \n\t"
      "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
      "gssdlc1    %[ftmp5],   0x17(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp5],   0x10(%[dqcoeff_ptr])            \n\t"
      "gssdlc1    %[ftmp6],   0x1f(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp6],   0x18(%[dqcoeff_ptr])            \n\t"

      // Horizontal reduction: fold the four 16-bit lanes of
      // max(ftmp10, ftmp11) down to a single maximum in lane 0.
      "dli        %[tmp0],    0x10                            \n\t"
      "dmtc1      %[tmp0],    %[ftmp9]                        \n\t"

      "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
      // Shift each 32-bit word right by 16 so odd lanes meet even lanes.
      "psrlw      %[ftmp11],  %[ftmp10],       %[ftmp9]       \n\t"
      "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
      // Selector 0xaa broadcasts lane 2 to every lane.
      "dli        %[tmp0],    0xaa                            \n\t"
      "dmtc1      %[tmp0],    %[ftmp9]                        \n\t"
      "pshufh     %[ftmp11],  %[ftmp10],       %[ftmp9]       \n\t"
      "pmaxsh     %[ftmp10],  %[ftmp10],       %[ftmp11]      \n\t"
      // Keep only lane 0 and store the 64-bit result into eob.
      "dli        %[tmp0],    0xffff                          \n\t"
      "dmtc1      %[tmp0],    %[ftmp9]                        \n\t"
      "pand       %[ftmp10],  %[ftmp10],       %[ftmp9]       \n\t"
      "gssdlc1    %[ftmp10],  0x07(%[eob])                    \n\t"
      "gssdrc1    %[ftmp10],  0x00(%[eob])                    \n\t"
      : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
        [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
        [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
        [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
        [ftmp11] "=&f"(ftmp[11]),
        [tmp0] "=&r"(tmp[0]), [ones] "=&f"(ones)
      : [coeff_ptr] "r"((mips_reg)coeff_ptr),
        [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr),
        [dequant_ptr] "r"((mips_reg)dequant_ptr),
        [round_ptr] "r"((mips_reg)round_ptr),
        [quant_ptr] "r"((mips_reg)quant_ptr),
        [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr),
        [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob)
      // qcoeff, dqcoeff and eob are written through input pointers, so the
      // compiler must be told that memory changes.
      : "memory");

  /* Explicit narrowing, matching vp8_regular_quantize_b_mmi(). */
  *d->eob = (char)eob;
}
201 
/*
 * Regular quantizer for one 16-coefficient block.
 *
 * Clears d->qcoeff and d->dqcoeff (32 bytes each) with MMI stores, then
 * runs REGULAR_SELECT_EOB once per coefficient. The first macro argument
 * is the end-of-block value recorded when that coefficient quantizes to a
 * non-zero value; the second is the index of the coefficient in the
 * arrays. The last recorded value (0 if none) is written to *d->eob.
 */
void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
  int eob = 0;
  int x, y, z, sz, zbin; /* scratch variables used by REGULAR_SELECT_EOB */
  const int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
  const int16_t *coeff_ptr = b->coeff;
  const int16_t *zbin_ptr = b->zbin;
  const int16_t *round_ptr = b->round;
  const int16_t *quant_ptr = b->quant;
  const int16_t *quant_shift_ptr = b->quant_shift;
  int16_t *qcoeff_ptr = d->qcoeff;
  int16_t *dqcoeff_ptr = d->dqcoeff;
  const int16_t *dequant_ptr = d->dequant;
  const int16_t zbin_oq_value = b->zbin_extra;
  double ftmp0; /* FP scratch register, allocated by the compiler */

  /*
   * Zero both output buffers; this has the same effect as
   *   memset(qcoeff_ptr, 0, 32);
   *   memset(dqcoeff_ptr, 0, 32);
   * ftmp0 is cleared once and stored over byte offsets 0x00..0x1f of each
   * buffer, 8 bytes per gssdlc1/gssdrc1 pair.
   */
  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
    "gssdlc1    %[ftmp0],   0x07(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x00(%[qcoeff_ptr])             \n\t"
    "gssdlc1    %[ftmp0],   0x0f(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x08(%[qcoeff_ptr])             \n\t"
    "gssdlc1    %[ftmp0],   0x17(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x10(%[qcoeff_ptr])             \n\t"
    "gssdlc1    %[ftmp0],   0x1f(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x18(%[qcoeff_ptr])             \n\t"

    "gssdlc1    %[ftmp0],   0x07(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x00(%[dqcoeff_ptr])            \n\t"
    "gssdlc1    %[ftmp0],   0x0f(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x08(%[dqcoeff_ptr])            \n\t"
    "gssdlc1    %[ftmp0],   0x17(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x10(%[dqcoeff_ptr])            \n\t"
    "gssdlc1    %[ftmp0],   0x1f(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x18(%[dqcoeff_ptr])            \n\t"
    : [ftmp0]"=&f"(ftmp0)
    : [qcoeff_ptr]"r"(qcoeff_ptr), [dqcoeff_ptr]"r"(dqcoeff_ptr)
    : "memory"
  );
  /* clang-format on */

  /* (eob value, coefficient index) pairs, in processing order. */
  REGULAR_SELECT_EOB(1, 0);
  REGULAR_SELECT_EOB(2, 1);
  REGULAR_SELECT_EOB(3, 4);
  REGULAR_SELECT_EOB(4, 8);
  REGULAR_SELECT_EOB(5, 5);
  REGULAR_SELECT_EOB(6, 2);
  REGULAR_SELECT_EOB(7, 3);
  REGULAR_SELECT_EOB(8, 6);
  REGULAR_SELECT_EOB(9, 9);
  REGULAR_SELECT_EOB(10, 12);
  REGULAR_SELECT_EOB(11, 13);
  REGULAR_SELECT_EOB(12, 10);
  REGULAR_SELECT_EOB(13, 7);
  REGULAR_SELECT_EOB(14, 11);
  REGULAR_SELECT_EOB(15, 14);
  REGULAR_SELECT_EOB(16, 15);

  *d->eob = (char)eob;
}
264