xref: /aosp_15_r20/external/libvpx/vpx_dsp/ppc/quantize_vsx.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include <assert.h>
12*fb1b10abSAndroid Build Coastguard Worker 
13*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
14*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/ppc/types_vsx.h"
15*fb1b10abSAndroid Build Coastguard Worker 
16*fb1b10abSAndroid Build Coastguard Worker // Negate 16-bit integers in a when the corresponding signed 16-bit
17*fb1b10abSAndroid Build Coastguard Worker // integer in b is negative.
vec_sign(int16x8_t a,int16x8_t b)18*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
19*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t mask = vec_sra(b, vec_shift_sign_s16);
20*fb1b10abSAndroid Build Coastguard Worker   return vec_xor(vec_add(a, mask), mask);
21*fb1b10abSAndroid Build Coastguard Worker }
22*fb1b10abSAndroid Build Coastguard Worker 
23*fb1b10abSAndroid Build Coastguard Worker // Sets the value of a 32-bit integers to 1 when the corresponding value in a is
24*fb1b10abSAndroid Build Coastguard Worker // negative.
vec_is_neg(int32x4_t a)25*fb1b10abSAndroid Build Coastguard Worker static INLINE int32x4_t vec_is_neg(int32x4_t a) {
26*fb1b10abSAndroid Build Coastguard Worker   return vec_sr(a, vec_shift_sign_s32);
27*fb1b10abSAndroid Build Coastguard Worker }
28*fb1b10abSAndroid Build Coastguard Worker 
29*fb1b10abSAndroid Build Coastguard Worker // Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit
30*fb1b10abSAndroid Build Coastguard Worker // integers, and return the high 16 bits of the intermediate integers.
31*fb1b10abSAndroid Build Coastguard Worker // (a * b) >> 16
vec_mulhi(int16x8_t a,int16x8_t b)32*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
33*fb1b10abSAndroid Build Coastguard Worker   // madds does ((A * B) >>15) + C, we need >> 16, so we perform an extra right
34*fb1b10abSAndroid Build Coastguard Worker   // shift.
35*fb1b10abSAndroid Build Coastguard Worker   return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
36*fb1b10abSAndroid Build Coastguard Worker }
37*fb1b10abSAndroid Build Coastguard Worker 
38*fb1b10abSAndroid Build Coastguard Worker // Quantization function used for 4x4, 8x8 and 16x16 blocks.
quantize_coeff(int16x8_t coeff,int16x8_t coeff_abs,int16x8_t round,int16x8_t quant,int16x8_t quant_shift,bool16x8_t mask)39*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t quantize_coeff(int16x8_t coeff, int16x8_t coeff_abs,
40*fb1b10abSAndroid Build Coastguard Worker                                        int16x8_t round, int16x8_t quant,
41*fb1b10abSAndroid Build Coastguard Worker                                        int16x8_t quant_shift, bool16x8_t mask) {
42*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
43*fb1b10abSAndroid Build Coastguard Worker   int16x8_t qcoeff = vec_mulhi(rounded, quant);
44*fb1b10abSAndroid Build Coastguard Worker   qcoeff = vec_add(qcoeff, rounded);
45*fb1b10abSAndroid Build Coastguard Worker   qcoeff = vec_mulhi(qcoeff, quant_shift);
46*fb1b10abSAndroid Build Coastguard Worker   qcoeff = vec_sign(qcoeff, coeff);
47*fb1b10abSAndroid Build Coastguard Worker   return vec_and(qcoeff, mask);
48*fb1b10abSAndroid Build Coastguard Worker }
49*fb1b10abSAndroid Build Coastguard Worker 
50*fb1b10abSAndroid Build Coastguard Worker // Quantization function used for 32x32 blocks.
quantize_coeff_32(int16x8_t coeff,int16x8_t coeff_abs,int16x8_t round,int16x8_t quant,int16x8_t quant_shift,bool16x8_t mask)51*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t quantize_coeff_32(int16x8_t coeff, int16x8_t coeff_abs,
52*fb1b10abSAndroid Build Coastguard Worker                                           int16x8_t round, int16x8_t quant,
53*fb1b10abSAndroid Build Coastguard Worker                                           int16x8_t quant_shift,
54*fb1b10abSAndroid Build Coastguard Worker                                           bool16x8_t mask) {
55*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
56*fb1b10abSAndroid Build Coastguard Worker   int16x8_t qcoeff = vec_mulhi(rounded, quant);
57*fb1b10abSAndroid Build Coastguard Worker   qcoeff = vec_add(qcoeff, rounded);
58*fb1b10abSAndroid Build Coastguard Worker   // 32x32 blocks require an extra multiplication by 2, this compensates for the
59*fb1b10abSAndroid Build Coastguard Worker   // extra right shift added in vec_mulhi, as such vec_madds can be used
60*fb1b10abSAndroid Build Coastguard Worker   // directly instead of vec_mulhi (((a * b) >> 15) >> 1) << 1 == (a * b >> 15)
61*fb1b10abSAndroid Build Coastguard Worker   qcoeff = vec_madds(qcoeff, quant_shift, vec_zeros_s16);
62*fb1b10abSAndroid Build Coastguard Worker   qcoeff = vec_sign(qcoeff, coeff);
63*fb1b10abSAndroid Build Coastguard Worker   return vec_and(qcoeff, mask);
64*fb1b10abSAndroid Build Coastguard Worker }
65*fb1b10abSAndroid Build Coastguard Worker 
66*fb1b10abSAndroid Build Coastguard Worker // DeQuantization function used for 32x32 blocks. Quantized coeff of 32x32
67*fb1b10abSAndroid Build Coastguard Worker // blocks are twice as big as for other block sizes. As such, using
68*fb1b10abSAndroid Build Coastguard Worker // vec_mladd results in overflow.
dequantize_coeff_32(int16x8_t qcoeff,int16x8_t dequant)69*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
70*fb1b10abSAndroid Build Coastguard Worker                                             int16x8_t dequant) {
71*fb1b10abSAndroid Build Coastguard Worker   int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
72*fb1b10abSAndroid Build Coastguard Worker   int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
73*fb1b10abSAndroid Build Coastguard Worker   // Add 1 if negative to round towards zero because the C uses division.
74*fb1b10abSAndroid Build Coastguard Worker   dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
75*fb1b10abSAndroid Build Coastguard Worker   dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
76*fb1b10abSAndroid Build Coastguard Worker   dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
77*fb1b10abSAndroid Build Coastguard Worker   dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
78*fb1b10abSAndroid Build Coastguard Worker   return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
79*fb1b10abSAndroid Build Coastguard Worker }
80*fb1b10abSAndroid Build Coastguard Worker 
nonzero_scanindex(int16x8_t qcoeff,const int16_t * iscan_ptr,int index)81*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff,
82*fb1b10abSAndroid Build Coastguard Worker                                           const int16_t *iscan_ptr, int index) {
83*fb1b10abSAndroid Build Coastguard Worker   int16x8_t scan = vec_vsx_ld(index, iscan_ptr);
84*fb1b10abSAndroid Build Coastguard Worker   bool16x8_t zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16);
85*fb1b10abSAndroid Build Coastguard Worker   return vec_andc(scan, zero_coeff);
86*fb1b10abSAndroid Build Coastguard Worker }
87*fb1b10abSAndroid Build Coastguard Worker 
88*fb1b10abSAndroid Build Coastguard Worker // Compare packed 16-bit integers across a, and return the maximum value in
89*fb1b10abSAndroid Build Coastguard Worker // every element. Returns a vector containing the biggest value across vector a.
vec_max_across(int16x8_t a)90*fb1b10abSAndroid Build Coastguard Worker static INLINE int16x8_t vec_max_across(int16x8_t a) {
91*fb1b10abSAndroid Build Coastguard Worker   a = vec_max(a, vec_perm(a, a, vec_perm64));
92*fb1b10abSAndroid Build Coastguard Worker   a = vec_max(a, vec_perm(a, a, vec_perm32));
93*fb1b10abSAndroid Build Coastguard Worker   return vec_max(a, vec_perm(a, a, vec_perm16));
94*fb1b10abSAndroid Build Coastguard Worker }
95*fb1b10abSAndroid Build Coastguard Worker 
// VSX quantizer for 4x4, 8x8 and 16x16 blocks.
//
// Reads n_coeffs coefficients from coeff_ptr, writes the quantized values to
// qcoeff_ptr and the dequantized values (qcoeff * dequant, low 16 bits) to
// dqcoeff_ptr, and stores in *eob_ptr the largest iscan_ptr entry that
// belongs to a non-zero quantized coefficient (0 when all are zero).
//
// The first 16 coefficients are always processed; the remainder is handled 24
// at a time, so n_coeffs is expected to be 16 + a multiple of 24 (16, 64 and
// 256 all qualify). scan_ptr is unused. Coefficients are loaded and stored as
// 16-bit lanes.
void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                        const int16_t *zbin_ptr, const int16_t *round_ptr,
                        const int16_t *quant_ptr,
                        const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                        uint16_t *eob_ptr, const int16_t *scan_ptr,
                        const int16_t *iscan_ptr) {
  int16x8_t q_lo, q_hi, dq_lo, dq_hi, eob;
  bool16x8_t keep_lo, keep_hi;
  int index;  // number of coefficients already processed
  int off;    // byte offset of the next group of coefficients

  // Quantizer tables: lane 0 applies to the DC coefficient, lane 1 to AC.
  int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
  int16x8_t round = vec_vsx_ld(0, round_ptr);
  int16x8_t quant = vec_vsx_ld(0, quant_ptr);
  int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
  int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);

  // First 16 coefficients: DC + 7 AC in the low vector, 8 AC in the high one.
  const int16x8_t in_lo = vec_vsx_ld(0, coeff_ptr);
  const int16x8_t in_hi = vec_vsx_ld(16, coeff_ptr);
  const int16x8_t abs_lo = vec_abs(in_lo);
  const int16x8_t abs_hi = vec_abs(in_hi);

  (void)scan_ptr;

  // The low vector uses the tables as loaded (DC entry in lane 0).
  keep_lo = vec_cmpge(abs_lo, zbin);
  q_lo = quantize_coeff(in_lo, abs_lo, round, quant, quant_shift, keep_lo);
  dq_lo = vec_mladd(q_lo, dequant, vec_zeros_s16);

  // From here on every lane is AC: broadcast lane 1 of each table.
  zbin = vec_splat(zbin, 1);
  round = vec_splat(round, 1);
  quant = vec_splat(quant, 1);
  quant_shift = vec_splat(quant_shift, 1);
  dequant = vec_splat(dequant, 1);

  keep_hi = vec_cmpge(abs_hi, zbin);
  q_hi = quantize_coeff(in_hi, abs_hi, round, quant, quant_shift, keep_hi);
  dq_hi = vec_mladd(q_hi, dequant, vec_zeros_s16);

  vec_vsx_st(q_lo, 0, qcoeff_ptr);
  vec_vsx_st(q_hi, 16, qcoeff_ptr);
  vec_vsx_st(dq_lo, 0, dqcoeff_ptr);
  vec_vsx_st(dq_hi, 16, dqcoeff_ptr);

  eob = vec_max(nonzero_scanindex(q_lo, iscan_ptr, 0),
                nonzero_scanindex(q_hi, iscan_ptr, 16));

  // Remaining coefficients, 24 (three vectors, 48 bytes) per iteration.
  for (index = 16, off = 32; index < n_coeffs; index += 24, off += 48) {
    const int16x8_t in0 = vec_vsx_ld(off, coeff_ptr);
    const int16x8_t in1 = vec_vsx_ld(off + 16, coeff_ptr);
    const int16x8_t in2 = vec_vsx_ld(off + 32, coeff_ptr);
    const int16x8_t abs0 = vec_abs(in0);
    const int16x8_t abs1 = vec_abs(in1);
    const int16x8_t abs2 = vec_abs(in2);
    const bool16x8_t keep0 = vec_cmpge(abs0, zbin);
    const bool16x8_t keep1 = vec_cmpge(abs1, zbin);
    const bool16x8_t keep2 = vec_cmpge(abs2, zbin);
    const int16x8_t q0 =
        quantize_coeff(in0, abs0, round, quant, quant_shift, keep0);
    const int16x8_t q1 =
        quantize_coeff(in1, abs1, round, quant, quant_shift, keep1);
    const int16x8_t q2 =
        quantize_coeff(in2, abs2, round, quant, quant_shift, keep2);
    int16x8_t dq0, dq1, dq2, eob_tail;

    vec_vsx_st(q0, off, qcoeff_ptr);
    vec_vsx_st(q1, off + 16, qcoeff_ptr);
    vec_vsx_st(q2, off + 32, qcoeff_ptr);

    dq0 = vec_mladd(q0, dequant, vec_zeros_s16);
    dq1 = vec_mladd(q1, dequant, vec_zeros_s16);
    dq2 = vec_mladd(q2, dequant, vec_zeros_s16);

    vec_vsx_st(dq0, off, dqcoeff_ptr);
    vec_vsx_st(dq1, off + 16, dqcoeff_ptr);
    vec_vsx_st(dq2, off + 32, dqcoeff_ptr);

    // Fold the scan entries of this group's non-zero coefficients into eob.
    eob = vec_max(eob, nonzero_scanindex(q0, iscan_ptr, off));
    eob_tail = vec_max(nonzero_scanindex(q1, iscan_ptr, off + 16),
                       nonzero_scanindex(q2, iscan_ptr, off + 32));
    eob = vec_max(eob, eob_tail);
  }

  // Reduce the per-lane maxima to a single value.
  eob = vec_max_across(eob);
  *eob_ptr = eob[0];
}
194*fb1b10abSAndroid Build Coastguard Worker 
// VSX quantizer for 32x32 blocks.
//
// Always processes 32 * 32 = 1024 coefficients regardless of n_coeffs: 16 in
// a first stage (DC + 15 AC), then 42 iterations of 24 coefficients each
// ((32 * 32 - 16) / 24 = 42). Quantized values go to qcoeff_ptr, dequantized
// values to dqcoeff_ptr, and *eob_ptr receives the largest iscan_ptr entry
// that belongs to a non-zero quantized coefficient (0 when all are zero).
// scan_ptr and n_coeffs are unused. Coefficients are loaded and stored as
// 16-bit lanes.
void vpx_quantize_b_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                              const int16_t *zbin_ptr, const int16_t *round_ptr,
                              const int16_t *quant_ptr,
                              const int16_t *quant_shift_ptr,
                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan_ptr,
                              const int16_t *iscan_ptr) {
  int remaining;  // second-stage iterations still to run
  int off;        // byte offset of the next group (16 coeffs = 32 bytes)

  int16x8_t q_lo, q_hi, eob;
  bool16x8_t keep_lo, keep_hi;

  // Quantizer tables: lane 0 applies to the DC coefficient, lane 1 to AC.
  int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
  int16x8_t round = vec_vsx_ld(0, round_ptr);
  int16x8_t quant = vec_vsx_ld(0, quant_ptr);
  int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
  int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);

  // Stage 1 input: the first 16 coefficients.
  const int16x8_t in_lo = vec_vsx_ld(0, coeff_ptr);
  const int16x8_t in_hi = vec_vsx_ld(16, coeff_ptr);
  const int16x8_t abs_lo = vec_abs(in_lo);
  const int16x8_t abs_hi = vec_abs(in_hi);

  (void)scan_ptr;
  (void)n_coeffs;

  // 32x32 quantization works with zbin and round divided by 2; the add
  // before the shift rounds the halved value, (x + 1) >> 1.
  zbin = vec_sra(vec_add(zbin, vec_ones_s16), vec_ones_u16);
  round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16);

  // The low vector uses the tables as loaded (DC entry in lane 0).
  keep_lo = vec_cmpge(abs_lo, zbin);
  q_lo = quantize_coeff_32(in_lo, abs_lo, round, quant, quant_shift, keep_lo);

  // From here on every lane is AC: broadcast lane 1 of each table.
  zbin = vec_splat(zbin, 1);
  round = vec_splat(round, 1);
  quant = vec_splat(quant, 1);
  quant_shift = vec_splat(quant_shift, 1);

  keep_hi = vec_cmpge(abs_hi, zbin);
  q_hi = quantize_coeff_32(in_hi, abs_hi, round, quant, quant_shift, keep_hi);

  vec_vsx_st(q_lo, 0, qcoeff_ptr);
  vec_vsx_st(q_hi, 16, qcoeff_ptr);

  // dequant still carries the DC entry in lane 0 for the low vector only.
  vec_vsx_st(dequantize_coeff_32(q_lo, dequant), 0, dqcoeff_ptr);
  dequant = vec_splat(dequant, 1);
  vec_vsx_st(dequantize_coeff_32(q_hi, dequant), 16, dqcoeff_ptr);

  eob = vec_max(nonzero_scanindex(q_lo, iscan_ptr, 0),
                nonzero_scanindex(q_hi, iscan_ptr, 16));

  // Stage 2: 42 iterations, 24 int16_t (48 bytes) per iteration.
  for (remaining = 42, off = 32; remaining != 0; --remaining, off += 48) {
    const int16x8_t in0 = vec_vsx_ld(off, coeff_ptr);
    const int16x8_t in1 = vec_vsx_ld(off + 16, coeff_ptr);
    const int16x8_t in2 = vec_vsx_ld(off + 32, coeff_ptr);

    const int16x8_t abs0 = vec_abs(in0);
    const int16x8_t abs1 = vec_abs(in1);
    const int16x8_t abs2 = vec_abs(in2);

    const bool16x8_t keep0 = vec_cmpge(abs0, zbin);
    const bool16x8_t keep1 = vec_cmpge(abs1, zbin);
    const bool16x8_t keep2 = vec_cmpge(abs2, zbin);

    const int16x8_t q0 =
        quantize_coeff_32(in0, abs0, round, quant, quant_shift, keep0);
    const int16x8_t q1 =
        quantize_coeff_32(in1, abs1, round, quant, quant_shift, keep1);
    const int16x8_t q2 =
        quantize_coeff_32(in2, abs2, round, quant, quant_shift, keep2);
    int16x8_t eob_tail;

    vec_vsx_st(q0, off, qcoeff_ptr);
    vec_vsx_st(q1, off + 16, qcoeff_ptr);
    vec_vsx_st(q2, off + 32, qcoeff_ptr);

    vec_vsx_st(dequantize_coeff_32(q0, dequant), off, dqcoeff_ptr);
    vec_vsx_st(dequantize_coeff_32(q1, dequant), off + 16, dqcoeff_ptr);
    vec_vsx_st(dequantize_coeff_32(q2, dequant), off + 32, dqcoeff_ptr);

    // Fold the scan entries of this group's non-zero coefficients into eob.
    eob = vec_max(eob, nonzero_scanindex(q0, iscan_ptr, off));
    eob_tail = vec_max(nonzero_scanindex(q1, iscan_ptr, off + 16),
                       nonzero_scanindex(q2, iscan_ptr, off + 32));
    eob = vec_max(eob, eob_tail);
  }

  // Reduce the per-lane maxima to a single value.
  eob = vec_max_across(eob);
  *eob_ptr = eob[0];
}
302