xref: /aosp_15_r20/external/pdfium/third_party/libopenjpeg/dwt.c (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
/*
 * The copyright in this software is being made available under the 2-clauses
 * BSD License, included below. This software may be subject to other third
 * party and contributor rights, including patent rights, and no such rights
 * are granted under this license.
 *
 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
 * Copyright (c) 2002-2014, Professor Benoit Macq
 * Copyright (c) 2001-2003, David Janssens
 * Copyright (c) 2002-2003, Yannick Verschueren
 * Copyright (c) 2003-2007, Francois-Olivier Devaux
 * Copyright (c) 2003-2014, Antonin Descampe
 * Copyright (c) 2005, Herve Drolon, FreeImage Team
 * Copyright (c) 2007, Jonathan Ballard <[email protected]>
 * Copyright (c) 2007, Callum Lerwick <[email protected]>
 * Copyright (c) 2017, IntoPIX SA <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
40*3ac0a46fSAndroid Build Coastguard Worker 
41*3ac0a46fSAndroid Build Coastguard Worker #include <assert.h>
42*3ac0a46fSAndroid Build Coastguard Worker 
43*3ac0a46fSAndroid Build Coastguard Worker #define OPJ_SKIP_POISON
44*3ac0a46fSAndroid Build Coastguard Worker #include "opj_includes.h"
45*3ac0a46fSAndroid Build Coastguard Worker 
46*3ac0a46fSAndroid Build Coastguard Worker #ifdef __SSE__
47*3ac0a46fSAndroid Build Coastguard Worker #include <xmmintrin.h>
48*3ac0a46fSAndroid Build Coastguard Worker #endif
49*3ac0a46fSAndroid Build Coastguard Worker #ifdef __SSE2__
50*3ac0a46fSAndroid Build Coastguard Worker #include <emmintrin.h>
51*3ac0a46fSAndroid Build Coastguard Worker #endif
52*3ac0a46fSAndroid Build Coastguard Worker #ifdef __SSSE3__
53*3ac0a46fSAndroid Build Coastguard Worker #include <tmmintrin.h>
54*3ac0a46fSAndroid Build Coastguard Worker #endif
55*3ac0a46fSAndroid Build Coastguard Worker #ifdef __AVX2__
56*3ac0a46fSAndroid Build Coastguard Worker #include <immintrin.h>
57*3ac0a46fSAndroid Build Coastguard Worker #endif
58*3ac0a46fSAndroid Build Coastguard Worker 
59*3ac0a46fSAndroid Build Coastguard Worker #if defined(__GNUC__)
60*3ac0a46fSAndroid Build Coastguard Worker #pragma GCC poison malloc calloc realloc free
61*3ac0a46fSAndroid Build Coastguard Worker #endif
62*3ac0a46fSAndroid Build Coastguard Worker 
63*3ac0a46fSAndroid Build Coastguard Worker /** @defgroup DWT DWT - Implementation of a discrete wavelet transform */
64*3ac0a46fSAndroid Build Coastguard Worker /*@{*/
65*3ac0a46fSAndroid Build Coastguard Worker 
#ifdef __AVX2__
/** Number of int32 values in a AVX2 register */
#define VREG_INT_COUNT       8
#else
/** Number of int32 values in a SSE2 register */
#define VREG_INT_COUNT       4
#endif

/** Number of columns that we can process in parallel in the vertical pass */
/* (two vector registers' worth of int32 values) */
#define PARALLEL_COLS_53     (2*VREG_INT_COUNT)
76*3ac0a46fSAndroid Build Coastguard Worker 
77*3ac0a46fSAndroid Build Coastguard Worker /** @name Local data structures */
78*3ac0a46fSAndroid Build Coastguard Worker /*@{*/
79*3ac0a46fSAndroid Build Coastguard Worker 
80*3ac0a46fSAndroid Build Coastguard Worker typedef struct dwt_local {
81*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* mem;
82*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T mem_count;
83*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 dn;   /* number of elements in high pass band */
84*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 sn;   /* number of elements in low pass band */
85*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 cas;  /* 0 = start on even coord, 1 = start on odd coord */
86*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt_t;
87*3ac0a46fSAndroid Build Coastguard Worker 
88*3ac0a46fSAndroid Build Coastguard Worker #define NB_ELTS_V8  8
89*3ac0a46fSAndroid Build Coastguard Worker 
90*3ac0a46fSAndroid Build Coastguard Worker typedef union {
91*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32 f[NB_ELTS_V8];
92*3ac0a46fSAndroid Build Coastguard Worker } opj_v8_t;
93*3ac0a46fSAndroid Build Coastguard Worker 
94*3ac0a46fSAndroid Build Coastguard Worker typedef struct v8dwt_local {
95*3ac0a46fSAndroid Build Coastguard Worker     opj_v8_t*   wavelet ;
96*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32       dn ;  /* number of elements in high pass band */
97*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32       sn ;  /* number of elements in low pass band */
98*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32       cas ; /* 0 = start on even coord, 1 = start on odd coord */
99*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32      win_l_x0; /* start coord in low pass band */
100*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32      win_l_x1; /* end coord in low pass band */
101*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32      win_h_x0; /* start coord in high pass band */
102*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32      win_h_x1; /* end coord in high pass band */
103*3ac0a46fSAndroid Build Coastguard Worker } opj_v8dwt_t ;
104*3ac0a46fSAndroid Build Coastguard Worker 
105*3ac0a46fSAndroid Build Coastguard Worker /* From table F.4 from the standard */
106*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT32 opj_dwt_alpha =  -1.586134342f;
107*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT32 opj_dwt_beta  =  -0.052980118f;
108*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT32 opj_dwt_gamma = 0.882911075f;
109*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT32 opj_dwt_delta = 0.443506852f;
110*3ac0a46fSAndroid Build Coastguard Worker 
111*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT32 opj_K      = 1.230174105f;
112*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT32 opj_invK   = (OPJ_FLOAT32)(1.0 / 1.230174105);
113*3ac0a46fSAndroid Build Coastguard Worker 
114*3ac0a46fSAndroid Build Coastguard Worker /*@}*/
115*3ac0a46fSAndroid Build Coastguard Worker 
116*3ac0a46fSAndroid Build Coastguard Worker /** @name Local static functions */
117*3ac0a46fSAndroid Build Coastguard Worker /*@{*/
118*3ac0a46fSAndroid Build Coastguard Worker 
119*3ac0a46fSAndroid Build Coastguard Worker /**
120*3ac0a46fSAndroid Build Coastguard Worker Forward lazy transform (horizontal)
121*3ac0a46fSAndroid Build Coastguard Worker */
122*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
123*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_INT32 * OPJ_RESTRICT b,
124*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_INT32 dn,
125*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_INT32 sn, OPJ_INT32 cas);
126*3ac0a46fSAndroid Build Coastguard Worker 
127*3ac0a46fSAndroid Build Coastguard Worker /**
128*3ac0a46fSAndroid Build Coastguard Worker Forward 9-7 wavelet transform in 1-D
129*3ac0a46fSAndroid Build Coastguard Worker */
130*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_encode_1_real(void *a, OPJ_INT32 dn, OPJ_INT32 sn,
131*3ac0a46fSAndroid Build Coastguard Worker                                   OPJ_INT32 cas);
132*3ac0a46fSAndroid Build Coastguard Worker /**
133*3ac0a46fSAndroid Build Coastguard Worker Explicit calculation of the Quantization Stepsizes
134*3ac0a46fSAndroid Build Coastguard Worker */
135*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
136*3ac0a46fSAndroid Build Coastguard Worker                                     opj_stepsize_t *bandno_stepsize);
137*3ac0a46fSAndroid Build Coastguard Worker /**
138*3ac0a46fSAndroid Build Coastguard Worker Inverse wavelet transform in 2-D.
139*3ac0a46fSAndroid Build Coastguard Worker */
140*3ac0a46fSAndroid Build Coastguard Worker static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
141*3ac0a46fSAndroid Build Coastguard Worker                                     const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i);
142*3ac0a46fSAndroid Build Coastguard Worker 
143*3ac0a46fSAndroid Build Coastguard Worker static OPJ_BOOL opj_dwt_decode_partial_tile(
144*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_tilecomp_t* tilec,
145*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 numres);
146*3ac0a46fSAndroid Build Coastguard Worker 
147*3ac0a46fSAndroid Build Coastguard Worker /* Forward transform, for the vertical pass, processing cols columns */
148*3ac0a46fSAndroid Build Coastguard Worker /* where cols <= NB_ELTS_V8 */
149*3ac0a46fSAndroid Build Coastguard Worker /* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
150*3ac0a46fSAndroid Build Coastguard Worker typedef void (*opj_encode_and_deinterleave_v_fnptr_type)(
151*3ac0a46fSAndroid Build Coastguard Worker     void *array,
152*3ac0a46fSAndroid Build Coastguard Worker     void *tmp,
153*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 height,
154*3ac0a46fSAndroid Build Coastguard Worker     OPJ_BOOL even,
155*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 stride_width,
156*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 cols);
157*3ac0a46fSAndroid Build Coastguard Worker 
158*3ac0a46fSAndroid Build Coastguard Worker /* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
159*3ac0a46fSAndroid Build Coastguard Worker typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)(
160*3ac0a46fSAndroid Build Coastguard Worker     void *row,
161*3ac0a46fSAndroid Build Coastguard Worker     void *tmp,
162*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 width,
163*3ac0a46fSAndroid Build Coastguard Worker     OPJ_BOOL even);
164*3ac0a46fSAndroid Build Coastguard Worker 
165*3ac0a46fSAndroid Build Coastguard Worker static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
166*3ac0a46fSAndroid Build Coastguard Worker         opj_tcd_tilecomp_t * tilec,
167*3ac0a46fSAndroid Build Coastguard Worker         opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
168*3ac0a46fSAndroid Build Coastguard Worker         opj_encode_and_deinterleave_h_one_row_fnptr_type
169*3ac0a46fSAndroid Build Coastguard Worker         p_encode_and_deinterleave_h_one_row);
170*3ac0a46fSAndroid Build Coastguard Worker 
171*3ac0a46fSAndroid Build Coastguard Worker static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
172*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 i);
173*3ac0a46fSAndroid Build Coastguard Worker 
174*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
175*3ac0a46fSAndroid Build Coastguard Worker /* Inverse 9-7 wavelet transform in 1-D. */
176*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
177*3ac0a46fSAndroid Build Coastguard Worker 
178*3ac0a46fSAndroid Build Coastguard Worker /*@}*/
179*3ac0a46fSAndroid Build Coastguard Worker 
180*3ac0a46fSAndroid Build Coastguard Worker /*@}*/
181*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Index helpers for an interleaved line buffer.
 *
 * These macros are unhygienic by design: they expand to references to
 * variables named `a` (the buffer), `sn`, `dn` and `a_count` that must be in
 * scope at the point of use.
 *
 * IDX_S(i) / IDX_D(i) are fully parenthesised so that they keep their value
 * when embedded in a larger expression (e.g. 100 / IDX_S(5)).
 */
#define IDX_S(i) ((i) * 2)
#define IDX_D(i) (1 + (i) * 2)
#define UNDERFLOW_SN(i) ((i) >= sn && sn > 0)
#define UNDERFLOW_DN(i) ((i) >= dn && dn > 0)
#define OVERFLOW_S(i) (IDX_S(i) >= a_count)
#define OVERFLOW_D(i) (IDX_D(i) >= a_count)

/* Raw (unchecked) access to the i-th even / odd slot of a[] */
#define OPJ_S(i) a[IDX_S(i)]
#define OPJ_D(i) a[IDX_D(i)]
/* Clamped access: i < 0 reads slot 0; i past sn (resp. dn) reads the last */
/* slot of that band; an index beyond a_count falls back to slot i - 1. */
#define OPJ_S_(i) ((i) < 0 ? OPJ_S(0) : (UNDERFLOW_SN(i) ? OPJ_S(sn - 1) : OVERFLOW_S(i) ? OPJ_S((i) - 1) : OPJ_S(i)))
#define OPJ_D_(i) ((i) < 0 ? OPJ_D(0) : (UNDERFLOW_DN(i) ? OPJ_D(dn - 1) : OVERFLOW_D(i) ? OPJ_D((i) - 1) : OPJ_D(i)))
/* new */
/* Same clamping, but the upper bound is taken from the other band's count */
#define OPJ_SS_(i) ((i) < 0 ? OPJ_S(0) : (UNDERFLOW_DN(i) ? OPJ_S(dn - 1) : OVERFLOW_S(i) ? OPJ_S((i) - 1) : OPJ_S(i)))
#define OPJ_DD_(i) ((i) < 0 ? OPJ_D(0) : (UNDERFLOW_SN(i) ? OPJ_D(sn - 1) : OVERFLOW_D(i) ? OPJ_D((i) - 1) : OPJ_D(i)))
196*3ac0a46fSAndroid Build Coastguard Worker 
197*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                                                              */
198*3ac0a46fSAndroid Build Coastguard Worker /* This table contains the norms of the 5-3 wavelets for different bands. */
199*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                                                             */
200*3ac0a46fSAndroid Build Coastguard Worker /* FIXME! the array should really be extended up to 33 resolution levels */
201*3ac0a46fSAndroid Build Coastguard Worker /* See https://github.com/uclouvain/openjpeg/issues/493 */
202*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT64 opj_dwt_norms[4][10] = {
203*3ac0a46fSAndroid Build Coastguard Worker     {1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3},
204*3ac0a46fSAndroid Build Coastguard Worker     {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
205*3ac0a46fSAndroid Build Coastguard Worker     {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
206*3ac0a46fSAndroid Build Coastguard Worker     {.7186, .9218, 1.586, 3.043, 6.019, 12.01, 24.00, 47.97, 95.93}
207*3ac0a46fSAndroid Build Coastguard Worker };
208*3ac0a46fSAndroid Build Coastguard Worker 
209*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                                                              */
210*3ac0a46fSAndroid Build Coastguard Worker /* This table contains the norms of the 9-7 wavelets for different bands. */
211*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                                                             */
212*3ac0a46fSAndroid Build Coastguard Worker /* FIXME! the array should really be extended up to 33 resolution levels */
213*3ac0a46fSAndroid Build Coastguard Worker /* See https://github.com/uclouvain/openjpeg/issues/493 */
214*3ac0a46fSAndroid Build Coastguard Worker static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = {
215*3ac0a46fSAndroid Build Coastguard Worker     {1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9},
216*3ac0a46fSAndroid Build Coastguard Worker     {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
217*3ac0a46fSAndroid Build Coastguard Worker     {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
218*3ac0a46fSAndroid Build Coastguard Worker     {2.080, 3.865, 8.307, 17.18, 34.71, 69.59, 139.3, 278.6, 557.2}
219*3ac0a46fSAndroid Build Coastguard Worker };
220*3ac0a46fSAndroid Build Coastguard Worker 
221*3ac0a46fSAndroid Build Coastguard Worker /*
222*3ac0a46fSAndroid Build Coastguard Worker ==========================================================
223*3ac0a46fSAndroid Build Coastguard Worker    local functions
224*3ac0a46fSAndroid Build Coastguard Worker ==========================================================
225*3ac0a46fSAndroid Build Coastguard Worker */
226*3ac0a46fSAndroid Build Coastguard Worker 
227*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
228*3ac0a46fSAndroid Build Coastguard Worker /* Forward lazy transform (horizontal).  */
229*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,OPJ_INT32 * OPJ_RESTRICT b,OPJ_INT32 dn,OPJ_INT32 sn,OPJ_INT32 cas)230*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
231*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_INT32 * OPJ_RESTRICT b,
232*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_INT32 dn,
233*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_INT32 sn, OPJ_INT32 cas)
234*3ac0a46fSAndroid Build Coastguard Worker {
235*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
236*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT l_dest = b;
237*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas;
238*3ac0a46fSAndroid Build Coastguard Worker 
239*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0; i < sn; ++i) {
240*3ac0a46fSAndroid Build Coastguard Worker         *l_dest++ = *l_src;
241*3ac0a46fSAndroid Build Coastguard Worker         l_src += 2;
242*3ac0a46fSAndroid Build Coastguard Worker     }
243*3ac0a46fSAndroid Build Coastguard Worker 
244*3ac0a46fSAndroid Build Coastguard Worker     l_dest = b + sn;
245*3ac0a46fSAndroid Build Coastguard Worker     l_src = a + 1 - cas;
246*3ac0a46fSAndroid Build Coastguard Worker 
247*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0; i < dn; ++i)  {
248*3ac0a46fSAndroid Build Coastguard Worker         *l_dest++ = *l_src;
249*3ac0a46fSAndroid Build Coastguard Worker         l_src += 2;
250*3ac0a46fSAndroid Build Coastguard Worker     }
251*3ac0a46fSAndroid Build Coastguard Worker }
252*3ac0a46fSAndroid Build Coastguard Worker 
253*3ac0a46fSAndroid Build Coastguard Worker #ifdef STANDARD_SLOW_VERSION
254*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
255*3ac0a46fSAndroid Build Coastguard Worker /* Inverse lazy transform (horizontal).  */
256*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
opj_dwt_interleave_h(const opj_dwt_t * h,OPJ_INT32 * a)257*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a)
258*3ac0a46fSAndroid Build Coastguard Worker {
259*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 *ai = a;
260*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 *bi = h->mem + h->cas;
261*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32  i    = h->sn;
262*3ac0a46fSAndroid Build Coastguard Worker     while (i--) {
263*3ac0a46fSAndroid Build Coastguard Worker         *bi = *(ai++);
264*3ac0a46fSAndroid Build Coastguard Worker         bi += 2;
265*3ac0a46fSAndroid Build Coastguard Worker     }
266*3ac0a46fSAndroid Build Coastguard Worker     ai  = a + h->sn;
267*3ac0a46fSAndroid Build Coastguard Worker     bi  = h->mem + 1 - h->cas;
268*3ac0a46fSAndroid Build Coastguard Worker     i   = h->dn ;
269*3ac0a46fSAndroid Build Coastguard Worker     while (i--) {
270*3ac0a46fSAndroid Build Coastguard Worker         *bi = *(ai++);
271*3ac0a46fSAndroid Build Coastguard Worker         bi += 2;
272*3ac0a46fSAndroid Build Coastguard Worker     }
273*3ac0a46fSAndroid Build Coastguard Worker }
274*3ac0a46fSAndroid Build Coastguard Worker 
275*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
276*3ac0a46fSAndroid Build Coastguard Worker /* Inverse lazy transform (vertical).    */
277*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
opj_dwt_interleave_v(const opj_dwt_t * v,OPJ_INT32 * a,OPJ_INT32 x)278*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
279*3ac0a46fSAndroid Build Coastguard Worker {
280*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 *ai = a;
281*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 *bi = v->mem + v->cas;
282*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32  i = v->sn;
283*3ac0a46fSAndroid Build Coastguard Worker     while (i--) {
284*3ac0a46fSAndroid Build Coastguard Worker         *bi = *ai;
285*3ac0a46fSAndroid Build Coastguard Worker         bi += 2;
286*3ac0a46fSAndroid Build Coastguard Worker         ai += x;
287*3ac0a46fSAndroid Build Coastguard Worker     }
288*3ac0a46fSAndroid Build Coastguard Worker     ai = a + (v->sn * (OPJ_SIZE_T)x);
289*3ac0a46fSAndroid Build Coastguard Worker     bi = v->mem + 1 - v->cas;
290*3ac0a46fSAndroid Build Coastguard Worker     i = v->dn ;
291*3ac0a46fSAndroid Build Coastguard Worker     while (i--) {
292*3ac0a46fSAndroid Build Coastguard Worker         *bi = *ai;
293*3ac0a46fSAndroid Build Coastguard Worker         bi += 2;
294*3ac0a46fSAndroid Build Coastguard Worker         ai += x;
295*3ac0a46fSAndroid Build Coastguard Worker     }
296*3ac0a46fSAndroid Build Coastguard Worker }
297*3ac0a46fSAndroid Build Coastguard Worker 
298*3ac0a46fSAndroid Build Coastguard Worker #endif /* STANDARD_SLOW_VERSION */
299*3ac0a46fSAndroid Build Coastguard Worker 
300*3ac0a46fSAndroid Build Coastguard Worker #ifdef STANDARD_SLOW_VERSION
301*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                            */
302*3ac0a46fSAndroid Build Coastguard Worker /* Inverse 5-3 wavelet transform in 1-D. */
303*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
opj_dwt_decode_1_(OPJ_INT32 * a,OPJ_SIZE_T a_count,OPJ_INT32 dn,OPJ_INT32 sn,OPJ_INT32 cas)304*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
305*3ac0a46fSAndroid Build Coastguard Worker                               OPJ_INT32 sn, OPJ_INT32 cas)
306*3ac0a46fSAndroid Build Coastguard Worker {
307*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
308*3ac0a46fSAndroid Build Coastguard Worker 
309*3ac0a46fSAndroid Build Coastguard Worker     if (!cas) {
310*3ac0a46fSAndroid Build Coastguard Worker         if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
311*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i < sn; i++) {
312*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
313*3ac0a46fSAndroid Build Coastguard Worker             }
314*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i < dn; i++) {
315*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
316*3ac0a46fSAndroid Build Coastguard Worker             }
317*3ac0a46fSAndroid Build Coastguard Worker         }
318*3ac0a46fSAndroid Build Coastguard Worker     } else {
319*3ac0a46fSAndroid Build Coastguard Worker         if (!sn  && dn == 1) {        /* NEW :  CASE ONE ELEMENT */
320*3ac0a46fSAndroid Build Coastguard Worker             OPJ_S(0) /= 2;
321*3ac0a46fSAndroid Build Coastguard Worker         } else {
322*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i < sn; i++) {
323*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
324*3ac0a46fSAndroid Build Coastguard Worker             }
325*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i < dn; i++) {
326*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
327*3ac0a46fSAndroid Build Coastguard Worker             }
328*3ac0a46fSAndroid Build Coastguard Worker         }
329*3ac0a46fSAndroid Build Coastguard Worker     }
330*3ac0a46fSAndroid Build Coastguard Worker }
331*3ac0a46fSAndroid Build Coastguard Worker 
opj_dwt_decode_1(const opj_dwt_t * v)332*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_decode_1(const opj_dwt_t *v)
333*3ac0a46fSAndroid Build Coastguard Worker {
334*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_decode_1_(v->mem, v->mem_count, v->dn, v->sn, v->cas);
335*3ac0a46fSAndroid Build Coastguard Worker }
336*3ac0a46fSAndroid Build Coastguard Worker 
337*3ac0a46fSAndroid Build Coastguard Worker #endif /* STANDARD_SLOW_VERSION */
338*3ac0a46fSAndroid Build Coastguard Worker 
339*3ac0a46fSAndroid Build Coastguard Worker #if !defined(STANDARD_SLOW_VERSION)
/*
 * One-row pass used when the row starts on an even coordinate ("cas0").
 *
 * On entry tiledp holds two contiguous runs: tiledp[0 .. sn) is read as the
 * even-position input and tiledp[sn .. len) as the odd-position input. The
 * interleaved result is built in tmp and then copied back over tiledp.
 *
 * tmp    : scratch buffer; elements 0 .. len - 1 are written
 * sn     : offset of the odd-position input inside tiledp
 * len    : total number of samples in the row; must be > 1 (asserted)
 * tiledp : row to transform, overwritten with the result
 */
static void  opj_idwt53_h_cas0(OPJ_INT32* tmp,
                               const OPJ_INT32 sn,
                               const OPJ_INT32 len,
                               OPJ_INT32* tiledp)
{
    OPJ_INT32 i, j;
    const OPJ_INT32* in_even = &tiledp[0];
    const OPJ_INT32* in_odd = &tiledp[sn];

#ifdef TWO_PASS_VERSION
    /* For documentation purpose: performs lifting in two iterations, */
    /* but without explicit interleaving */

    assert(len > 1);

    /* Even */
    tmp[0] = in_even[0] - ((in_odd[0] + 1) >> 1);
    for (i = 2, j = 0; i <= len - 2; i += 2, j++) {
        tmp[i] = in_even[j + 1] - ((in_odd[j] + in_odd[j + 1] + 2) >> 2);
    }
    if (len & 1) { /* if len is odd */
        tmp[len - 1] = in_even[(len - 1) / 2] - ((in_odd[(len - 2) / 2] + 1) >> 1);
    }

    /* Odd */
    for (i = 1, j = 0; i < len - 1; i += 2, j++) {
        tmp[i] = in_odd[j] + ((tmp[i - 1] + tmp[i + 1]) >> 1);
    }
    if (!(len & 1)) { /* if len is even */
        tmp[len - 1] = in_odd[(len - 1) / 2] + tmp[len - 2];
    }
#else
    OPJ_INT32 d1c, d1n, s1n, s0c, s0n;

    assert(len > 1);

    /* Improved version of the TWO_PASS_VERSION: */
    /* Performs lifting in one single iteration. Saves memory */
    /* accesses and explicit interleaving. */
    s1n = in_even[0];
    d1n = in_odd[0];
    s0n = s1n - ((d1n + 1) >> 1);

    /* Each iteration emits one even/odd output pair (tmp[i], tmp[i + 1]) */
    for (i = 0, j = 1; i < (len - 3); i += 2, j++) {
        d1c = d1n;
        s0c = s0n;

        s1n = in_even[j];
        d1n = in_odd[j];

        s0n = s1n - ((d1c + d1n + 2) >> 2);

        tmp[i  ] = s0c;
        tmp[i + 1] = opj_int_add_no_overflow(d1c, opj_int_add_no_overflow(s0c,
                                             s0n) >> 1);
    }

    tmp[i] = s0n;

    /* Tail: the last one or two outputs depend on the parity of len */
    if (len & 1) {
        tmp[len - 1] = in_even[(len - 1) / 2] - ((d1n + 1) >> 1);
        tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1);
    } else {
        tmp[len - 1] = d1n + s0n;
    }
#endif
    memcpy(tiledp, tmp, (OPJ_UINT32)len * sizeof(OPJ_INT32));
}
408*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Inverse 5-3 wavelet transform in 1-D for one row, when the left-most
 * sample is on an odd coordinate.
 *
 * Inputs are read from tiledp: the values feeding odd output positions
 * start at tiledp[0], the values feeding even output positions start at
 * tiledp[sn]. The reconstructed, interleaved row is built in tmp and then
 * copied back over the first len entries of tiledp.
 *
 * In the default (one-pass) implementation every lifting sum/difference
 * goes through opj_int_add_no_overflow() / opj_int_sub_no_overflow(), so
 * extreme coefficient values wrap instead of triggering signed-overflow
 * undefined behaviour. Results are unchanged whenever no overflow occurs.
 *
 * @param tmp     scratch buffer receiving the len interleaved output samples
 * @param sn      index in tiledp of the first even-position input
 * @param len     total number of samples in the row; must be > 2
 * @param tiledp  row buffer, read as input and overwritten with the result
 */
static void  opj_idwt53_h_cas1(OPJ_INT32* tmp,
                               const OPJ_INT32 sn,
                               const OPJ_INT32 len,
                               OPJ_INT32* tiledp)
{
    OPJ_INT32 i, j;
    const OPJ_INT32* in_even = &tiledp[sn];
    const OPJ_INT32* in_odd = &tiledp[0];

#ifdef TWO_PASS_VERSION
    /* For documentation purpose: performs lifting in two iterations, */
    /* but without explicit interleaving */

    assert(len > 2);

    /* Odd */
    for (i = 1, j = 0; i < len - 1; i += 2, j++) {
        tmp[i] = in_odd[j] - ((in_even[j] + in_even[j + 1] + 2) >> 2);
    }
    if (!(len & 1)) {
        tmp[len - 1] = in_odd[len / 2 - 1] - ((in_even[len / 2 - 1] + 1) >> 1);
    }

    /* Even */
    tmp[0] = in_even[0] + tmp[1];
    for (i = 2, j = 1; i < len - 1; i += 2, j++) {
        tmp[i] = in_even[j] + ((tmp[i + 1] + tmp[i - 1]) >> 1);
    }
    if (len & 1) {
        tmp[len - 1] = in_even[len / 2] + tmp[len - 2];
    }
#else
    OPJ_INT32 s1, s2, dc, dn;

    assert(len > 2);

    /* Improved version of the TWO_PASS_VERSION: */
    /* Performs lifting in one single iteration. Saves memory */
    /* accesses and explicit interleaving. */

    /* First odd-position output: in_odd[0] - ((in_even[0] + s1 + 2) >> 2) */
    s1 = in_even[1];
    dc = opj_int_sub_no_overflow(
             in_odd[0],
             opj_int_add_no_overflow(
                 opj_int_add_no_overflow(in_even[0], s1), 2) >> 2);
    /* Left boundary: only one odd neighbour exists for output 0 */
    tmp[0] = opj_int_add_no_overflow(in_even[0], dc);

    for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) {

        s2 = in_even[j + 1];

        /* dn = in_odd[j] - ((s1 + s2 + 2) >> 2) */
        dn = opj_int_sub_no_overflow(
                 in_odd[j],
                 opj_int_add_no_overflow(
                     opj_int_add_no_overflow(s1, s2), 2) >> 2);
        tmp[i  ] = dc;
        /* s1 + ((dn + dc) >> 1) */
        tmp[i + 1] = opj_int_add_no_overflow(s1,
                                             opj_int_add_no_overflow(dn, dc) >> 1);

        dc = dn;
        s1 = s2;
    }

    tmp[i] = dc;

    if (!(len & 1)) {
        /* Even length: the last odd-position output has a single even */
        /* neighbour, hence (s1 + 1) >> 1 instead of (s1 + s2 + 2) >> 2 */
        dn = opj_int_sub_no_overflow(in_odd[len / 2 - 1],
                                     opj_int_add_no_overflow(s1, 1) >> 1);
        tmp[len - 2] = opj_int_add_no_overflow(s1,
                                               opj_int_add_no_overflow(dn, dc) >> 1);
        tmp[len - 1] = dn;
    } else {
        /* Odd length: right boundary mirrors the left one */
        tmp[len - 1] = opj_int_add_no_overflow(s1, dc);
    }
#endif
    memcpy(tiledp, tmp, (OPJ_UINT32)len * sizeof(OPJ_INT32));
}
477*3ac0a46fSAndroid Build Coastguard Worker 
478*3ac0a46fSAndroid Build Coastguard Worker 
479*3ac0a46fSAndroid Build Coastguard Worker #endif /* !defined(STANDARD_SLOW_VERSION) */
480*3ac0a46fSAndroid Build Coastguard Worker 
481*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                            */
482*3ac0a46fSAndroid Build Coastguard Worker /* Inverse 5-3 wavelet transform in 1-D for one row. */
483*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
484*3ac0a46fSAndroid Build Coastguard Worker /* Performs interleave, inverse wavelet transform and copy back to buffer */
opj_idwt53_h(const opj_dwt_t * dwt,OPJ_INT32 * tiledp)485*3ac0a46fSAndroid Build Coastguard Worker static void opj_idwt53_h(const opj_dwt_t *dwt,
486*3ac0a46fSAndroid Build Coastguard Worker                          OPJ_INT32* tiledp)
487*3ac0a46fSAndroid Build Coastguard Worker {
488*3ac0a46fSAndroid Build Coastguard Worker #ifdef STANDARD_SLOW_VERSION
489*3ac0a46fSAndroid Build Coastguard Worker     /* For documentation purpose */
490*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_interleave_h(dwt, tiledp);
491*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_decode_1(dwt);
492*3ac0a46fSAndroid Build Coastguard Worker     memcpy(tiledp, dwt->mem, (OPJ_UINT32)(dwt->sn + dwt->dn) * sizeof(OPJ_INT32));
493*3ac0a46fSAndroid Build Coastguard Worker #else
494*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn = dwt->sn;
495*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 len = sn + dwt->dn;
496*3ac0a46fSAndroid Build Coastguard Worker     if (dwt->cas == 0) { /* Left-most sample is on even coordinate */
497*3ac0a46fSAndroid Build Coastguard Worker         if (len > 1) {
498*3ac0a46fSAndroid Build Coastguard Worker             opj_idwt53_h_cas0(dwt->mem, sn, len, tiledp);
499*3ac0a46fSAndroid Build Coastguard Worker         } else {
500*3ac0a46fSAndroid Build Coastguard Worker             /* Unmodified value */
501*3ac0a46fSAndroid Build Coastguard Worker         }
502*3ac0a46fSAndroid Build Coastguard Worker     } else { /* Left-most sample is on odd coordinate */
503*3ac0a46fSAndroid Build Coastguard Worker         if (len == 1) {
504*3ac0a46fSAndroid Build Coastguard Worker             tiledp[0] /= 2;
505*3ac0a46fSAndroid Build Coastguard Worker         } else if (len == 2) {
506*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32* out = dwt->mem;
507*3ac0a46fSAndroid Build Coastguard Worker             const OPJ_INT32* in_even = &tiledp[sn];
508*3ac0a46fSAndroid Build Coastguard Worker             const OPJ_INT32* in_odd = &tiledp[0];
509*3ac0a46fSAndroid Build Coastguard Worker             out[1] = in_odd[0] - ((in_even[0] + 1) >> 1);
510*3ac0a46fSAndroid Build Coastguard Worker             out[0] = in_even[0] + out[1];
511*3ac0a46fSAndroid Build Coastguard Worker             memcpy(tiledp, dwt->mem, (OPJ_UINT32)len * sizeof(OPJ_INT32));
512*3ac0a46fSAndroid Build Coastguard Worker         } else if (len > 2) {
513*3ac0a46fSAndroid Build Coastguard Worker             opj_idwt53_h_cas1(dwt->mem, sn, len, tiledp);
514*3ac0a46fSAndroid Build Coastguard Worker         }
515*3ac0a46fSAndroid Build Coastguard Worker     }
516*3ac0a46fSAndroid Build Coastguard Worker #endif
517*3ac0a46fSAndroid Build Coastguard Worker }
518*3ac0a46fSAndroid Build Coastguard Worker 
519*3ac0a46fSAndroid Build Coastguard Worker #if (defined(__SSE2__) || defined(__AVX2__)) && !defined(STANDARD_SLOW_VERSION)
520*3ac0a46fSAndroid Build Coastguard Worker 
/* Convenience macros to improve the readability of the formulas. */
/* VREG is one vector register of packed 32-bit integers: a 256-bit */
/* register when building with AVX2, a 128-bit (SSE2) one otherwise. */
#if __AVX2__
#define VREG        __m256i
/* Constant broadcast to every lane */
#define LOAD_CST(x) _mm256_set1_epi32(x)
/* Aligned / unaligned loads */
#define LOAD(x)     _mm256_load_si256((const VREG*)(x))
#define LOADU(x)    _mm256_loadu_si256((const VREG*)(x))
/* Aligned / unaligned stores */
#define STORE(x,y)  _mm256_store_si256((VREG*)(x),(y))
#define STOREU(x,y) _mm256_storeu_si256((VREG*)(x),(y))
/* Lane-wise 32-bit arithmetic: add, subtract, arithmetic shift right */
#define ADD(x,y)    _mm256_add_epi32((x),(y))
#define SUB(x,y)    _mm256_sub_epi32((x),(y))
#define SAR(x,y)    _mm256_srai_epi32((x),(y))
#else
#define VREG        __m128i
/* Constant broadcast to every lane */
#define LOAD_CST(x) _mm_set1_epi32(x)
/* Aligned / unaligned loads */
#define LOAD(x)     _mm_load_si128((const VREG*)(x))
#define LOADU(x)    _mm_loadu_si128((const VREG*)(x))
/* Aligned / unaligned stores */
#define STORE(x,y)  _mm_store_si128((VREG*)(x),(y))
#define STOREU(x,y) _mm_storeu_si128((VREG*)(x),(y))
/* Lane-wise 32-bit arithmetic: add, subtract, arithmetic shift right */
#define ADD(x,y)    _mm_add_epi32((x),(y))
#define SUB(x,y)    _mm_sub_epi32((x),(y))
#define SAR(x,y)    _mm_srai_epi32((x),(y))
#endif
/* Three-operand add, common to both register widths */
#define ADD3(x,y,z) ADD(ADD(x,y),z)

544*3ac0a46fSAndroid Build Coastguard Worker 
545*3ac0a46fSAndroid Build Coastguard Worker static
opj_idwt53_v_final_memcpy(OPJ_INT32 * tiledp_col,const OPJ_INT32 * tmp,OPJ_INT32 len,OPJ_SIZE_T stride)546*3ac0a46fSAndroid Build Coastguard Worker void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
547*3ac0a46fSAndroid Build Coastguard Worker                                const OPJ_INT32* tmp,
548*3ac0a46fSAndroid Build Coastguard Worker                                OPJ_INT32 len,
549*3ac0a46fSAndroid Build Coastguard Worker                                OPJ_SIZE_T stride)
550*3ac0a46fSAndroid Build Coastguard Worker {
551*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
552*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0; i < len; ++i) {
553*3ac0a46fSAndroid Build Coastguard Worker         /* A memcpy(&tiledp_col[i * stride + 0],
554*3ac0a46fSAndroid Build Coastguard Worker                     &tmp[PARALLEL_COLS_53 * i + 0],
555*3ac0a46fSAndroid Build Coastguard Worker                     PARALLEL_COLS_53 * sizeof(OPJ_INT32))
556*3ac0a46fSAndroid Build Coastguard Worker            would do but would be a tiny bit slower.
557*3ac0a46fSAndroid Build Coastguard Worker            We can take here advantage of our knowledge of alignment */
558*3ac0a46fSAndroid Build Coastguard Worker         STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + 0],
559*3ac0a46fSAndroid Build Coastguard Worker                LOAD(&tmp[PARALLEL_COLS_53 * i + 0]));
560*3ac0a46fSAndroid Build Coastguard Worker         STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + VREG_INT_COUNT],
561*3ac0a46fSAndroid Build Coastguard Worker                LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT]));
562*3ac0a46fSAndroid Build Coastguard Worker     }
563*3ac0a46fSAndroid Build Coastguard Worker }
564*3ac0a46fSAndroid Build Coastguard Worker 
565*3ac0a46fSAndroid Build Coastguard Worker /** Vertical inverse 5x3 wavelet transform for 8 columns in SSE2, or
566*3ac0a46fSAndroid Build Coastguard Worker  * 16 in AVX2, when top-most pixel is on even coordinate */
opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(OPJ_INT32 * tmp,const OPJ_INT32 sn,const OPJ_INT32 len,OPJ_INT32 * tiledp_col,const OPJ_SIZE_T stride)567*3ac0a46fSAndroid Build Coastguard Worker static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
568*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* tmp,
569*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn,
570*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 len,
571*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* tiledp_col,
572*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_SIZE_T stride)
573*3ac0a46fSAndroid Build Coastguard Worker {
574*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* in_even = &tiledp_col[0];
575*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* in_odd = &tiledp_col[(OPJ_SIZE_T)sn * stride];
576*3ac0a46fSAndroid Build Coastguard Worker 
577*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
578*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T j;
579*3ac0a46fSAndroid Build Coastguard Worker     VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0;
580*3ac0a46fSAndroid Build Coastguard Worker     VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1;
581*3ac0a46fSAndroid Build Coastguard Worker     const VREG two = LOAD_CST(2);
582*3ac0a46fSAndroid Build Coastguard Worker 
583*3ac0a46fSAndroid Build Coastguard Worker     assert(len > 1);
584*3ac0a46fSAndroid Build Coastguard Worker #if __AVX2__
585*3ac0a46fSAndroid Build Coastguard Worker     assert(PARALLEL_COLS_53 == 16);
586*3ac0a46fSAndroid Build Coastguard Worker     assert(VREG_INT_COUNT == 8);
587*3ac0a46fSAndroid Build Coastguard Worker #else
588*3ac0a46fSAndroid Build Coastguard Worker     assert(PARALLEL_COLS_53 == 8);
589*3ac0a46fSAndroid Build Coastguard Worker     assert(VREG_INT_COUNT == 4);
590*3ac0a46fSAndroid Build Coastguard Worker #endif
591*3ac0a46fSAndroid Build Coastguard Worker 
592*3ac0a46fSAndroid Build Coastguard Worker     /* Note: loads of input even/odd values must be done in a unaligned */
593*3ac0a46fSAndroid Build Coastguard Worker     /* fashion. But stores in tmp can be done with aligned store, since */
594*3ac0a46fSAndroid Build Coastguard Worker     /* the temporary buffer is properly aligned */
595*3ac0a46fSAndroid Build Coastguard Worker     assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
596*3ac0a46fSAndroid Build Coastguard Worker 
597*3ac0a46fSAndroid Build Coastguard Worker     s1n_0 = LOADU(in_even + 0);
598*3ac0a46fSAndroid Build Coastguard Worker     s1n_1 = LOADU(in_even + VREG_INT_COUNT);
599*3ac0a46fSAndroid Build Coastguard Worker     d1n_0 = LOADU(in_odd);
600*3ac0a46fSAndroid Build Coastguard Worker     d1n_1 = LOADU(in_odd + VREG_INT_COUNT);
601*3ac0a46fSAndroid Build Coastguard Worker 
602*3ac0a46fSAndroid Build Coastguard Worker     /* s0n = s1n - ((d1n + 1) >> 1); <==> */
603*3ac0a46fSAndroid Build Coastguard Worker     /* s0n = s1n - ((d1n + d1n + 2) >> 2); */
604*3ac0a46fSAndroid Build Coastguard Worker     s0n_0 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2));
605*3ac0a46fSAndroid Build Coastguard Worker     s0n_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2));
606*3ac0a46fSAndroid Build Coastguard Worker 
607*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0, j = 1; i < (len - 3); i += 2, j++) {
608*3ac0a46fSAndroid Build Coastguard Worker         d1c_0 = d1n_0;
609*3ac0a46fSAndroid Build Coastguard Worker         s0c_0 = s0n_0;
610*3ac0a46fSAndroid Build Coastguard Worker         d1c_1 = d1n_1;
611*3ac0a46fSAndroid Build Coastguard Worker         s0c_1 = s0n_1;
612*3ac0a46fSAndroid Build Coastguard Worker 
613*3ac0a46fSAndroid Build Coastguard Worker         s1n_0 = LOADU(in_even + j * stride);
614*3ac0a46fSAndroid Build Coastguard Worker         s1n_1 = LOADU(in_even + j * stride + VREG_INT_COUNT);
615*3ac0a46fSAndroid Build Coastguard Worker         d1n_0 = LOADU(in_odd + j * stride);
616*3ac0a46fSAndroid Build Coastguard Worker         d1n_1 = LOADU(in_odd + j * stride + VREG_INT_COUNT);
617*3ac0a46fSAndroid Build Coastguard Worker 
618*3ac0a46fSAndroid Build Coastguard Worker         /*s0n = s1n - ((d1c + d1n + 2) >> 2);*/
619*3ac0a46fSAndroid Build Coastguard Worker         s0n_0 = SUB(s1n_0, SAR(ADD3(d1c_0, d1n_0, two), 2));
620*3ac0a46fSAndroid Build Coastguard Worker         s0n_1 = SUB(s1n_1, SAR(ADD3(d1c_1, d1n_1, two), 2));
621*3ac0a46fSAndroid Build Coastguard Worker 
622*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (i + 0), s0c_0);
623*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (i + 0) + VREG_INT_COUNT, s0c_1);
624*3ac0a46fSAndroid Build Coastguard Worker 
625*3ac0a46fSAndroid Build Coastguard Worker         /* d1c + ((s0c + s0n) >> 1) */
626*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (i + 1) + 0,
627*3ac0a46fSAndroid Build Coastguard Worker               ADD(d1c_0, SAR(ADD(s0c_0, s0n_0), 1)));
628*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (i + 1) + VREG_INT_COUNT,
629*3ac0a46fSAndroid Build Coastguard Worker               ADD(d1c_1, SAR(ADD(s0c_1, s0n_1), 1)));
630*3ac0a46fSAndroid Build Coastguard Worker     }
631*3ac0a46fSAndroid Build Coastguard Worker 
632*3ac0a46fSAndroid Build Coastguard Worker     STORE(tmp + PARALLEL_COLS_53 * (i + 0) + 0, s0n_0);
633*3ac0a46fSAndroid Build Coastguard Worker     STORE(tmp + PARALLEL_COLS_53 * (i + 0) + VREG_INT_COUNT, s0n_1);
634*3ac0a46fSAndroid Build Coastguard Worker 
635*3ac0a46fSAndroid Build Coastguard Worker     if (len & 1) {
636*3ac0a46fSAndroid Build Coastguard Worker         VREG tmp_len_minus_1;
637*3ac0a46fSAndroid Build Coastguard Worker         s1n_0 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride);
638*3ac0a46fSAndroid Build Coastguard Worker         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
639*3ac0a46fSAndroid Build Coastguard Worker         tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2));
640*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1);
641*3ac0a46fSAndroid Build Coastguard Worker         /* d1n + ((s0n + tmp_len_minus_1) >> 1) */
642*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 2),
643*3ac0a46fSAndroid Build Coastguard Worker               ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1)));
644*3ac0a46fSAndroid Build Coastguard Worker 
645*3ac0a46fSAndroid Build Coastguard Worker         s1n_1 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride + VREG_INT_COUNT);
646*3ac0a46fSAndroid Build Coastguard Worker         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
647*3ac0a46fSAndroid Build Coastguard Worker         tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2));
648*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT,
649*3ac0a46fSAndroid Build Coastguard Worker               tmp_len_minus_1);
650*3ac0a46fSAndroid Build Coastguard Worker         /* d1n + ((s0n + tmp_len_minus_1) >> 1) */
651*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 2) + VREG_INT_COUNT,
652*3ac0a46fSAndroid Build Coastguard Worker               ADD(d1n_1, SAR(ADD(s0n_1, tmp_len_minus_1), 1)));
653*3ac0a46fSAndroid Build Coastguard Worker 
654*3ac0a46fSAndroid Build Coastguard Worker 
655*3ac0a46fSAndroid Build Coastguard Worker     } else {
656*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + 0,
657*3ac0a46fSAndroid Build Coastguard Worker               ADD(d1n_0, s0n_0));
658*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT,
659*3ac0a46fSAndroid Build Coastguard Worker               ADD(d1n_1, s0n_1));
660*3ac0a46fSAndroid Build Coastguard Worker     }
661*3ac0a46fSAndroid Build Coastguard Worker 
662*3ac0a46fSAndroid Build Coastguard Worker     opj_idwt53_v_final_memcpy(tiledp_col, tmp, len, stride);
663*3ac0a46fSAndroid Build Coastguard Worker }
664*3ac0a46fSAndroid Build Coastguard Worker 
665*3ac0a46fSAndroid Build Coastguard Worker 
666*3ac0a46fSAndroid Build Coastguard Worker /** Vertical inverse 5x3 wavelet transform for 8 columns in SSE2, or
667*3ac0a46fSAndroid Build Coastguard Worker  * 16 in AVX2, when top-most pixel is on odd coordinate */
opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(OPJ_INT32 * tmp,const OPJ_INT32 sn,const OPJ_INT32 len,OPJ_INT32 * tiledp_col,const OPJ_SIZE_T stride)668*3ac0a46fSAndroid Build Coastguard Worker static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
669*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* tmp,
670*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn,
671*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 len,
672*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* tiledp_col,
673*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_SIZE_T stride)
674*3ac0a46fSAndroid Build Coastguard Worker {
675*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
676*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T j;
677*3ac0a46fSAndroid Build Coastguard Worker 
678*3ac0a46fSAndroid Build Coastguard Worker     VREG s1_0, s2_0, dc_0, dn_0;
679*3ac0a46fSAndroid Build Coastguard Worker     VREG s1_1, s2_1, dc_1, dn_1;
680*3ac0a46fSAndroid Build Coastguard Worker     const VREG two = LOAD_CST(2);
681*3ac0a46fSAndroid Build Coastguard Worker 
682*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
683*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* in_odd = &tiledp_col[0];
684*3ac0a46fSAndroid Build Coastguard Worker 
685*3ac0a46fSAndroid Build Coastguard Worker     assert(len > 2);
686*3ac0a46fSAndroid Build Coastguard Worker #if __AVX2__
687*3ac0a46fSAndroid Build Coastguard Worker     assert(PARALLEL_COLS_53 == 16);
688*3ac0a46fSAndroid Build Coastguard Worker     assert(VREG_INT_COUNT == 8);
689*3ac0a46fSAndroid Build Coastguard Worker #else
690*3ac0a46fSAndroid Build Coastguard Worker     assert(PARALLEL_COLS_53 == 8);
691*3ac0a46fSAndroid Build Coastguard Worker     assert(VREG_INT_COUNT == 4);
692*3ac0a46fSAndroid Build Coastguard Worker #endif
693*3ac0a46fSAndroid Build Coastguard Worker 
694*3ac0a46fSAndroid Build Coastguard Worker     /* Note: loads of input even/odd values must be done in a unaligned */
695*3ac0a46fSAndroid Build Coastguard Worker     /* fashion. But stores in tmp can be done with aligned store, since */
696*3ac0a46fSAndroid Build Coastguard Worker     /* the temporary buffer is properly aligned */
697*3ac0a46fSAndroid Build Coastguard Worker     assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
698*3ac0a46fSAndroid Build Coastguard Worker 
699*3ac0a46fSAndroid Build Coastguard Worker     s1_0 = LOADU(in_even + stride);
700*3ac0a46fSAndroid Build Coastguard Worker     /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */
701*3ac0a46fSAndroid Build Coastguard Worker     dc_0 = SUB(LOADU(in_odd + 0),
702*3ac0a46fSAndroid Build Coastguard Worker                SAR(ADD3(LOADU(in_even + 0), s1_0, two), 2));
703*3ac0a46fSAndroid Build Coastguard Worker     STORE(tmp + PARALLEL_COLS_53 * 0, ADD(LOADU(in_even + 0), dc_0));
704*3ac0a46fSAndroid Build Coastguard Worker 
705*3ac0a46fSAndroid Build Coastguard Worker     s1_1 = LOADU(in_even + stride + VREG_INT_COUNT);
706*3ac0a46fSAndroid Build Coastguard Worker     /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */
707*3ac0a46fSAndroid Build Coastguard Worker     dc_1 = SUB(LOADU(in_odd + VREG_INT_COUNT),
708*3ac0a46fSAndroid Build Coastguard Worker                SAR(ADD3(LOADU(in_even + VREG_INT_COUNT), s1_1, two), 2));
709*3ac0a46fSAndroid Build Coastguard Worker     STORE(tmp + PARALLEL_COLS_53 * 0 + VREG_INT_COUNT,
710*3ac0a46fSAndroid Build Coastguard Worker           ADD(LOADU(in_even + VREG_INT_COUNT), dc_1));
711*3ac0a46fSAndroid Build Coastguard Worker 
712*3ac0a46fSAndroid Build Coastguard Worker     for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) {
713*3ac0a46fSAndroid Build Coastguard Worker 
714*3ac0a46fSAndroid Build Coastguard Worker         s2_0 = LOADU(in_even + (j + 1) * stride);
715*3ac0a46fSAndroid Build Coastguard Worker         s2_1 = LOADU(in_even + (j + 1) * stride + VREG_INT_COUNT);
716*3ac0a46fSAndroid Build Coastguard Worker 
717*3ac0a46fSAndroid Build Coastguard Worker         /* dn = in_odd[j * stride] - ((s1 + s2 + 2) >> 2); */
718*3ac0a46fSAndroid Build Coastguard Worker         dn_0 = SUB(LOADU(in_odd + j * stride),
719*3ac0a46fSAndroid Build Coastguard Worker                    SAR(ADD3(s1_0, s2_0, two), 2));
720*3ac0a46fSAndroid Build Coastguard Worker         dn_1 = SUB(LOADU(in_odd + j * stride + VREG_INT_COUNT),
721*3ac0a46fSAndroid Build Coastguard Worker                    SAR(ADD3(s1_1, s2_1, two), 2));
722*3ac0a46fSAndroid Build Coastguard Worker 
723*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * i, dc_0);
724*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * i + VREG_INT_COUNT, dc_1);
725*3ac0a46fSAndroid Build Coastguard Worker 
726*3ac0a46fSAndroid Build Coastguard Worker         /* tmp[i + 1] = s1 + ((dn + dc) >> 1); */
727*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (i + 1) + 0,
728*3ac0a46fSAndroid Build Coastguard Worker               ADD(s1_0, SAR(ADD(dn_0, dc_0), 1)));
729*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (i + 1) + VREG_INT_COUNT,
730*3ac0a46fSAndroid Build Coastguard Worker               ADD(s1_1, SAR(ADD(dn_1, dc_1), 1)));
731*3ac0a46fSAndroid Build Coastguard Worker 
732*3ac0a46fSAndroid Build Coastguard Worker         dc_0 = dn_0;
733*3ac0a46fSAndroid Build Coastguard Worker         s1_0 = s2_0;
734*3ac0a46fSAndroid Build Coastguard Worker         dc_1 = dn_1;
735*3ac0a46fSAndroid Build Coastguard Worker         s1_1 = s2_1;
736*3ac0a46fSAndroid Build Coastguard Worker     }
737*3ac0a46fSAndroid Build Coastguard Worker     STORE(tmp + PARALLEL_COLS_53 * i, dc_0);
738*3ac0a46fSAndroid Build Coastguard Worker     STORE(tmp + PARALLEL_COLS_53 * i + VREG_INT_COUNT, dc_1);
739*3ac0a46fSAndroid Build Coastguard Worker 
740*3ac0a46fSAndroid Build Coastguard Worker     if (!(len & 1)) {
741*3ac0a46fSAndroid Build Coastguard Worker         /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */
742*3ac0a46fSAndroid Build Coastguard Worker         dn_0 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride),
743*3ac0a46fSAndroid Build Coastguard Worker                    SAR(ADD3(s1_0, s1_0, two), 2));
744*3ac0a46fSAndroid Build Coastguard Worker         dn_1 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride + VREG_INT_COUNT),
745*3ac0a46fSAndroid Build Coastguard Worker                    SAR(ADD3(s1_1, s1_1, two), 2));
746*3ac0a46fSAndroid Build Coastguard Worker 
747*3ac0a46fSAndroid Build Coastguard Worker         /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */
748*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 2) + 0,
749*3ac0a46fSAndroid Build Coastguard Worker               ADD(s1_0, SAR(ADD(dn_0, dc_0), 1)));
750*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 2) + VREG_INT_COUNT,
751*3ac0a46fSAndroid Build Coastguard Worker               ADD(s1_1, SAR(ADD(dn_1, dc_1), 1)));
752*3ac0a46fSAndroid Build Coastguard Worker 
753*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + 0, dn_0);
754*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT, dn_1);
755*3ac0a46fSAndroid Build Coastguard Worker     } else {
756*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + 0, ADD(s1_0, dc_0));
757*3ac0a46fSAndroid Build Coastguard Worker         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT,
758*3ac0a46fSAndroid Build Coastguard Worker               ADD(s1_1, dc_1));
759*3ac0a46fSAndroid Build Coastguard Worker     }
760*3ac0a46fSAndroid Build Coastguard Worker 
761*3ac0a46fSAndroid Build Coastguard Worker     opj_idwt53_v_final_memcpy(tiledp_col, tmp, len, stride);
762*3ac0a46fSAndroid Build Coastguard Worker }
763*3ac0a46fSAndroid Build Coastguard Worker 
764*3ac0a46fSAndroid Build Coastguard Worker #undef VREG
765*3ac0a46fSAndroid Build Coastguard Worker #undef LOAD_CST
766*3ac0a46fSAndroid Build Coastguard Worker #undef LOADU
767*3ac0a46fSAndroid Build Coastguard Worker #undef LOAD
768*3ac0a46fSAndroid Build Coastguard Worker #undef STORE
769*3ac0a46fSAndroid Build Coastguard Worker #undef STOREU
770*3ac0a46fSAndroid Build Coastguard Worker #undef ADD
771*3ac0a46fSAndroid Build Coastguard Worker #undef ADD3
772*3ac0a46fSAndroid Build Coastguard Worker #undef SUB
773*3ac0a46fSAndroid Build Coastguard Worker #undef SAR
774*3ac0a46fSAndroid Build Coastguard Worker 
775*3ac0a46fSAndroid Build Coastguard Worker #endif /* (defined(__SSE2__) || defined(__AVX2__)) && !defined(STANDARD_SLOW_VERSION) */
776*3ac0a46fSAndroid Build Coastguard Worker 
777*3ac0a46fSAndroid Build Coastguard Worker #if !defined(STANDARD_SLOW_VERSION)
778*3ac0a46fSAndroid Build Coastguard Worker /** Vertical inverse 5x3 wavelet transform for one column, when top-most
779*3ac0a46fSAndroid Build Coastguard Worker  * pixel is on even coordinate */
opj_idwt3_v_cas0(OPJ_INT32 * tmp,const OPJ_INT32 sn,const OPJ_INT32 len,OPJ_INT32 * tiledp_col,const OPJ_SIZE_T stride)780*3ac0a46fSAndroid Build Coastguard Worker static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
781*3ac0a46fSAndroid Build Coastguard Worker                              const OPJ_INT32 sn,
782*3ac0a46fSAndroid Build Coastguard Worker                              const OPJ_INT32 len,
783*3ac0a46fSAndroid Build Coastguard Worker                              OPJ_INT32* tiledp_col,
784*3ac0a46fSAndroid Build Coastguard Worker                              const OPJ_SIZE_T stride)
785*3ac0a46fSAndroid Build Coastguard Worker {
786*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i, j;
787*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 d1c, d1n, s1n, s0c, s0n;
788*3ac0a46fSAndroid Build Coastguard Worker 
789*3ac0a46fSAndroid Build Coastguard Worker     assert(len > 1);
790*3ac0a46fSAndroid Build Coastguard Worker 
791*3ac0a46fSAndroid Build Coastguard Worker     /* Performs lifting in one single iteration. Saves memory */
792*3ac0a46fSAndroid Build Coastguard Worker     /* accesses and explicit interleaving. */
793*3ac0a46fSAndroid Build Coastguard Worker 
794*3ac0a46fSAndroid Build Coastguard Worker     s1n = tiledp_col[0];
795*3ac0a46fSAndroid Build Coastguard Worker     d1n = tiledp_col[(OPJ_SIZE_T)sn * stride];
796*3ac0a46fSAndroid Build Coastguard Worker     s0n = s1n - ((d1n + 1) >> 1);
797*3ac0a46fSAndroid Build Coastguard Worker 
798*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0, j = 0; i < (len - 3); i += 2, j++) {
799*3ac0a46fSAndroid Build Coastguard Worker         d1c = d1n;
800*3ac0a46fSAndroid Build Coastguard Worker         s0c = s0n;
801*3ac0a46fSAndroid Build Coastguard Worker 
802*3ac0a46fSAndroid Build Coastguard Worker         s1n = tiledp_col[(OPJ_SIZE_T)(j + 1) * stride];
803*3ac0a46fSAndroid Build Coastguard Worker         d1n = tiledp_col[(OPJ_SIZE_T)(sn + j + 1) * stride];
804*3ac0a46fSAndroid Build Coastguard Worker 
805*3ac0a46fSAndroid Build Coastguard Worker         s0n = opj_int_sub_no_overflow(s1n,
806*3ac0a46fSAndroid Build Coastguard Worker                                       opj_int_add_no_overflow(opj_int_add_no_overflow(d1c, d1n), 2) >> 2);
807*3ac0a46fSAndroid Build Coastguard Worker 
808*3ac0a46fSAndroid Build Coastguard Worker         tmp[i  ] = s0c;
809*3ac0a46fSAndroid Build Coastguard Worker         tmp[i + 1] = opj_int_add_no_overflow(d1c, opj_int_add_no_overflow(s0c,
810*3ac0a46fSAndroid Build Coastguard Worker                                              s0n) >> 1);
811*3ac0a46fSAndroid Build Coastguard Worker     }
812*3ac0a46fSAndroid Build Coastguard Worker 
813*3ac0a46fSAndroid Build Coastguard Worker     tmp[i] = s0n;
814*3ac0a46fSAndroid Build Coastguard Worker 
815*3ac0a46fSAndroid Build Coastguard Worker     if (len & 1) {
816*3ac0a46fSAndroid Build Coastguard Worker         tmp[len - 1] =
817*3ac0a46fSAndroid Build Coastguard Worker             tiledp_col[(OPJ_SIZE_T)((len - 1) / 2) * stride] -
818*3ac0a46fSAndroid Build Coastguard Worker             ((d1n + 1) >> 1);
819*3ac0a46fSAndroid Build Coastguard Worker         tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1);
820*3ac0a46fSAndroid Build Coastguard Worker     } else {
821*3ac0a46fSAndroid Build Coastguard Worker         tmp[len - 1] = d1n + s0n;
822*3ac0a46fSAndroid Build Coastguard Worker     }
823*3ac0a46fSAndroid Build Coastguard Worker 
824*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0; i < len; ++i) {
825*3ac0a46fSAndroid Build Coastguard Worker         tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i];
826*3ac0a46fSAndroid Build Coastguard Worker     }
827*3ac0a46fSAndroid Build Coastguard Worker }
828*3ac0a46fSAndroid Build Coastguard Worker 
829*3ac0a46fSAndroid Build Coastguard Worker 
830*3ac0a46fSAndroid Build Coastguard Worker /** Vertical inverse 5x3 wavelet transform for one column, when top-most
831*3ac0a46fSAndroid Build Coastguard Worker  * pixel is on odd coordinate */
opj_idwt3_v_cas1(OPJ_INT32 * tmp,const OPJ_INT32 sn,const OPJ_INT32 len,OPJ_INT32 * tiledp_col,const OPJ_SIZE_T stride)832*3ac0a46fSAndroid Build Coastguard Worker static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
833*3ac0a46fSAndroid Build Coastguard Worker                              const OPJ_INT32 sn,
834*3ac0a46fSAndroid Build Coastguard Worker                              const OPJ_INT32 len,
835*3ac0a46fSAndroid Build Coastguard Worker                              OPJ_INT32* tiledp_col,
836*3ac0a46fSAndroid Build Coastguard Worker                              const OPJ_SIZE_T stride)
837*3ac0a46fSAndroid Build Coastguard Worker {
838*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i, j;
839*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 s1, s2, dc, dn;
840*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
841*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* in_odd = &tiledp_col[0];
842*3ac0a46fSAndroid Build Coastguard Worker 
843*3ac0a46fSAndroid Build Coastguard Worker     assert(len > 2);
844*3ac0a46fSAndroid Build Coastguard Worker 
845*3ac0a46fSAndroid Build Coastguard Worker     /* Performs lifting in one single iteration. Saves memory */
846*3ac0a46fSAndroid Build Coastguard Worker     /* accesses and explicit interleaving. */
847*3ac0a46fSAndroid Build Coastguard Worker 
848*3ac0a46fSAndroid Build Coastguard Worker     s1 = in_even[stride];
849*3ac0a46fSAndroid Build Coastguard Worker     dc = in_odd[0] - ((in_even[0] + s1 + 2) >> 2);
850*3ac0a46fSAndroid Build Coastguard Worker     tmp[0] = in_even[0] + dc;
851*3ac0a46fSAndroid Build Coastguard Worker     for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) {
852*3ac0a46fSAndroid Build Coastguard Worker 
853*3ac0a46fSAndroid Build Coastguard Worker         s2 = in_even[(OPJ_SIZE_T)(j + 1) * stride];
854*3ac0a46fSAndroid Build Coastguard Worker 
855*3ac0a46fSAndroid Build Coastguard Worker         dn = in_odd[(OPJ_SIZE_T)j * stride] - ((s1 + s2 + 2) >> 2);
856*3ac0a46fSAndroid Build Coastguard Worker         tmp[i  ] = dc;
857*3ac0a46fSAndroid Build Coastguard Worker         tmp[i + 1] = s1 + ((dn + dc) >> 1);
858*3ac0a46fSAndroid Build Coastguard Worker 
859*3ac0a46fSAndroid Build Coastguard Worker         dc = dn;
860*3ac0a46fSAndroid Build Coastguard Worker         s1 = s2;
861*3ac0a46fSAndroid Build Coastguard Worker     }
862*3ac0a46fSAndroid Build Coastguard Worker     tmp[i] = dc;
863*3ac0a46fSAndroid Build Coastguard Worker     if (!(len & 1)) {
864*3ac0a46fSAndroid Build Coastguard Worker         dn = in_odd[(OPJ_SIZE_T)(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
865*3ac0a46fSAndroid Build Coastguard Worker         tmp[len - 2] = s1 + ((dn + dc) >> 1);
866*3ac0a46fSAndroid Build Coastguard Worker         tmp[len - 1] = dn;
867*3ac0a46fSAndroid Build Coastguard Worker     } else {
868*3ac0a46fSAndroid Build Coastguard Worker         tmp[len - 1] = s1 + dc;
869*3ac0a46fSAndroid Build Coastguard Worker     }
870*3ac0a46fSAndroid Build Coastguard Worker 
871*3ac0a46fSAndroid Build Coastguard Worker     for (i = 0; i < len; ++i) {
872*3ac0a46fSAndroid Build Coastguard Worker         tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i];
873*3ac0a46fSAndroid Build Coastguard Worker     }
874*3ac0a46fSAndroid Build Coastguard Worker }
875*3ac0a46fSAndroid Build Coastguard Worker #endif /* !defined(STANDARD_SLOW_VERSION) */
876*3ac0a46fSAndroid Build Coastguard Worker 
877*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                            */
878*3ac0a46fSAndroid Build Coastguard Worker /* Inverse vertical 5-3 wavelet transform in 1-D for several columns. */
879*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
880*3ac0a46fSAndroid Build Coastguard Worker /* Performs interleave, inverse wavelet transform and copy back to buffer */
opj_idwt53_v(const opj_dwt_t * dwt,OPJ_INT32 * tiledp_col,OPJ_SIZE_T stride,OPJ_INT32 nb_cols)881*3ac0a46fSAndroid Build Coastguard Worker static void opj_idwt53_v(const opj_dwt_t *dwt,
882*3ac0a46fSAndroid Build Coastguard Worker                          OPJ_INT32* tiledp_col,
883*3ac0a46fSAndroid Build Coastguard Worker                          OPJ_SIZE_T stride,
884*3ac0a46fSAndroid Build Coastguard Worker                          OPJ_INT32 nb_cols)
885*3ac0a46fSAndroid Build Coastguard Worker {
886*3ac0a46fSAndroid Build Coastguard Worker #ifdef STANDARD_SLOW_VERSION
887*3ac0a46fSAndroid Build Coastguard Worker     /* For documentation purpose */
888*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 k, c;
889*3ac0a46fSAndroid Build Coastguard Worker     for (c = 0; c < nb_cols; c ++) {
890*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_interleave_v(dwt, tiledp_col + c, stride);
891*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_decode_1(dwt);
892*3ac0a46fSAndroid Build Coastguard Worker         for (k = 0; k < dwt->sn + dwt->dn; ++k) {
893*3ac0a46fSAndroid Build Coastguard Worker             tiledp_col[c + k * stride] = dwt->mem[k];
894*3ac0a46fSAndroid Build Coastguard Worker         }
895*3ac0a46fSAndroid Build Coastguard Worker     }
896*3ac0a46fSAndroid Build Coastguard Worker #else
897*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn = dwt->sn;
898*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 len = sn + dwt->dn;
899*3ac0a46fSAndroid Build Coastguard Worker     if (dwt->cas == 0) {
900*3ac0a46fSAndroid Build Coastguard Worker         /* If len == 1, unmodified value */
901*3ac0a46fSAndroid Build Coastguard Worker 
902*3ac0a46fSAndroid Build Coastguard Worker #if (defined(__SSE2__) || defined(__AVX2__))
903*3ac0a46fSAndroid Build Coastguard Worker         if (len > 1 && nb_cols == PARALLEL_COLS_53) {
904*3ac0a46fSAndroid Build Coastguard Worker             /* Same as below general case, except that thanks to SSE2/AVX2 */
905*3ac0a46fSAndroid Build Coastguard Worker             /* we can efficiently process 8/16 columns in parallel */
906*3ac0a46fSAndroid Build Coastguard Worker             opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(dwt->mem, sn, len, tiledp_col, stride);
907*3ac0a46fSAndroid Build Coastguard Worker             return;
908*3ac0a46fSAndroid Build Coastguard Worker         }
909*3ac0a46fSAndroid Build Coastguard Worker #endif
910*3ac0a46fSAndroid Build Coastguard Worker         if (len > 1) {
911*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32 c;
912*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < nb_cols; c++, tiledp_col++) {
913*3ac0a46fSAndroid Build Coastguard Worker                 opj_idwt3_v_cas0(dwt->mem, sn, len, tiledp_col, stride);
914*3ac0a46fSAndroid Build Coastguard Worker             }
915*3ac0a46fSAndroid Build Coastguard Worker             return;
916*3ac0a46fSAndroid Build Coastguard Worker         }
917*3ac0a46fSAndroid Build Coastguard Worker     } else {
918*3ac0a46fSAndroid Build Coastguard Worker         if (len == 1) {
919*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32 c;
920*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < nb_cols; c++, tiledp_col++) {
921*3ac0a46fSAndroid Build Coastguard Worker                 tiledp_col[0] /= 2;
922*3ac0a46fSAndroid Build Coastguard Worker             }
923*3ac0a46fSAndroid Build Coastguard Worker             return;
924*3ac0a46fSAndroid Build Coastguard Worker         }
925*3ac0a46fSAndroid Build Coastguard Worker 
926*3ac0a46fSAndroid Build Coastguard Worker         if (len == 2) {
927*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32 c;
928*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32* out = dwt->mem;
929*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < nb_cols; c++, tiledp_col++) {
930*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_INT32 i;
931*3ac0a46fSAndroid Build Coastguard Worker                 const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
932*3ac0a46fSAndroid Build Coastguard Worker                 const OPJ_INT32* in_odd = &tiledp_col[0];
933*3ac0a46fSAndroid Build Coastguard Worker 
934*3ac0a46fSAndroid Build Coastguard Worker                 out[1] = in_odd[0] - ((in_even[0] + 1) >> 1);
935*3ac0a46fSAndroid Build Coastguard Worker                 out[0] = in_even[0] + out[1];
936*3ac0a46fSAndroid Build Coastguard Worker 
937*3ac0a46fSAndroid Build Coastguard Worker                 for (i = 0; i < len; ++i) {
938*3ac0a46fSAndroid Build Coastguard Worker                     tiledp_col[(OPJ_SIZE_T)i * stride] = out[i];
939*3ac0a46fSAndroid Build Coastguard Worker                 }
940*3ac0a46fSAndroid Build Coastguard Worker             }
941*3ac0a46fSAndroid Build Coastguard Worker 
942*3ac0a46fSAndroid Build Coastguard Worker             return;
943*3ac0a46fSAndroid Build Coastguard Worker         }
944*3ac0a46fSAndroid Build Coastguard Worker 
945*3ac0a46fSAndroid Build Coastguard Worker #if (defined(__SSE2__) || defined(__AVX2__))
946*3ac0a46fSAndroid Build Coastguard Worker         if (len > 2 && nb_cols == PARALLEL_COLS_53) {
947*3ac0a46fSAndroid Build Coastguard Worker             /* Same as below general case, except that thanks to SSE2/AVX2 */
948*3ac0a46fSAndroid Build Coastguard Worker             /* we can efficiently process 8/16 columns in parallel */
949*3ac0a46fSAndroid Build Coastguard Worker             opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(dwt->mem, sn, len, tiledp_col, stride);
950*3ac0a46fSAndroid Build Coastguard Worker             return;
951*3ac0a46fSAndroid Build Coastguard Worker         }
952*3ac0a46fSAndroid Build Coastguard Worker #endif
953*3ac0a46fSAndroid Build Coastguard Worker         if (len > 2) {
954*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32 c;
955*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < nb_cols; c++, tiledp_col++) {
956*3ac0a46fSAndroid Build Coastguard Worker                 opj_idwt3_v_cas1(dwt->mem, sn, len, tiledp_col, stride);
957*3ac0a46fSAndroid Build Coastguard Worker             }
958*3ac0a46fSAndroid Build Coastguard Worker             return;
959*3ac0a46fSAndroid Build Coastguard Worker         }
960*3ac0a46fSAndroid Build Coastguard Worker     }
961*3ac0a46fSAndroid Build Coastguard Worker #endif
962*3ac0a46fSAndroid Build Coastguard Worker }
963*3ac0a46fSAndroid Build Coastguard Worker 
964*3ac0a46fSAndroid Build Coastguard Worker #if 0
965*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_encode_step1(OPJ_FLOAT32* fw,
966*3ac0a46fSAndroid Build Coastguard Worker                                  OPJ_UINT32 end,
967*3ac0a46fSAndroid Build Coastguard Worker                                  const OPJ_FLOAT32 c)
968*3ac0a46fSAndroid Build Coastguard Worker {
969*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 i = 0;
970*3ac0a46fSAndroid Build Coastguard Worker     for (; i < end; ++i) {
971*3ac0a46fSAndroid Build Coastguard Worker         fw[0] *= c;
972*3ac0a46fSAndroid Build Coastguard Worker         fw += 2;
973*3ac0a46fSAndroid Build Coastguard Worker     }
974*3ac0a46fSAndroid Build Coastguard Worker }
975*3ac0a46fSAndroid Build Coastguard Worker #else
/**
 * Scales an interleaved float array in place: fw[0], fw[2], ... are
 * multiplied by c1 and fw[1], fw[3], ... by c2.
 *
 * @param fw        array to scale; asserted to be 16-byte aligned
 * @param iters_c1  number of samples to multiply by c1
 * @param iters_c2  number of samples to multiply by c2; asserted to differ
 *                  from iters_c1 by at most one
 * @param c1        factor for the even positions
 * @param c2        factor for the odd positions
 */
static void opj_dwt_encode_step1_combined(OPJ_FLOAT32* fw,
        OPJ_UINT32 iters_c1,
        OPJ_UINT32 iters_c2,
        const OPJ_FLOAT32 c1,
        const OPJ_FLOAT32 c2)
{
    const OPJ_UINT32 pair_count = opj_uint_min(iters_c1, iters_c2);
    OPJ_UINT32 done = 0;
#ifdef __SSE__
    /* Lanes, from lowest to highest: c1, c2, c1, c2 */
    const __m128 scale = _mm_set_ps(c2, c1, c2, c1);
#endif

    assert((((OPJ_SIZE_T)fw) & 0xf) == 0);
    assert(opj_int_abs((OPJ_INT32)iters_c1 - (OPJ_INT32)iters_c2) <= 1);

    /* Bulk: four (c1, c2) pairs, i.e. eight floats, per iteration */
    while (done + 3 < pair_count) {
#ifdef __SSE__
        __m128* quad = (__m128*)fw;
        quad[0] = _mm_mul_ps(quad[0], scale);
        quad[1] = _mm_mul_ps(quad[1], scale);
#else
        OPJ_UINT32 k;
        for (k = 0; k < 8; k += 2) {
            fw[k] *= c1;
            fw[k + 1] *= c2;
        }
#endif
        fw += 8;
        done += 4;
    }

    /* Remaining complete pairs */
    while (done < pair_count) {
        fw[0] *= c1;
        fw[1] *= c2;
        fw += 2;
        ++done;
    }

    /* At most one unpaired sample is left */
    if (done < iters_c1) {
        fw[0] *= c1;
    } else if (done < iters_c2) {
        fw[1] *= c2;
    }
}
1014*3ac0a46fSAndroid Build Coastguard Worker 
1015*3ac0a46fSAndroid Build Coastguard Worker #endif
1016*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * One lifting pass over an interleaved float line: each target sample
 * (fw[-1], fw[1], fw[3], ...) is incremented by c times the sum of its two
 * neighbours.
 *
 * @param fl   left neighbour of the first target sample
 * @param fw   right neighbour of the first target sample (the first target
 *             itself is fw[-1])
 * @param end  number of target samples
 * @param m    number of target samples that have a right neighbour; when
 *             m < end (asserted to mean m + 1 == end) the last target uses
 *             its left neighbour twice
 * @param c    lifting coefficient
 */
static void opj_dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw,
                                 OPJ_UINT32 end,
                                 OPJ_UINT32 m,
                                 OPJ_FLOAT32 c)
{
    const OPJ_UINT32 count = opj_uint_min(end, m);

    if (count != 0) {
        OPJ_UINT32 k = 1;

        /* First target: its left neighbour is provided through fl */
        fw[-1] += (fl[0] + fw[0]) * c;
        fw += 2;

        /* Four targets per iteration */
        while (k + 3 < count) {
            fw[-1] += (fw[-2] + fw[0]) * c;
            fw[1] += (fw[0] + fw[2]) * c;
            fw[3] += (fw[2] + fw[4]) * c;
            fw[5] += (fw[4] + fw[6]) * c;
            fw += 8;
            k += 4;
        }

        /* Leftover targets, one at a time */
        while (k < count) {
            fw[-1] += (fw[-2] + fw[0]) * c;
            fw += 2;
            ++k;
        }
    }

    if (m < end) {
        /* Last target has no right neighbour: use the left one twice */
        assert(m + 1 == end);
        fw[-1] += (2 * fw[-2]) * c;
    }
}
1045*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Forward transform of one interleaved float line: four lifting passes
 * (opj_dwt_alpha, opj_dwt_beta, opj_dwt_gamma, opj_dwt_delta) followed by
 * scaling with opj_K / opj_invK.
 *
 * @param aIn  line to transform in place, viewed as OPJ_FLOAT32 samples
 * @param dn   sample count passed for the opj_dwt_alpha / opj_dwt_gamma passes
 * @param sn   sample count passed for the opj_dwt_beta / opj_dwt_delta passes
 * @param cas  0 selects offsets a = 0, b = 1; any other value selects
 *             a = 1, b = 0
 */
static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
                                  OPJ_INT32 cas)
{
    OPJ_FLOAT32* const w = (OPJ_FLOAT32*)aIn;
    const OPJ_INT32 a = (cas == 0) ? 0 : 1;
    const OPJ_INT32 b = 1 - a;
    const OPJ_UINT32 d_count = (OPJ_UINT32)dn;
    const OPJ_UINT32 s_count = (OPJ_UINT32)sn;
    const OPJ_UINT32 d_inner = (OPJ_UINT32)opj_int_min(dn, sn - b);
    const OPJ_UINT32 s_inner = (OPJ_UINT32)opj_int_min(sn, dn - a);

    assert(dn + sn > 1);

    /* Lifting passes, alternating between the two sets of targets */
    opj_dwt_encode_step2(w + a, w + b + 1, d_count, d_inner, opj_dwt_alpha);
    opj_dwt_encode_step2(w + b, w + a + 1, s_count, s_inner, opj_dwt_beta);
    opj_dwt_encode_step2(w + a, w + b + 1, d_count, d_inner, opj_dwt_gamma);
    opj_dwt_encode_step2(w + b, w + a + 1, s_count, s_inner, opj_dwt_delta);

    /* Scaling */
#if 0
    opj_dwt_encode_step1(w + b, d_count,
                         opj_K);
    opj_dwt_encode_step1(w + a, s_count,
                         opj_invK);
#else
    if (a == 0) {
        opj_dwt_encode_step1_combined(w, s_count, d_count, opj_invK, opj_K);
    } else {
        opj_dwt_encode_step1_combined(w, d_count, s_count, opj_K, opj_invK);
    }
#endif
}
1096*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Splits a step size into the mantissa / exponent pair stored in
 * bandno_stepsize.
 *
 * The mantissa is stepsize shifted so that its floor(log2) lands on bit 11,
 * masked to the low 11 bits; the exponent is
 * numbps - (floorlog2(stepsize) - 13).
 *
 * @param stepsize         step size to encode
 * @param numbps           value the exponent is computed from
 * @param bandno_stepsize  receives mant and expn
 */
static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
                                    opj_stepsize_t *bandno_stepsize)
{
    const OPJ_INT32 log2_step = opj_int_floorlog2(stepsize);
    const OPJ_INT32 shift = 11 - log2_step;
    OPJ_INT32 mantissa;

    if (shift < 0) {
        mantissa = stepsize >> -shift;
    } else {
        mantissa = stepsize << shift;
    }

    bandno_stepsize->mant = mantissa & 0x7ff;
    bandno_stepsize->expn = numbps - (log2_step - 13);
}
1106*3ac0a46fSAndroid Build Coastguard Worker 
1107*3ac0a46fSAndroid Build Coastguard Worker /*
1108*3ac0a46fSAndroid Build Coastguard Worker ==========================================================
1109*3ac0a46fSAndroid Build Coastguard Worker    DWT interface
1110*3ac0a46fSAndroid Build Coastguard Worker ==========================================================
1111*3ac0a46fSAndroid Build Coastguard Worker */
1112*3ac0a46fSAndroid Build Coastguard Worker 
1113*3ac0a46fSAndroid Build Coastguard Worker /** Process one line for the horizontal pass of the 5x3 forward transform */
1114*3ac0a46fSAndroid Build Coastguard Worker static
opj_dwt_encode_and_deinterleave_h_one_row(void * rowIn,void * tmpIn,OPJ_UINT32 width,OPJ_BOOL even)1115*3ac0a46fSAndroid Build Coastguard Worker void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn,
1116*3ac0a46fSAndroid Build Coastguard Worker         void* tmpIn,
1117*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 width,
1118*3ac0a46fSAndroid Build Coastguard Worker         OPJ_BOOL even)
1119*3ac0a46fSAndroid Build Coastguard Worker {
1120*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* OPJ_RESTRICT row = (OPJ_INT32*)rowIn;
1121*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn;
1122*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
1123*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
1124*3ac0a46fSAndroid Build Coastguard Worker 
1125*3ac0a46fSAndroid Build Coastguard Worker     if (even) {
1126*3ac0a46fSAndroid Build Coastguard Worker         if (width > 1) {
1127*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32 i;
1128*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i < sn - 1; i++) {
1129*3ac0a46fSAndroid Build Coastguard Worker                 tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1);
1130*3ac0a46fSAndroid Build Coastguard Worker             }
1131*3ac0a46fSAndroid Build Coastguard Worker             if ((width % 2) == 0) {
1132*3ac0a46fSAndroid Build Coastguard Worker                 tmp[sn + i] = row[2 * i + 1] - row[(i) * 2];
1133*3ac0a46fSAndroid Build Coastguard Worker             }
1134*3ac0a46fSAndroid Build Coastguard Worker             row[0] += (tmp[sn] + tmp[sn] + 2) >> 2;
1135*3ac0a46fSAndroid Build Coastguard Worker             for (i = 1; i < dn; i++) {
1136*3ac0a46fSAndroid Build Coastguard Worker                 row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2);
1137*3ac0a46fSAndroid Build Coastguard Worker             }
1138*3ac0a46fSAndroid Build Coastguard Worker             if ((width % 2) == 1) {
1139*3ac0a46fSAndroid Build Coastguard Worker                 row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2);
1140*3ac0a46fSAndroid Build Coastguard Worker             }
1141*3ac0a46fSAndroid Build Coastguard Worker             memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
1142*3ac0a46fSAndroid Build Coastguard Worker         }
1143*3ac0a46fSAndroid Build Coastguard Worker     } else {
1144*3ac0a46fSAndroid Build Coastguard Worker         if (width == 1) {
1145*3ac0a46fSAndroid Build Coastguard Worker             row[0] *= 2;
1146*3ac0a46fSAndroid Build Coastguard Worker         } else {
1147*3ac0a46fSAndroid Build Coastguard Worker             OPJ_INT32 i;
1148*3ac0a46fSAndroid Build Coastguard Worker             tmp[sn + 0] = row[0] - row[1];
1149*3ac0a46fSAndroid Build Coastguard Worker             for (i = 1; i < sn; i++) {
1150*3ac0a46fSAndroid Build Coastguard Worker                 tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1);
1151*3ac0a46fSAndroid Build Coastguard Worker             }
1152*3ac0a46fSAndroid Build Coastguard Worker             if ((width % 2) == 1) {
1153*3ac0a46fSAndroid Build Coastguard Worker                 tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1];
1154*3ac0a46fSAndroid Build Coastguard Worker             }
1155*3ac0a46fSAndroid Build Coastguard Worker 
1156*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i < dn - 1; i++) {
1157*3ac0a46fSAndroid Build Coastguard Worker                 row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2);
1158*3ac0a46fSAndroid Build Coastguard Worker             }
1159*3ac0a46fSAndroid Build Coastguard Worker             if ((width % 2) == 0) {
1160*3ac0a46fSAndroid Build Coastguard Worker                 row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2);
1161*3ac0a46fSAndroid Build Coastguard Worker             }
1162*3ac0a46fSAndroid Build Coastguard Worker             memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
1163*3ac0a46fSAndroid Build Coastguard Worker         }
1164*3ac0a46fSAndroid Build Coastguard Worker     }
1165*3ac0a46fSAndroid Build Coastguard Worker }
1166*3ac0a46fSAndroid Build Coastguard Worker 
1167*3ac0a46fSAndroid Build Coastguard Worker /** Process one line for the horizontal pass of the 9x7 forward transform */
1168*3ac0a46fSAndroid Build Coastguard Worker static
opj_dwt_encode_and_deinterleave_h_one_row_real(void * rowIn,void * tmpIn,OPJ_UINT32 width,OPJ_BOOL even)1169*3ac0a46fSAndroid Build Coastguard Worker void opj_dwt_encode_and_deinterleave_h_one_row_real(void* rowIn,
1170*3ac0a46fSAndroid Build Coastguard Worker         void* tmpIn,
1171*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 width,
1172*3ac0a46fSAndroid Build Coastguard Worker         OPJ_BOOL even)
1173*3ac0a46fSAndroid Build Coastguard Worker {
1174*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32* OPJ_RESTRICT row = (OPJ_FLOAT32*)rowIn;
1175*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32*)tmpIn;
1176*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
1177*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
1178*3ac0a46fSAndroid Build Coastguard Worker     if (width == 1) {
1179*3ac0a46fSAndroid Build Coastguard Worker         return;
1180*3ac0a46fSAndroid Build Coastguard Worker     }
1181*3ac0a46fSAndroid Build Coastguard Worker     memcpy(tmp, row, width * sizeof(OPJ_FLOAT32));
1182*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1);
1183*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_deinterleave_h((OPJ_INT32 * OPJ_RESTRICT)tmp,
1184*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_INT32 * OPJ_RESTRICT)row,
1185*3ac0a46fSAndroid Build Coastguard Worker                            dn, sn, even ? 0 : 1);
1186*3ac0a46fSAndroid Build Coastguard Worker }
1187*3ac0a46fSAndroid Build Coastguard Worker 
1188*3ac0a46fSAndroid Build Coastguard Worker typedef struct {
1189*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t h;
1190*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw; /* Width of the resolution to process */
1191*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w; /* Width of tiledp */
1192*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT tiledp;
1193*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 min_j;
1194*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 max_j;
1195*3ac0a46fSAndroid Build Coastguard Worker     opj_encode_and_deinterleave_h_one_row_fnptr_type p_function;
1196*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt_encode_h_job_t;
1197*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Worker for the horizontal forward pass.
 *
 * Applies job->p_function to every row in [job->min_j, job->max_j) of
 * job->tiledp, then frees the job's scratch buffer and the job itself.
 *
 * @param user_data  an opj_dwt_encode_h_job_t*; ownership is taken
 * @param tls        unused
 */
static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls)
{
    OPJ_UINT32 j;
    opj_dwt_encode_h_job_t* job;
    (void)tls;

    job = (opj_dwt_encode_h_job_t*)user_data;
    for (j = job->min_j; j < job->max_j; j++) {
        /* Widen before multiplying: j * w does not fit in 32 bits once */
        /* the row offset exceeds 2^32 samples */
        OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + (OPJ_SIZE_T)j * job->w;
        (*job->p_function)(aj, job->h.mem, job->rw,
                           job->h.cas == 0 ? OPJ_TRUE : OPJ_FALSE);
    }

    /* The job owns its scratch buffer */
    opj_aligned_free(job->h.mem);
    opj_free(job);
}
1214*3ac0a46fSAndroid Build Coastguard Worker 
1215*3ac0a46fSAndroid Build Coastguard Worker typedef struct {
1216*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t v;
1217*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh;
1218*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
1219*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT tiledp;
1220*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 min_j;
1221*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 max_j;
1222*3ac0a46fSAndroid Build Coastguard Worker     opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v;
1223*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt_encode_v_job_t;
1224*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Job entry point for the vertical pass of the forward transform.
 *
 * Walks the columns [min_j, max_j) in strips of NB_ELTS_V8 columns, handing
 * each strip to job->p_encode_and_deinterleave_v; a final narrower strip
 * covers whatever is left. The scratch buffer (job->v.mem) and the job
 * descriptor are released before returning.
 *
 * @param user_data   pointer to a opj_dwt_encode_v_job_t
 * @param tls         unused
 */
static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls)
{
    opj_dwt_encode_v_job_t* const job = (opj_dwt_encode_v_job_t*)user_data;
    const OPJ_UINT32 last_col = job->max_j;
    OPJ_UINT32 col = job->min_j;
    (void)tls;

    /* Full strips */
    while (col + NB_ELTS_V8 - 1 < last_col) {
        (*job->p_encode_and_deinterleave_v)(job->tiledp + col,
                                            job->v.mem,
                                            job->rh,
                                            job->v.cas == 0,
                                            job->w,
                                            NB_ELTS_V8);
        col += NB_ELTS_V8;
    }

    /* Trailing strip with fewer than NB_ELTS_V8 columns */
    if (col < last_col) {
        (*job->p_encode_and_deinterleave_v)(job->tiledp + col,
                                            job->v.mem,
                                            job->rh,
                                            job->v.cas == 0,
                                            job->w,
                                            last_col - col);
    }

    opj_aligned_free(job->v.mem);
    opj_free(job);
}
1252*3ac0a46fSAndroid Build Coastguard Worker 
1253*3ac0a46fSAndroid Build Coastguard Worker /** Fetch up to cols <= NB_ELTS_V8 for each line, and put them in tmpOut */
1254*3ac0a46fSAndroid Build Coastguard Worker /* that has a NB_ELTS_V8 interleave factor. */
opj_dwt_fetch_cols_vertical_pass(const void * arrayIn,void * tmpOut,OPJ_UINT32 height,OPJ_UINT32 stride_width,OPJ_UINT32 cols)1255*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_fetch_cols_vertical_pass(const void *arrayIn,
1256*3ac0a46fSAndroid Build Coastguard Worker         void *tmpOut,
1257*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 height,
1258*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 stride_width,
1259*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 cols)
1260*3ac0a46fSAndroid Build Coastguard Worker {
1261*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32* OPJ_RESTRICT array = (const OPJ_INT32 * OPJ_RESTRICT)arrayIn;
1262*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpOut;
1263*3ac0a46fSAndroid Build Coastguard Worker     if (cols == NB_ELTS_V8) {
1264*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 k;
1265*3ac0a46fSAndroid Build Coastguard Worker         for (k = 0; k < height; ++k) {
1266*3ac0a46fSAndroid Build Coastguard Worker             memcpy(tmp + NB_ELTS_V8 * k,
1267*3ac0a46fSAndroid Build Coastguard Worker                    array + k * stride_width,
1268*3ac0a46fSAndroid Build Coastguard Worker                    NB_ELTS_V8 * sizeof(OPJ_INT32));
1269*3ac0a46fSAndroid Build Coastguard Worker         }
1270*3ac0a46fSAndroid Build Coastguard Worker     } else {
1271*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 k;
1272*3ac0a46fSAndroid Build Coastguard Worker         for (k = 0; k < height; ++k) {
1273*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 c;
1274*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < cols; c++) {
1275*3ac0a46fSAndroid Build Coastguard Worker                 tmp[NB_ELTS_V8 * k + c] = array[c + k * stride_width];
1276*3ac0a46fSAndroid Build Coastguard Worker             }
1277*3ac0a46fSAndroid Build Coastguard Worker             for (; c < NB_ELTS_V8; c++) {
1278*3ac0a46fSAndroid Build Coastguard Worker                 tmp[NB_ELTS_V8 * k + c] = 0;
1279*3ac0a46fSAndroid Build Coastguard Worker             }
1280*3ac0a46fSAndroid Build Coastguard Worker         }
1281*3ac0a46fSAndroid Build Coastguard Worker     }
1282*3ac0a46fSAndroid Build Coastguard Worker }
1283*3ac0a46fSAndroid Build Coastguard Worker 
1284*3ac0a46fSAndroid Build Coastguard Worker /* Deinterleave result of forward transform, where cols <= NB_ELTS_V8 */
1285*3ac0a46fSAndroid Build Coastguard Worker /* and src contains NB_ELTS_V8 consecutive values for up to NB_ELTS_V8 */
1286*3ac0a46fSAndroid Build Coastguard Worker /* columns. */
opj_dwt_deinterleave_v_cols(const OPJ_INT32 * OPJ_RESTRICT src,OPJ_INT32 * OPJ_RESTRICT dst,OPJ_INT32 dn,OPJ_INT32 sn,OPJ_UINT32 stride_width,OPJ_INT32 cas,OPJ_UINT32 cols)1287*3ac0a46fSAndroid Build Coastguard Worker static INLINE void opj_dwt_deinterleave_v_cols(
1288*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 * OPJ_RESTRICT src,
1289*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT dst,
1290*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 dn,
1291*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 sn,
1292*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 stride_width,
1293*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 cas,
1294*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 cols)
1295*3ac0a46fSAndroid Build Coastguard Worker {
1296*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 k;
1297*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i = sn;
1298*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT l_dest = dst;
1299*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 * OPJ_RESTRICT l_src = src + cas * NB_ELTS_V8;
1300*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 c;
1301*3ac0a46fSAndroid Build Coastguard Worker 
1302*3ac0a46fSAndroid Build Coastguard Worker     for (k = 0; k < 2; k++) {
1303*3ac0a46fSAndroid Build Coastguard Worker         while (i--) {
1304*3ac0a46fSAndroid Build Coastguard Worker             if (cols == NB_ELTS_V8) {
1305*3ac0a46fSAndroid Build Coastguard Worker                 memcpy(l_dest, l_src, NB_ELTS_V8 * sizeof(OPJ_INT32));
1306*3ac0a46fSAndroid Build Coastguard Worker             } else {
1307*3ac0a46fSAndroid Build Coastguard Worker                 c = 0;
1308*3ac0a46fSAndroid Build Coastguard Worker                 switch (cols) {
1309*3ac0a46fSAndroid Build Coastguard Worker                 case 7:
1310*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1311*3ac0a46fSAndroid Build Coastguard Worker                     c++; /* fallthru */
1312*3ac0a46fSAndroid Build Coastguard Worker                 case 6:
1313*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1314*3ac0a46fSAndroid Build Coastguard Worker                     c++; /* fallthru */
1315*3ac0a46fSAndroid Build Coastguard Worker                 case 5:
1316*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1317*3ac0a46fSAndroid Build Coastguard Worker                     c++; /* fallthru */
1318*3ac0a46fSAndroid Build Coastguard Worker                 case 4:
1319*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1320*3ac0a46fSAndroid Build Coastguard Worker                     c++; /* fallthru */
1321*3ac0a46fSAndroid Build Coastguard Worker                 case 3:
1322*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1323*3ac0a46fSAndroid Build Coastguard Worker                     c++; /* fallthru */
1324*3ac0a46fSAndroid Build Coastguard Worker                 case 2:
1325*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1326*3ac0a46fSAndroid Build Coastguard Worker                     c++; /* fallthru */
1327*3ac0a46fSAndroid Build Coastguard Worker                 default:
1328*3ac0a46fSAndroid Build Coastguard Worker                     l_dest[c] = l_src[c];
1329*3ac0a46fSAndroid Build Coastguard Worker                     break;
1330*3ac0a46fSAndroid Build Coastguard Worker                 }
1331*3ac0a46fSAndroid Build Coastguard Worker             }
1332*3ac0a46fSAndroid Build Coastguard Worker             l_dest += stride_width;
1333*3ac0a46fSAndroid Build Coastguard Worker             l_src += 2 * NB_ELTS_V8;
1334*3ac0a46fSAndroid Build Coastguard Worker         }
1335*3ac0a46fSAndroid Build Coastguard Worker 
1336*3ac0a46fSAndroid Build Coastguard Worker         l_dest = dst + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)stride_width;
1337*3ac0a46fSAndroid Build Coastguard Worker         l_src = src + (1 - cas) * NB_ELTS_V8;
1338*3ac0a46fSAndroid Build Coastguard Worker         i = dn;
1339*3ac0a46fSAndroid Build Coastguard Worker     }
1340*3ac0a46fSAndroid Build Coastguard Worker }
1341*3ac0a46fSAndroid Build Coastguard Worker 
1342*3ac0a46fSAndroid Build Coastguard Worker 
1343*3ac0a46fSAndroid Build Coastguard Worker /* Forward 5-3 transform, for the vertical pass, processing cols columns */
1344*3ac0a46fSAndroid Build Coastguard Worker /* where cols <= NB_ELTS_V8 */
opj_dwt_encode_and_deinterleave_v(void * arrayIn,void * tmpIn,OPJ_UINT32 height,OPJ_BOOL even,OPJ_UINT32 stride_width,OPJ_UINT32 cols)1345*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_encode_and_deinterleave_v(
1346*3ac0a46fSAndroid Build Coastguard Worker     void *arrayIn,
1347*3ac0a46fSAndroid Build Coastguard Worker     void *tmpIn,
1348*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 height,
1349*3ac0a46fSAndroid Build Coastguard Worker     OPJ_BOOL even,
1350*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 stride_width,
1351*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 cols)
1352*3ac0a46fSAndroid Build Coastguard Worker {
1353*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* OPJ_RESTRICT array = (OPJ_INT32 * OPJ_RESTRICT)arrayIn;
1354*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpIn;
1355*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_UINT32 sn = (height + (even ? 1 : 0)) >> 1;
1356*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_UINT32 dn = height - sn;
1357*3ac0a46fSAndroid Build Coastguard Worker 
1358*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols);
1359*3ac0a46fSAndroid Build Coastguard Worker 
1360*3ac0a46fSAndroid Build Coastguard Worker #define OPJ_Sc(i) tmp[(i)*2* NB_ELTS_V8 + c]
1361*3ac0a46fSAndroid Build Coastguard Worker #define OPJ_Dc(i) tmp[((1+(i)*2))* NB_ELTS_V8 + c]
1362*3ac0a46fSAndroid Build Coastguard Worker 
1363*3ac0a46fSAndroid Build Coastguard Worker #ifdef __SSE2__
1364*3ac0a46fSAndroid Build Coastguard Worker     if (height == 1) {
1365*3ac0a46fSAndroid Build Coastguard Worker         if (!even) {
1366*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 c;
1367*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1368*3ac0a46fSAndroid Build Coastguard Worker                 tmp[c] *= 2;
1369*3ac0a46fSAndroid Build Coastguard Worker             }
1370*3ac0a46fSAndroid Build Coastguard Worker         }
1371*3ac0a46fSAndroid Build Coastguard Worker     } else if (even) {
1372*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 c;
1373*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 i;
1374*3ac0a46fSAndroid Build Coastguard Worker         i = 0;
1375*3ac0a46fSAndroid Build Coastguard Worker         if (i + 1 < sn) {
1376*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Si_0 = *(const __m128i*)(tmp + 4 * 0);
1377*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Si_1 = *(const __m128i*)(tmp + 4 * 1);
1378*3ac0a46fSAndroid Build Coastguard Worker             for (; i + 1 < sn; i++) {
1379*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Sip1_0 = *(const __m128i*)(tmp +
1380*3ac0a46fSAndroid Build Coastguard Worker                                                        (i + 1) * 2 * NB_ELTS_V8 + 4 * 0);
1381*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Sip1_1 = *(const __m128i*)(tmp +
1382*3ac0a46fSAndroid Build Coastguard Worker                                                        (i + 1) * 2 * NB_ELTS_V8 + 4 * 1);
1383*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_0 = *(const __m128i*)(tmp +
1384*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
1385*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_1 = *(const __m128i*)(tmp +
1386*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
1387*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Di_0 = _mm_sub_epi32(xmm_Di_0,
1388*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), 1));
1389*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Di_1 = _mm_sub_epi32(xmm_Di_1,
1390*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), 1));
1391*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) =  xmm_Di_0;
1392*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) =  xmm_Di_1;
1393*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_0 = xmm_Sip1_0;
1394*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_1 = xmm_Sip1_1;
1395*3ac0a46fSAndroid Build Coastguard Worker             }
1396*3ac0a46fSAndroid Build Coastguard Worker         }
1397*3ac0a46fSAndroid Build Coastguard Worker         if (((height) % 2) == 0) {
1398*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1399*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Dc(i) -= OPJ_Sc(i);
1400*3ac0a46fSAndroid Build Coastguard Worker             }
1401*3ac0a46fSAndroid Build Coastguard Worker         }
1402*3ac0a46fSAndroid Build Coastguard Worker         for (c = 0; c < NB_ELTS_V8; c++) {
1403*3ac0a46fSAndroid Build Coastguard Worker             OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2;
1404*3ac0a46fSAndroid Build Coastguard Worker         }
1405*3ac0a46fSAndroid Build Coastguard Worker         i = 1;
1406*3ac0a46fSAndroid Build Coastguard Worker         if (i < dn) {
1407*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 +
1408*3ac0a46fSAndroid Build Coastguard Worker                                                    (i - 1) * 2) * NB_ELTS_V8 + 4 * 0);
1409*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 +
1410*3ac0a46fSAndroid Build Coastguard Worker                                                    (i - 1) * 2) * NB_ELTS_V8 + 4 * 1);
1411*3ac0a46fSAndroid Build Coastguard Worker             const __m128i xmm_two = _mm_set1_epi32(2);
1412*3ac0a46fSAndroid Build Coastguard Worker             for (; i < dn; i++) {
1413*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_0 = *(const __m128i*)(tmp +
1414*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
1415*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_1 = *(const __m128i*)(tmp +
1416*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
1417*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Si_0 = *(const __m128i*)(tmp +
1418*3ac0a46fSAndroid Build Coastguard Worker                                                      (i * 2) * NB_ELTS_V8 + 4 * 0);
1419*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Si_1 = *(const __m128i*)(tmp +
1420*3ac0a46fSAndroid Build Coastguard Worker                                                      (i * 2) * NB_ELTS_V8 + 4 * 1);
1421*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_0 = _mm_add_epi32(xmm_Si_0,
1422*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_0, xmm_Di_0), xmm_two), 2));
1423*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_1 = _mm_add_epi32(xmm_Si_1,
1424*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_1, xmm_Di_1), xmm_two), 2));
1425*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0;
1426*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1;
1427*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Dim1_0 = xmm_Di_0;
1428*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Dim1_1 = xmm_Di_1;
1429*3ac0a46fSAndroid Build Coastguard Worker             }
1430*3ac0a46fSAndroid Build Coastguard Worker         }
1431*3ac0a46fSAndroid Build Coastguard Worker         if (((height) % 2) == 1) {
1432*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1433*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2;
1434*3ac0a46fSAndroid Build Coastguard Worker             }
1435*3ac0a46fSAndroid Build Coastguard Worker         }
1436*3ac0a46fSAndroid Build Coastguard Worker     } else {
1437*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 c;
1438*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 i;
1439*3ac0a46fSAndroid Build Coastguard Worker         for (c = 0; c < NB_ELTS_V8; c++) {
1440*3ac0a46fSAndroid Build Coastguard Worker             OPJ_Sc(0) -= OPJ_Dc(0);
1441*3ac0a46fSAndroid Build Coastguard Worker         }
1442*3ac0a46fSAndroid Build Coastguard Worker         i = 1;
1443*3ac0a46fSAndroid Build Coastguard Worker         if (i < sn) {
1444*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 +
1445*3ac0a46fSAndroid Build Coastguard Worker                                                    (i - 1) * 2) * NB_ELTS_V8 + 4 * 0);
1446*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 +
1447*3ac0a46fSAndroid Build Coastguard Worker                                                    (i - 1) * 2) * NB_ELTS_V8 + 4 * 1);
1448*3ac0a46fSAndroid Build Coastguard Worker             for (; i < sn; i++) {
1449*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_0 = *(const __m128i*)(tmp +
1450*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
1451*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_1 = *(const __m128i*)(tmp +
1452*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
1453*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Si_0 = *(const __m128i*)(tmp +
1454*3ac0a46fSAndroid Build Coastguard Worker                                                      (i * 2) * NB_ELTS_V8 + 4 * 0);
1455*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Si_1 = *(const __m128i*)(tmp +
1456*3ac0a46fSAndroid Build Coastguard Worker                                                      (i * 2) * NB_ELTS_V8 + 4 * 1);
1457*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_0 = _mm_sub_epi32(xmm_Si_0,
1458*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(xmm_Di_0, xmm_Dim1_0), 1));
1459*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_1 = _mm_sub_epi32(xmm_Si_1,
1460*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(xmm_Di_1, xmm_Dim1_1), 1));
1461*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0;
1462*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1;
1463*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Dim1_0 = xmm_Di_0;
1464*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Dim1_1 = xmm_Di_1;
1465*3ac0a46fSAndroid Build Coastguard Worker             }
1466*3ac0a46fSAndroid Build Coastguard Worker         }
1467*3ac0a46fSAndroid Build Coastguard Worker         if (((height) % 2) == 1) {
1468*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1469*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Sc(i) -= OPJ_Dc(i - 1);
1470*3ac0a46fSAndroid Build Coastguard Worker             }
1471*3ac0a46fSAndroid Build Coastguard Worker         }
1472*3ac0a46fSAndroid Build Coastguard Worker         i = 0;
1473*3ac0a46fSAndroid Build Coastguard Worker         if (i + 1 < dn) {
1474*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Si_0 = *((const __m128i*)(tmp + 4 * 0));
1475*3ac0a46fSAndroid Build Coastguard Worker             __m128i xmm_Si_1 = *((const __m128i*)(tmp + 4 * 1));
1476*3ac0a46fSAndroid Build Coastguard Worker             const __m128i xmm_two = _mm_set1_epi32(2);
1477*3ac0a46fSAndroid Build Coastguard Worker             for (; i + 1 < dn; i++) {
1478*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Sip1_0 = *(const __m128i*)(tmp +
1479*3ac0a46fSAndroid Build Coastguard Worker                                                        (i + 1) * 2 * NB_ELTS_V8 + 4 * 0);
1480*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Sip1_1 = *(const __m128i*)(tmp +
1481*3ac0a46fSAndroid Build Coastguard Worker                                                        (i + 1) * 2 * NB_ELTS_V8 + 4 * 1);
1482*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_0 = *(const __m128i*)(tmp +
1483*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
1484*3ac0a46fSAndroid Build Coastguard Worker                 __m128i xmm_Di_1 = *(const __m128i*)(tmp +
1485*3ac0a46fSAndroid Build Coastguard Worker                                                      (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
1486*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Di_0 = _mm_add_epi32(xmm_Di_0,
1487*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), xmm_two), 2));
1488*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Di_1 = _mm_add_epi32(xmm_Di_1,
1489*3ac0a46fSAndroid Build Coastguard Worker                                          _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), xmm_two), 2));
1490*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0;
1491*3ac0a46fSAndroid Build Coastguard Worker                 *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1;
1492*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_0 = xmm_Sip1_0;
1493*3ac0a46fSAndroid Build Coastguard Worker                 xmm_Si_1 = xmm_Sip1_1;
1494*3ac0a46fSAndroid Build Coastguard Worker             }
1495*3ac0a46fSAndroid Build Coastguard Worker         }
1496*3ac0a46fSAndroid Build Coastguard Worker         if (((height) % 2) == 0) {
1497*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1498*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2;
1499*3ac0a46fSAndroid Build Coastguard Worker             }
1500*3ac0a46fSAndroid Build Coastguard Worker         }
1501*3ac0a46fSAndroid Build Coastguard Worker     }
1502*3ac0a46fSAndroid Build Coastguard Worker #else
1503*3ac0a46fSAndroid Build Coastguard Worker     if (even) {
1504*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 c;
1505*3ac0a46fSAndroid Build Coastguard Worker         if (height > 1) {
1506*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 i;
1507*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i + 1 < sn; i++) {
1508*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1509*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Dc(i) -= (OPJ_Sc(i) + OPJ_Sc(i + 1)) >> 1;
1510*3ac0a46fSAndroid Build Coastguard Worker                 }
1511*3ac0a46fSAndroid Build Coastguard Worker             }
1512*3ac0a46fSAndroid Build Coastguard Worker             if (((height) % 2) == 0) {
1513*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1514*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Dc(i) -= OPJ_Sc(i);
1515*3ac0a46fSAndroid Build Coastguard Worker                 }
1516*3ac0a46fSAndroid Build Coastguard Worker             }
1517*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1518*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2;
1519*3ac0a46fSAndroid Build Coastguard Worker             }
1520*3ac0a46fSAndroid Build Coastguard Worker             for (i = 1; i < dn; i++) {
1521*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1522*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i) + 2) >> 2;
1523*3ac0a46fSAndroid Build Coastguard Worker                 }
1524*3ac0a46fSAndroid Build Coastguard Worker             }
1525*3ac0a46fSAndroid Build Coastguard Worker             if (((height) % 2) == 1) {
1526*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1527*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2;
1528*3ac0a46fSAndroid Build Coastguard Worker                 }
1529*3ac0a46fSAndroid Build Coastguard Worker             }
1530*3ac0a46fSAndroid Build Coastguard Worker         }
1531*3ac0a46fSAndroid Build Coastguard Worker     } else {
1532*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 c;
1533*3ac0a46fSAndroid Build Coastguard Worker         if (height == 1) {
1534*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1535*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Sc(0) *= 2;
1536*3ac0a46fSAndroid Build Coastguard Worker             }
1537*3ac0a46fSAndroid Build Coastguard Worker         } else {
1538*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 i;
1539*3ac0a46fSAndroid Build Coastguard Worker             for (c = 0; c < NB_ELTS_V8; c++) {
1540*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_Sc(0) -= OPJ_Dc(0);
1541*3ac0a46fSAndroid Build Coastguard Worker             }
1542*3ac0a46fSAndroid Build Coastguard Worker             for (i = 1; i < sn; i++) {
1543*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1544*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Sc(i) -= (OPJ_Dc(i) + OPJ_Dc(i - 1)) >> 1;
1545*3ac0a46fSAndroid Build Coastguard Worker                 }
1546*3ac0a46fSAndroid Build Coastguard Worker             }
1547*3ac0a46fSAndroid Build Coastguard Worker             if (((height) % 2) == 1) {
1548*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1549*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Sc(i) -= OPJ_Dc(i - 1);
1550*3ac0a46fSAndroid Build Coastguard Worker                 }
1551*3ac0a46fSAndroid Build Coastguard Worker             }
1552*3ac0a46fSAndroid Build Coastguard Worker             for (i = 0; i + 1 < dn; i++) {
1553*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1554*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i + 1) + 2) >> 2;
1555*3ac0a46fSAndroid Build Coastguard Worker                 }
1556*3ac0a46fSAndroid Build Coastguard Worker             }
1557*3ac0a46fSAndroid Build Coastguard Worker             if (((height) % 2) == 0) {
1558*3ac0a46fSAndroid Build Coastguard Worker                 for (c = 0; c < NB_ELTS_V8; c++) {
1559*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2;
1560*3ac0a46fSAndroid Build Coastguard Worker                 }
1561*3ac0a46fSAndroid Build Coastguard Worker             }
1562*3ac0a46fSAndroid Build Coastguard Worker         }
1563*3ac0a46fSAndroid Build Coastguard Worker     }
1564*3ac0a46fSAndroid Build Coastguard Worker #endif
1565*3ac0a46fSAndroid Build Coastguard Worker 
1566*3ac0a46fSAndroid Build Coastguard Worker     if (cols == NB_ELTS_V8) {
1567*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn,
1568*3ac0a46fSAndroid Build Coastguard Worker                                     stride_width, even ? 0 : 1, NB_ELTS_V8);
1569*3ac0a46fSAndroid Build Coastguard Worker     } else {
1570*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn,
1571*3ac0a46fSAndroid Build Coastguard Worker                                     stride_width, even ? 0 : 1, cols);
1572*3ac0a46fSAndroid Build Coastguard Worker     }
1573*3ac0a46fSAndroid Build Coastguard Worker }
1574*3ac0a46fSAndroid Build Coastguard Worker 
/* Scaling step of the forward 9-7 vertical pass.                          */
/* Multiplies `end` groups of NB_ELTS_V8 floats by `cst`. Consecutive       */
/* groups are 2 * NB_ELTS_V8 floats apart, the first one starting at `fw`, */
/* so the groups lying in between are left untouched.                      */
static void opj_v8dwt_encode_step1(OPJ_FLOAT32* fw,
                                   OPJ_UINT32 end,
                                   const OPJ_FLOAT32 cst)
{
#ifdef __SSE__
    /* Distance, in __m128 registers, between two groups to scale */
    const OPJ_UINT32 vstride =
        (OPJ_UINT32)(2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)));
    const __m128 vscale = _mm_set1_ps(cst);
    __m128* vgroup = (__m128*) fw;
    OPJ_UINT32 remaining;

    for (remaining = end; remaining != 0; --remaining) {
        /* Two registers are scaled per group */
        vgroup[0] = _mm_mul_ps(vgroup[0], vscale);
        vgroup[1] = _mm_mul_ps(vgroup[1], vscale);
        vgroup += vstride;
    }
#else
    OPJ_UINT32 i;
    OPJ_UINT32 c;

    for (i = 0; i < end; ++i) {
        OPJ_FLOAT32* group = fw + i * 2 * NB_ELTS_V8;
        for (c = 0; c < NB_ELTS_V8; c++) {
            group[c] *= cst;
        }
    }
#endif
}
1597*3ac0a46fSAndroid Build Coastguard Worker 
/* Lifting step of the forward 9-7 vertical pass.                           */
/* Works on groups of NB_ELTS_V8 floats. For each of the first              */
/* min(end, m) iterations, the group located just before `fw` receives      */
/* (left + right) * cst, where `right` is the group at `fw` and `left` is   */
/* the group at `fl` on the first iteration and the previous `right` group  */
/* afterwards; `fw` then advances by two groups.                            */
/* When m < end (in which case m + 1 == end is asserted) one extra group    */
/* is updated using only its left neighbour, counted twice.                 */
static void opj_v8dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw,
                                   OPJ_UINT32 end,
                                   OPJ_UINT32 m,
                                   OPJ_FLOAT32 cst)
{
    const OPJ_UINT32 nb_full = opj_uint_min(end, m);
    OPJ_UINT32 k;
#ifdef __SSE__
    /* Distance, in __m128 registers, between two updated groups */
    const OPJ_UINT32 vstride =
        (OPJ_UINT32)(2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)));
    __m128 vfactor = _mm_set1_ps(cst);
    __m128* vcur = (__m128*) fw;
    const __m128* vleft = (const __m128*) fl;

    for (k = 0; k < nb_full; ++k) {
        vcur[-2] = _mm_add_ps(vcur[-2],
                              _mm_mul_ps(_mm_add_ps(vleft[0], vcur[0]), vfactor));
        vcur[-1] = _mm_add_ps(vcur[-1],
                              _mm_mul_ps(_mm_add_ps(vleft[1], vcur[1]), vfactor));
        /* The right neighbour of this group is the left one of the next */
        vleft = vcur;
        vcur += vstride;
    }
    if (m < end) {
        assert(m + 1 == end);
        /* No right neighbour: the left one is counted twice */
        vfactor = _mm_add_ps(vfactor, vfactor);
        vcur[-2] = _mm_add_ps(vcur[-2], _mm_mul_ps(vcur[-4], vfactor));
        vcur[-1] = _mm_add_ps(vcur[-1], _mm_mul_ps(vcur[-3], vfactor));
    }
#else
    const OPJ_FLOAT32* left = fl;
    OPJ_INT32 c;

    for (k = 0; k < nb_full; ++k) {
        OPJ_FLOAT32* target = fw - NB_ELTS_V8;
        for (c = 0; c < NB_ELTS_V8; c++) {
            target[c] += (left[c] + fw[c]) * cst;
        }
        /* The right neighbour of this group is the left one of the next */
        left = fw;
        fw += 2 * NB_ELTS_V8;
    }
    if (m < end) {
        OPJ_FLOAT32* target = fw - NB_ELTS_V8;
        const OPJ_FLOAT32* prev = fw - 2 * NB_ELTS_V8;
        assert(m + 1 == end);
        /* No right neighbour: the left one is counted twice */
        for (c = 0; c < NB_ELTS_V8; c++) {
            target[c] += (2 * prev[c]) * cst;
        }
    }
#endif
}
1652*3ac0a46fSAndroid Build Coastguard Worker 
1653*3ac0a46fSAndroid Build Coastguard Worker /* Forward 9-7 transform, for the vertical pass, processing cols columns */
1654*3ac0a46fSAndroid Build Coastguard Worker /* where cols <= NB_ELTS_V8 */
opj_dwt_encode_and_deinterleave_v_real(void * arrayIn,void * tmpIn,OPJ_UINT32 height,OPJ_BOOL even,OPJ_UINT32 stride_width,OPJ_UINT32 cols)1655*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_encode_and_deinterleave_v_real(
1656*3ac0a46fSAndroid Build Coastguard Worker     void *arrayIn,
1657*3ac0a46fSAndroid Build Coastguard Worker     void *tmpIn,
1658*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 height,
1659*3ac0a46fSAndroid Build Coastguard Worker     OPJ_BOOL even,
1660*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 stride_width,
1661*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 cols)
1662*3ac0a46fSAndroid Build Coastguard Worker {
1663*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32* OPJ_RESTRICT array = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn;
1664*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn;
1665*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 sn = (OPJ_INT32)((height + (even ? 1 : 0)) >> 1);
1666*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn);
1667*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 a, b;
1668*3ac0a46fSAndroid Build Coastguard Worker 
1669*3ac0a46fSAndroid Build Coastguard Worker     if (height == 1) {
1670*3ac0a46fSAndroid Build Coastguard Worker         return;
1671*3ac0a46fSAndroid Build Coastguard Worker     }
1672*3ac0a46fSAndroid Build Coastguard Worker 
1673*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols);
1674*3ac0a46fSAndroid Build Coastguard Worker 
1675*3ac0a46fSAndroid Build Coastguard Worker     if (even) {
1676*3ac0a46fSAndroid Build Coastguard Worker         a = 0;
1677*3ac0a46fSAndroid Build Coastguard Worker         b = 1;
1678*3ac0a46fSAndroid Build Coastguard Worker     } else {
1679*3ac0a46fSAndroid Build Coastguard Worker         a = 1;
1680*3ac0a46fSAndroid Build Coastguard Worker         b = 0;
1681*3ac0a46fSAndroid Build Coastguard Worker     }
1682*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8,
1683*3ac0a46fSAndroid Build Coastguard Worker                            tmp + (b + 1) * NB_ELTS_V8,
1684*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)dn,
1685*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)opj_int_min(dn, sn - b),
1686*3ac0a46fSAndroid Build Coastguard Worker                            opj_dwt_alpha);
1687*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8,
1688*3ac0a46fSAndroid Build Coastguard Worker                            tmp + (a + 1) * NB_ELTS_V8,
1689*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)sn,
1690*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)opj_int_min(sn, dn - a),
1691*3ac0a46fSAndroid Build Coastguard Worker                            opj_dwt_beta);
1692*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8,
1693*3ac0a46fSAndroid Build Coastguard Worker                            tmp + (b + 1) * NB_ELTS_V8,
1694*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)dn,
1695*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)opj_int_min(dn, sn - b),
1696*3ac0a46fSAndroid Build Coastguard Worker                            opj_dwt_gamma);
1697*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8,
1698*3ac0a46fSAndroid Build Coastguard Worker                            tmp + (a + 1) * NB_ELTS_V8,
1699*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)sn,
1700*3ac0a46fSAndroid Build Coastguard Worker                            (OPJ_UINT32)opj_int_min(sn, dn - a),
1701*3ac0a46fSAndroid Build Coastguard Worker                            opj_dwt_delta);
1702*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_encode_step1(tmp + b * NB_ELTS_V8, (OPJ_UINT32)dn,
1703*3ac0a46fSAndroid Build Coastguard Worker                            opj_K);
1704*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_encode_step1(tmp + a * NB_ELTS_V8, (OPJ_UINT32)sn,
1705*3ac0a46fSAndroid Build Coastguard Worker                            opj_invK);
1706*3ac0a46fSAndroid Build Coastguard Worker 
1707*3ac0a46fSAndroid Build Coastguard Worker 
1708*3ac0a46fSAndroid Build Coastguard Worker     if (cols == NB_ELTS_V8) {
1709*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp,
1710*3ac0a46fSAndroid Build Coastguard Worker                                     (OPJ_INT32*)array,
1711*3ac0a46fSAndroid Build Coastguard Worker                                     (OPJ_INT32)dn, (OPJ_INT32)sn,
1712*3ac0a46fSAndroid Build Coastguard Worker                                     stride_width, even ? 0 : 1, NB_ELTS_V8);
1713*3ac0a46fSAndroid Build Coastguard Worker     } else {
1714*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp,
1715*3ac0a46fSAndroid Build Coastguard Worker                                     (OPJ_INT32*)array,
1716*3ac0a46fSAndroid Build Coastguard Worker                                     (OPJ_INT32)dn, (OPJ_INT32)sn,
1717*3ac0a46fSAndroid Build Coastguard Worker                                     stride_width, even ? 0 : 1, cols);
1718*3ac0a46fSAndroid Build Coastguard Worker     }
1719*3ac0a46fSAndroid Build Coastguard Worker }
1720*3ac0a46fSAndroid Build Coastguard Worker 
1721*3ac0a46fSAndroid Build Coastguard Worker 
1722*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                            */
1723*3ac0a46fSAndroid Build Coastguard Worker /* Forward 5-3 wavelet transform in 2-D. */
1724*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
opj_dwt_encode_procedure(opj_thread_pool_t * tp,opj_tcd_tilecomp_t * tilec,opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,opj_encode_and_deinterleave_h_one_row_fnptr_type p_encode_and_deinterleave_h_one_row)1725*3ac0a46fSAndroid Build Coastguard Worker static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
1726*3ac0a46fSAndroid Build Coastguard Worker         opj_tcd_tilecomp_t * tilec,
1727*3ac0a46fSAndroid Build Coastguard Worker         opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
1728*3ac0a46fSAndroid Build Coastguard Worker         opj_encode_and_deinterleave_h_one_row_fnptr_type
1729*3ac0a46fSAndroid Build Coastguard Worker         p_encode_and_deinterleave_h_one_row)
1730*3ac0a46fSAndroid Build Coastguard Worker {
1731*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
1732*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 *bj = 00;
1733*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
1734*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 l;
1735*3ac0a46fSAndroid Build Coastguard Worker 
1736*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T l_data_size;
1737*3ac0a46fSAndroid Build Coastguard Worker 
1738*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t * l_cur_res = 0;
1739*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t * l_last_res = 0;
1740*3ac0a46fSAndroid Build Coastguard Worker     const int num_threads = opj_thread_pool_get_thread_count(tp);
1741*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data;
1742*3ac0a46fSAndroid Build Coastguard Worker 
1743*3ac0a46fSAndroid Build Coastguard Worker     w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
1744*3ac0a46fSAndroid Build Coastguard Worker     l = (OPJ_INT32)tilec->numresolutions - 1;
1745*3ac0a46fSAndroid Build Coastguard Worker 
1746*3ac0a46fSAndroid Build Coastguard Worker     l_cur_res = tilec->resolutions + l;
1747*3ac0a46fSAndroid Build Coastguard Worker     l_last_res = l_cur_res - 1;
1748*3ac0a46fSAndroid Build Coastguard Worker 
1749*3ac0a46fSAndroid Build Coastguard Worker     l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
1750*3ac0a46fSAndroid Build Coastguard Worker     /* overflow check */
1751*3ac0a46fSAndroid Build Coastguard Worker     if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) {
1752*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
1753*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
1754*3ac0a46fSAndroid Build Coastguard Worker     }
1755*3ac0a46fSAndroid Build Coastguard Worker     l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32);
1756*3ac0a46fSAndroid Build Coastguard Worker     bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
1757*3ac0a46fSAndroid Build Coastguard Worker     /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */
1758*3ac0a46fSAndroid Build Coastguard Worker     /* in that case, so do not error out */
1759*3ac0a46fSAndroid Build Coastguard Worker     if (l_data_size != 0 && ! bj) {
1760*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
1761*3ac0a46fSAndroid Build Coastguard Worker     }
1762*3ac0a46fSAndroid Build Coastguard Worker     i = l;
1763*3ac0a46fSAndroid Build Coastguard Worker 
1764*3ac0a46fSAndroid Build Coastguard Worker     while (i--) {
1765*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 j;
1766*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 rw;           /* width of the resolution level computed   */
1767*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 rh;           /* height of the resolution level computed  */
1768*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32
1769*3ac0a46fSAndroid Build Coastguard Worker         rw1;      /* width of the resolution level once lower than computed one                                       */
1770*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32
1771*3ac0a46fSAndroid Build Coastguard Worker         rh1;      /* height of the resolution level once lower than computed one                                      */
1772*3ac0a46fSAndroid Build Coastguard Worker         OPJ_INT32 cas_col;  /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */
1773*3ac0a46fSAndroid Build Coastguard Worker         OPJ_INT32 cas_row;  /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering   */
1774*3ac0a46fSAndroid Build Coastguard Worker         OPJ_INT32 dn, sn;
1775*3ac0a46fSAndroid Build Coastguard Worker 
1776*3ac0a46fSAndroid Build Coastguard Worker         rw  = (OPJ_UINT32)(l_cur_res->x1 - l_cur_res->x0);
1777*3ac0a46fSAndroid Build Coastguard Worker         rh  = (OPJ_UINT32)(l_cur_res->y1 - l_cur_res->y0);
1778*3ac0a46fSAndroid Build Coastguard Worker         rw1 = (OPJ_UINT32)(l_last_res->x1 - l_last_res->x0);
1779*3ac0a46fSAndroid Build Coastguard Worker         rh1 = (OPJ_UINT32)(l_last_res->y1 - l_last_res->y0);
1780*3ac0a46fSAndroid Build Coastguard Worker 
1781*3ac0a46fSAndroid Build Coastguard Worker         cas_row = l_cur_res->x0 & 1;
1782*3ac0a46fSAndroid Build Coastguard Worker         cas_col = l_cur_res->y0 & 1;
1783*3ac0a46fSAndroid Build Coastguard Worker 
1784*3ac0a46fSAndroid Build Coastguard Worker         sn = (OPJ_INT32)rh1;
1785*3ac0a46fSAndroid Build Coastguard Worker         dn = (OPJ_INT32)(rh - rh1);
1786*3ac0a46fSAndroid Build Coastguard Worker 
1787*3ac0a46fSAndroid Build Coastguard Worker         /* Perform vertical pass */
1788*3ac0a46fSAndroid Build Coastguard Worker         if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
1789*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) {
1790*3ac0a46fSAndroid Build Coastguard Worker                 p_encode_and_deinterleave_v(tiledp + j,
1791*3ac0a46fSAndroid Build Coastguard Worker                                             bj,
1792*3ac0a46fSAndroid Build Coastguard Worker                                             rh,
1793*3ac0a46fSAndroid Build Coastguard Worker                                             cas_col == 0,
1794*3ac0a46fSAndroid Build Coastguard Worker                                             w,
1795*3ac0a46fSAndroid Build Coastguard Worker                                             NB_ELTS_V8);
1796*3ac0a46fSAndroid Build Coastguard Worker             }
1797*3ac0a46fSAndroid Build Coastguard Worker             if (j < rw) {
1798*3ac0a46fSAndroid Build Coastguard Worker                 p_encode_and_deinterleave_v(tiledp + j,
1799*3ac0a46fSAndroid Build Coastguard Worker                                             bj,
1800*3ac0a46fSAndroid Build Coastguard Worker                                             rh,
1801*3ac0a46fSAndroid Build Coastguard Worker                                             cas_col == 0,
1802*3ac0a46fSAndroid Build Coastguard Worker                                             w,
1803*3ac0a46fSAndroid Build Coastguard Worker                                             rw - j);
1804*3ac0a46fSAndroid Build Coastguard Worker             }
1805*3ac0a46fSAndroid Build Coastguard Worker         }  else {
1806*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
1807*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 step_j;
1808*3ac0a46fSAndroid Build Coastguard Worker 
1809*3ac0a46fSAndroid Build Coastguard Worker             if (rw < num_jobs) {
1810*3ac0a46fSAndroid Build Coastguard Worker                 num_jobs = rw;
1811*3ac0a46fSAndroid Build Coastguard Worker             }
1812*3ac0a46fSAndroid Build Coastguard Worker             step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
1813*3ac0a46fSAndroid Build Coastguard Worker 
1814*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < num_jobs; j++) {
1815*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt_encode_v_job_t* job;
1816*3ac0a46fSAndroid Build Coastguard Worker 
1817*3ac0a46fSAndroid Build Coastguard Worker                 job = (opj_dwt_encode_v_job_t*) opj_malloc(sizeof(opj_dwt_encode_v_job_t));
1818*3ac0a46fSAndroid Build Coastguard Worker                 if (!job) {
1819*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
1820*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(bj);
1821*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
1822*3ac0a46fSAndroid Build Coastguard Worker                 }
1823*3ac0a46fSAndroid Build Coastguard Worker                 job->v.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
1824*3ac0a46fSAndroid Build Coastguard Worker                 if (!job->v.mem) {
1825*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
1826*3ac0a46fSAndroid Build Coastguard Worker                     opj_free(job);
1827*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(bj);
1828*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
1829*3ac0a46fSAndroid Build Coastguard Worker                 }
1830*3ac0a46fSAndroid Build Coastguard Worker                 job->v.dn = dn;
1831*3ac0a46fSAndroid Build Coastguard Worker                 job->v.sn = sn;
1832*3ac0a46fSAndroid Build Coastguard Worker                 job->v.cas = cas_col;
1833*3ac0a46fSAndroid Build Coastguard Worker                 job->rh = rh;
1834*3ac0a46fSAndroid Build Coastguard Worker                 job->w = w;
1835*3ac0a46fSAndroid Build Coastguard Worker                 job->tiledp = tiledp;
1836*3ac0a46fSAndroid Build Coastguard Worker                 job->min_j = j * step_j;
1837*3ac0a46fSAndroid Build Coastguard Worker                 job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j;
1838*3ac0a46fSAndroid Build Coastguard Worker                 job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v;
1839*3ac0a46fSAndroid Build Coastguard Worker                 opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job);
1840*3ac0a46fSAndroid Build Coastguard Worker             }
1841*3ac0a46fSAndroid Build Coastguard Worker             opj_thread_pool_wait_completion(tp, 0);
1842*3ac0a46fSAndroid Build Coastguard Worker         }
1843*3ac0a46fSAndroid Build Coastguard Worker 
1844*3ac0a46fSAndroid Build Coastguard Worker         sn = (OPJ_INT32)rw1;
1845*3ac0a46fSAndroid Build Coastguard Worker         dn = (OPJ_INT32)(rw - rw1);
1846*3ac0a46fSAndroid Build Coastguard Worker 
1847*3ac0a46fSAndroid Build Coastguard Worker         /* Perform horizontal pass */
1848*3ac0a46fSAndroid Build Coastguard Worker         if (num_threads <= 1 || rh <= 1) {
1849*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < rh; j++) {
1850*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_INT32* OPJ_RESTRICT aj = tiledp + j * w;
1851*3ac0a46fSAndroid Build Coastguard Worker                 (*p_encode_and_deinterleave_h_one_row)(aj, bj, rw,
1852*3ac0a46fSAndroid Build Coastguard Worker                                                        cas_row == 0 ? OPJ_TRUE : OPJ_FALSE);
1853*3ac0a46fSAndroid Build Coastguard Worker             }
1854*3ac0a46fSAndroid Build Coastguard Worker         }  else {
1855*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
1856*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 step_j;
1857*3ac0a46fSAndroid Build Coastguard Worker 
1858*3ac0a46fSAndroid Build Coastguard Worker             if (rh < num_jobs) {
1859*3ac0a46fSAndroid Build Coastguard Worker                 num_jobs = rh;
1860*3ac0a46fSAndroid Build Coastguard Worker             }
1861*3ac0a46fSAndroid Build Coastguard Worker             step_j = (rh / num_jobs);
1862*3ac0a46fSAndroid Build Coastguard Worker 
1863*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < num_jobs; j++) {
1864*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt_encode_h_job_t* job;
1865*3ac0a46fSAndroid Build Coastguard Worker 
1866*3ac0a46fSAndroid Build Coastguard Worker                 job = (opj_dwt_encode_h_job_t*) opj_malloc(sizeof(opj_dwt_encode_h_job_t));
1867*3ac0a46fSAndroid Build Coastguard Worker                 if (!job) {
1868*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
1869*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(bj);
1870*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
1871*3ac0a46fSAndroid Build Coastguard Worker                 }
1872*3ac0a46fSAndroid Build Coastguard Worker                 job->h.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
1873*3ac0a46fSAndroid Build Coastguard Worker                 if (!job->h.mem) {
1874*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
1875*3ac0a46fSAndroid Build Coastguard Worker                     opj_free(job);
1876*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(bj);
1877*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
1878*3ac0a46fSAndroid Build Coastguard Worker                 }
1879*3ac0a46fSAndroid Build Coastguard Worker                 job->h.dn = dn;
1880*3ac0a46fSAndroid Build Coastguard Worker                 job->h.sn = sn;
1881*3ac0a46fSAndroid Build Coastguard Worker                 job->h.cas = cas_row;
1882*3ac0a46fSAndroid Build Coastguard Worker                 job->rw = rw;
1883*3ac0a46fSAndroid Build Coastguard Worker                 job->w = w;
1884*3ac0a46fSAndroid Build Coastguard Worker                 job->tiledp = tiledp;
1885*3ac0a46fSAndroid Build Coastguard Worker                 job->min_j = j * step_j;
1886*3ac0a46fSAndroid Build Coastguard Worker                 job->max_j = (j + 1U) * step_j; /* this can overflow */
1887*3ac0a46fSAndroid Build Coastguard Worker                 if (j == (num_jobs - 1U)) {  /* this will take care of the overflow */
1888*3ac0a46fSAndroid Build Coastguard Worker                     job->max_j = rh;
1889*3ac0a46fSAndroid Build Coastguard Worker                 }
1890*3ac0a46fSAndroid Build Coastguard Worker                 job->p_function = p_encode_and_deinterleave_h_one_row;
1891*3ac0a46fSAndroid Build Coastguard Worker                 opj_thread_pool_submit_job(tp, opj_dwt_encode_h_func, job);
1892*3ac0a46fSAndroid Build Coastguard Worker             }
1893*3ac0a46fSAndroid Build Coastguard Worker             opj_thread_pool_wait_completion(tp, 0);
1894*3ac0a46fSAndroid Build Coastguard Worker         }
1895*3ac0a46fSAndroid Build Coastguard Worker 
1896*3ac0a46fSAndroid Build Coastguard Worker         l_cur_res = l_last_res;
1897*3ac0a46fSAndroid Build Coastguard Worker 
1898*3ac0a46fSAndroid Build Coastguard Worker         --l_last_res;
1899*3ac0a46fSAndroid Build Coastguard Worker     }
1900*3ac0a46fSAndroid Build Coastguard Worker 
1901*3ac0a46fSAndroid Build Coastguard Worker     opj_aligned_free(bj);
1902*3ac0a46fSAndroid Build Coastguard Worker     return OPJ_TRUE;
1903*3ac0a46fSAndroid Build Coastguard Worker }
1904*3ac0a46fSAndroid Build Coastguard Worker 
1905*3ac0a46fSAndroid Build Coastguard Worker /* Forward 5-3 wavelet transform in 2-D. */
1906*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
opj_dwt_encode(opj_tcd_t * p_tcd,opj_tcd_tilecomp_t * tilec)1907*3ac0a46fSAndroid Build Coastguard Worker OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd,
1908*3ac0a46fSAndroid Build Coastguard Worker                         opj_tcd_tilecomp_t * tilec)
1909*3ac0a46fSAndroid Build Coastguard Worker {
1910*3ac0a46fSAndroid Build Coastguard Worker     return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
1911*3ac0a46fSAndroid Build Coastguard Worker                                     opj_dwt_encode_and_deinterleave_v,
1912*3ac0a46fSAndroid Build Coastguard Worker                                     opj_dwt_encode_and_deinterleave_h_one_row);
1913*3ac0a46fSAndroid Build Coastguard Worker }
1914*3ac0a46fSAndroid Build Coastguard Worker 
1915*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                            */
1916*3ac0a46fSAndroid Build Coastguard Worker /* Inverse 5-3 wavelet transform in 2-D. */
1917*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
opj_dwt_decode(opj_tcd_t * p_tcd,opj_tcd_tilecomp_t * tilec,OPJ_UINT32 numres)1918*3ac0a46fSAndroid Build Coastguard Worker OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec,
1919*3ac0a46fSAndroid Build Coastguard Worker                         OPJ_UINT32 numres)
1920*3ac0a46fSAndroid Build Coastguard Worker {
1921*3ac0a46fSAndroid Build Coastguard Worker     if (p_tcd->whole_tile_decoding) {
1922*3ac0a46fSAndroid Build Coastguard Worker         return opj_dwt_decode_tile(p_tcd->thread_pool, tilec, numres);
1923*3ac0a46fSAndroid Build Coastguard Worker     } else {
1924*3ac0a46fSAndroid Build Coastguard Worker         return opj_dwt_decode_partial_tile(tilec, numres);
1925*3ac0a46fSAndroid Build Coastguard Worker     }
1926*3ac0a46fSAndroid Build Coastguard Worker }
1927*3ac0a46fSAndroid Build Coastguard Worker 
1928*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                */
1929*3ac0a46fSAndroid Build Coastguard Worker /* Get norm of 5-3 wavelet. */
1930*3ac0a46fSAndroid Build Coastguard Worker /* </summary>               */
opj_dwt_getnorm(OPJ_UINT32 level,OPJ_UINT32 orient)1931*3ac0a46fSAndroid Build Coastguard Worker OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient)
1932*3ac0a46fSAndroid Build Coastguard Worker {
1933*3ac0a46fSAndroid Build Coastguard Worker     /* FIXME ! This is just a band-aid to avoid a buffer overflow */
1934*3ac0a46fSAndroid Build Coastguard Worker     /* but the array should really be extended up to 33 resolution levels */
1935*3ac0a46fSAndroid Build Coastguard Worker     /* See https://github.com/uclouvain/openjpeg/issues/493 */
1936*3ac0a46fSAndroid Build Coastguard Worker     if (orient == 0 && level >= 10) {
1937*3ac0a46fSAndroid Build Coastguard Worker         level = 9;
1938*3ac0a46fSAndroid Build Coastguard Worker     } else if (orient > 0 && level >= 9) {
1939*3ac0a46fSAndroid Build Coastguard Worker         level = 8;
1940*3ac0a46fSAndroid Build Coastguard Worker     }
1941*3ac0a46fSAndroid Build Coastguard Worker     return opj_dwt_norms[orient][level];
1942*3ac0a46fSAndroid Build Coastguard Worker }
1943*3ac0a46fSAndroid Build Coastguard Worker 
1944*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
1945*3ac0a46fSAndroid Build Coastguard Worker /* Forward 9-7 wavelet transform in 2-D. */
1946*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
opj_dwt_encode_real(opj_tcd_t * p_tcd,opj_tcd_tilecomp_t * tilec)1947*3ac0a46fSAndroid Build Coastguard Worker OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd,
1948*3ac0a46fSAndroid Build Coastguard Worker                              opj_tcd_tilecomp_t * tilec)
1949*3ac0a46fSAndroid Build Coastguard Worker {
1950*3ac0a46fSAndroid Build Coastguard Worker     return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
1951*3ac0a46fSAndroid Build Coastguard Worker                                     opj_dwt_encode_and_deinterleave_v_real,
1952*3ac0a46fSAndroid Build Coastguard Worker                                     opj_dwt_encode_and_deinterleave_h_one_row_real);
1953*3ac0a46fSAndroid Build Coastguard Worker }
1954*3ac0a46fSAndroid Build Coastguard Worker 
1955*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                */
1956*3ac0a46fSAndroid Build Coastguard Worker /* Get norm of 9-7 wavelet. */
1957*3ac0a46fSAndroid Build Coastguard Worker /* </summary>               */
opj_dwt_getnorm_real(OPJ_UINT32 level,OPJ_UINT32 orient)1958*3ac0a46fSAndroid Build Coastguard Worker OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient)
1959*3ac0a46fSAndroid Build Coastguard Worker {
1960*3ac0a46fSAndroid Build Coastguard Worker     /* FIXME ! This is just a band-aid to avoid a buffer overflow */
1961*3ac0a46fSAndroid Build Coastguard Worker     /* but the array should really be extended up to 33 resolution levels */
1962*3ac0a46fSAndroid Build Coastguard Worker     /* See https://github.com/uclouvain/openjpeg/issues/493 */
1963*3ac0a46fSAndroid Build Coastguard Worker     if (orient == 0 && level >= 10) {
1964*3ac0a46fSAndroid Build Coastguard Worker         level = 9;
1965*3ac0a46fSAndroid Build Coastguard Worker     } else if (orient > 0 && level >= 9) {
1966*3ac0a46fSAndroid Build Coastguard Worker         level = 8;
1967*3ac0a46fSAndroid Build Coastguard Worker     }
1968*3ac0a46fSAndroid Build Coastguard Worker     return opj_dwt_norms_real[orient][level];
1969*3ac0a46fSAndroid Build Coastguard Worker }
1970*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Fills tccp->stepsizes[] for the 3 * numresolutions - 2 bands of tccp.
 *
 * Band 0 is orientation 0 of resolution 0; each following group of three
 * bands holds orientations 1, 2 and 3 of the next resolution.
 * For every band the step size is 1.0 when qntsty is J2K_CCP_QNTSTY_NOQNT,
 * otherwise (1 << gain) divided by opj_dwt_getnorm_real(level, orient).
 * The value is scaled by 8192, floored, and handed to
 * opj_dwt_encode_stepsize() together with prec + gain.
 */
void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec)
{
    OPJ_UINT32 bandno;
    const OPJ_UINT32 numbands = 3 * tccp->numresolutions - 2;

    for (bandno = 0; bandno < numbands; bandno++) {
        OPJ_FLOAT64 stepsize = 1.0;
        OPJ_UINT32 resno = 0;
        OPJ_UINT32 orient = 0;
        OPJ_UINT32 level;
        OPJ_UINT32 gain;

        if (bandno != 0) {
            resno = (bandno - 1) / 3 + 1;
            orient = (bandno - 1) % 3 + 1;
        }
        level = tccp->numresolutions - 1 - resno;

        /* gain: 0 if qmfbid == 0 or orient == 0, 1 for orient 1 and 2, else 2 */
        if (tccp->qmfbid == 0 || orient == 0) {
            gain = 0;
        } else if (orient == 1 || orient == 2) {
            gain = 1;
        } else {
            gain = 2;
        }

        if (tccp->qntsty != J2K_CCP_QNTSTY_NOQNT) {
            const OPJ_FLOAT64 norm = opj_dwt_getnorm_real(level, orient);
            stepsize = (1 << gain) / norm;
        }

        opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0),
                                (OPJ_INT32)(prec + gain), &tccp->stepsizes[bandno]);
    }
}
1994*3ac0a46fSAndroid Build Coastguard Worker 
1995*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
1996*3ac0a46fSAndroid Build Coastguard Worker /* Determine maximum computed resolution level for inverse wavelet transform */
1997*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
opj_dwt_max_resolution(opj_tcd_resolution_t * OPJ_RESTRICT r,OPJ_UINT32 i)1998*3ac0a46fSAndroid Build Coastguard Worker static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
1999*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 i)
2000*3ac0a46fSAndroid Build Coastguard Worker {
2001*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 mr   = 0;
2002*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
2003*3ac0a46fSAndroid Build Coastguard Worker     while (--i) {
2004*3ac0a46fSAndroid Build Coastguard Worker         ++r;
2005*3ac0a46fSAndroid Build Coastguard Worker         if (mr < (w = (OPJ_UINT32)(r->x1 - r->x0))) {
2006*3ac0a46fSAndroid Build Coastguard Worker             mr = w ;
2007*3ac0a46fSAndroid Build Coastguard Worker         }
2008*3ac0a46fSAndroid Build Coastguard Worker         if (mr < (w = (OPJ_UINT32)(r->y1 - r->y0))) {
2009*3ac0a46fSAndroid Build Coastguard Worker             mr = w ;
2010*3ac0a46fSAndroid Build Coastguard Worker         }
2011*3ac0a46fSAndroid Build Coastguard Worker     }
2012*3ac0a46fSAndroid Build Coastguard Worker     return mr ;
2013*3ac0a46fSAndroid Build Coastguard Worker }
2014*3ac0a46fSAndroid Build Coastguard Worker 
2015*3ac0a46fSAndroid Build Coastguard Worker typedef struct {
2016*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t h;
2017*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw;
2018*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
2019*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT tiledp;
2020*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 min_j;
2021*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 max_j;
2022*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt_decode_h_job_t;
2023*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Thread-pool callback: applies opj_idwt53_h() to rows [min_j, max_j) of
 * job->tiledp, then releases job->h.mem and the job itself.
 *
 * user_data : an opj_dwt_decode_h_job_t*; ownership passes to this function.
 * tls       : unused.
 */
static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls)
{
    opj_dwt_decode_h_job_t* job = (opj_dwt_decode_h_job_t*)user_data;
    OPJ_UINT32 j;
    (void)tls;

    for (j = job->min_j; j < job->max_j; j++) {
        /* Widen before multiplying: row * stride can exceed 32 bits on */
        /* large tiles. The single-threaded loop in opj_dwt_decode_tile() */
        /* computes the same offset in OPJ_SIZE_T. */
        opj_idwt53_h(&job->h, &job->tiledp[(OPJ_SIZE_T)j * job->w]);
    }

    opj_aligned_free(job->h.mem);
    opj_free(job);
}
2038*3ac0a46fSAndroid Build Coastguard Worker 
2039*3ac0a46fSAndroid Build Coastguard Worker typedef struct {
2040*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t v;
2041*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh;
2042*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
2043*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 * OPJ_RESTRICT tiledp;
2044*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 min_j;
2045*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 max_j;
2046*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt_decode_v_job_t;
2047*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Thread-pool callback: applies opj_idwt53_v() to columns [min_j, max_j) of
 * job->tiledp, PARALLEL_COLS_53 columns per call plus one shorter call for
 * any remainder, then releases job->v.mem and the job itself.
 *
 * user_data : an opj_dwt_decode_v_job_t*; ownership passes to this function.
 * tls       : unused.
 */
static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
{
    opj_dwt_decode_v_job_t* job = (opj_dwt_decode_v_job_t*)user_data;
    OPJ_UINT32 col = job->min_j;
    (void)tls;

    /* Full groups of PARALLEL_COLS_53 columns */
    while (col + PARALLEL_COLS_53 <= job->max_j) {
        opj_idwt53_v(&job->v, &job->tiledp[col], (OPJ_SIZE_T)job->w,
                     PARALLEL_COLS_53);
        col += PARALLEL_COLS_53;
    }

    /* Remaining columns, fewer than PARALLEL_COLS_53 */
    if (col < job->max_j) {
        opj_idwt53_v(&job->v, &job->tiledp[col], (OPJ_SIZE_T)job->w,
                     (OPJ_INT32)(job->max_j - col));
    }

    opj_aligned_free(job->v.mem);
    opj_free(job);
}
2067*3ac0a46fSAndroid Build Coastguard Worker 
2068*3ac0a46fSAndroid Build Coastguard Worker 
2069*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                            */
2070*3ac0a46fSAndroid Build Coastguard Worker /* Inverse wavelet transform in 2-D.    */
2071*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                           */
opj_dwt_decode_tile(opj_thread_pool_t * tp,const opj_tcd_tilecomp_t * tilec,OPJ_UINT32 numres)2072*3ac0a46fSAndroid Build Coastguard Worker static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
2073*3ac0a46fSAndroid Build Coastguard Worker         const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres)
2074*3ac0a46fSAndroid Build Coastguard Worker {
2075*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t h;
2076*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t v;
2077*3ac0a46fSAndroid Build Coastguard Worker 
2078*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* tr = tilec->resolutions;
2079*3ac0a46fSAndroid Build Coastguard Worker 
2080*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 -
2081*3ac0a46fSAndroid Build Coastguard Worker                                  tr->x0);  /* width of the resolution level computed */
2082*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
2083*3ac0a46fSAndroid Build Coastguard Worker                                  tr->y0);  /* height of the resolution level computed */
2084*3ac0a46fSAndroid Build Coastguard Worker 
2085*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions -
2086*3ac0a46fSAndroid Build Coastguard Worker                                                                1].x1 -
2087*3ac0a46fSAndroid Build Coastguard Worker                                 tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
2088*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T h_mem_size;
2089*3ac0a46fSAndroid Build Coastguard Worker     int num_threads;
2090*3ac0a46fSAndroid Build Coastguard Worker 
2091*3ac0a46fSAndroid Build Coastguard Worker     if (numres == 1U) {
2092*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_TRUE;
2093*3ac0a46fSAndroid Build Coastguard Worker     }
2094*3ac0a46fSAndroid Build Coastguard Worker     num_threads = opj_thread_pool_get_thread_count(tp);
2095*3ac0a46fSAndroid Build Coastguard Worker     h.mem_count = opj_dwt_max_resolution(tr, numres);
2096*3ac0a46fSAndroid Build Coastguard Worker     /* overflow check */
2097*3ac0a46fSAndroid Build Coastguard Worker     if (h.mem_count > (SIZE_MAX / PARALLEL_COLS_53 / sizeof(OPJ_INT32))) {
2098*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
2099*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
2100*3ac0a46fSAndroid Build Coastguard Worker     }
2101*3ac0a46fSAndroid Build Coastguard Worker     /* We need PARALLEL_COLS_53 times the height of the array, */
2102*3ac0a46fSAndroid Build Coastguard Worker     /* since for the vertical pass */
2103*3ac0a46fSAndroid Build Coastguard Worker     /* we process PARALLEL_COLS_53 columns at a time */
2104*3ac0a46fSAndroid Build Coastguard Worker     h_mem_size = h.mem_count * PARALLEL_COLS_53 * sizeof(OPJ_INT32);
2105*3ac0a46fSAndroid Build Coastguard Worker     h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
2106*3ac0a46fSAndroid Build Coastguard Worker     if (! h.mem) {
2107*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
2108*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
2109*3ac0a46fSAndroid Build Coastguard Worker     }
2110*3ac0a46fSAndroid Build Coastguard Worker 
2111*3ac0a46fSAndroid Build Coastguard Worker     v.mem_count = h.mem_count;
2112*3ac0a46fSAndroid Build Coastguard Worker     v.mem = h.mem;
2113*3ac0a46fSAndroid Build Coastguard Worker 
2114*3ac0a46fSAndroid Build Coastguard Worker     while (--numres) {
2115*3ac0a46fSAndroid Build Coastguard Worker         OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data;
2116*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 j;
2117*3ac0a46fSAndroid Build Coastguard Worker 
2118*3ac0a46fSAndroid Build Coastguard Worker         ++tr;
2119*3ac0a46fSAndroid Build Coastguard Worker         h.sn = (OPJ_INT32)rw;
2120*3ac0a46fSAndroid Build Coastguard Worker         v.sn = (OPJ_INT32)rh;
2121*3ac0a46fSAndroid Build Coastguard Worker 
2122*3ac0a46fSAndroid Build Coastguard Worker         rw = (OPJ_UINT32)(tr->x1 - tr->x0);
2123*3ac0a46fSAndroid Build Coastguard Worker         rh = (OPJ_UINT32)(tr->y1 - tr->y0);
2124*3ac0a46fSAndroid Build Coastguard Worker 
2125*3ac0a46fSAndroid Build Coastguard Worker         h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
2126*3ac0a46fSAndroid Build Coastguard Worker         h.cas = tr->x0 % 2;
2127*3ac0a46fSAndroid Build Coastguard Worker 
2128*3ac0a46fSAndroid Build Coastguard Worker         if (num_threads <= 1 || rh <= 1) {
2129*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < rh; ++j) {
2130*3ac0a46fSAndroid Build Coastguard Worker                 opj_idwt53_h(&h, &tiledp[(OPJ_SIZE_T)j * w]);
2131*3ac0a46fSAndroid Build Coastguard Worker             }
2132*3ac0a46fSAndroid Build Coastguard Worker         } else {
2133*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
2134*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 step_j;
2135*3ac0a46fSAndroid Build Coastguard Worker 
2136*3ac0a46fSAndroid Build Coastguard Worker             if (rh < num_jobs) {
2137*3ac0a46fSAndroid Build Coastguard Worker                 num_jobs = rh;
2138*3ac0a46fSAndroid Build Coastguard Worker             }
2139*3ac0a46fSAndroid Build Coastguard Worker             step_j = (rh / num_jobs);
2140*3ac0a46fSAndroid Build Coastguard Worker 
2141*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < num_jobs; j++) {
2142*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt_decode_h_job_t* job;
2143*3ac0a46fSAndroid Build Coastguard Worker 
2144*3ac0a46fSAndroid Build Coastguard Worker                 job = (opj_dwt_decode_h_job_t*) opj_malloc(sizeof(opj_dwt_decode_h_job_t));
2145*3ac0a46fSAndroid Build Coastguard Worker                 if (!job) {
2146*3ac0a46fSAndroid Build Coastguard Worker                     /* It would be nice to fallback to single thread case, but */
2147*3ac0a46fSAndroid Build Coastguard Worker                     /* unfortunately some jobs may be launched and have modified */
2148*3ac0a46fSAndroid Build Coastguard Worker                     /* tiledp, so it is not practical to recover from that error */
2149*3ac0a46fSAndroid Build Coastguard Worker                     /* FIXME event manager error callback */
2150*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
2151*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.mem);
2152*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
2153*3ac0a46fSAndroid Build Coastguard Worker                 }
2154*3ac0a46fSAndroid Build Coastguard Worker                 job->h = h;
2155*3ac0a46fSAndroid Build Coastguard Worker                 job->rw = rw;
2156*3ac0a46fSAndroid Build Coastguard Worker                 job->w = w;
2157*3ac0a46fSAndroid Build Coastguard Worker                 job->tiledp = tiledp;
2158*3ac0a46fSAndroid Build Coastguard Worker                 job->min_j = j * step_j;
2159*3ac0a46fSAndroid Build Coastguard Worker                 job->max_j = (j + 1U) * step_j; /* this can overflow */
2160*3ac0a46fSAndroid Build Coastguard Worker                 if (j == (num_jobs - 1U)) {  /* this will take care of the overflow */
2161*3ac0a46fSAndroid Build Coastguard Worker                     job->max_j = rh;
2162*3ac0a46fSAndroid Build Coastguard Worker                 }
2163*3ac0a46fSAndroid Build Coastguard Worker                 job->h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
2164*3ac0a46fSAndroid Build Coastguard Worker                 if (!job->h.mem) {
2165*3ac0a46fSAndroid Build Coastguard Worker                     /* FIXME event manager error callback */
2166*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
2167*3ac0a46fSAndroid Build Coastguard Worker                     opj_free(job);
2168*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.mem);
2169*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
2170*3ac0a46fSAndroid Build Coastguard Worker                 }
2171*3ac0a46fSAndroid Build Coastguard Worker                 opj_thread_pool_submit_job(tp, opj_dwt_decode_h_func, job);
2172*3ac0a46fSAndroid Build Coastguard Worker             }
2173*3ac0a46fSAndroid Build Coastguard Worker             opj_thread_pool_wait_completion(tp, 0);
2174*3ac0a46fSAndroid Build Coastguard Worker         }
2175*3ac0a46fSAndroid Build Coastguard Worker 
2176*3ac0a46fSAndroid Build Coastguard Worker         v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
2177*3ac0a46fSAndroid Build Coastguard Worker         v.cas = tr->y0 % 2;
2178*3ac0a46fSAndroid Build Coastguard Worker 
2179*3ac0a46fSAndroid Build Coastguard Worker         if (num_threads <= 1 || rw <= 1) {
2180*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j + PARALLEL_COLS_53 <= rw;
2181*3ac0a46fSAndroid Build Coastguard Worker                     j += PARALLEL_COLS_53) {
2182*3ac0a46fSAndroid Build Coastguard Worker                 opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, PARALLEL_COLS_53);
2183*3ac0a46fSAndroid Build Coastguard Worker             }
2184*3ac0a46fSAndroid Build Coastguard Worker             if (j < rw) {
2185*3ac0a46fSAndroid Build Coastguard Worker                 opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, (OPJ_INT32)(rw - j));
2186*3ac0a46fSAndroid Build Coastguard Worker             }
2187*3ac0a46fSAndroid Build Coastguard Worker         } else {
2188*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
2189*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 step_j;
2190*3ac0a46fSAndroid Build Coastguard Worker 
2191*3ac0a46fSAndroid Build Coastguard Worker             if (rw < num_jobs) {
2192*3ac0a46fSAndroid Build Coastguard Worker                 num_jobs = rw;
2193*3ac0a46fSAndroid Build Coastguard Worker             }
2194*3ac0a46fSAndroid Build Coastguard Worker             step_j = (rw / num_jobs);
2195*3ac0a46fSAndroid Build Coastguard Worker 
2196*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < num_jobs; j++) {
2197*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt_decode_v_job_t* job;
2198*3ac0a46fSAndroid Build Coastguard Worker 
2199*3ac0a46fSAndroid Build Coastguard Worker                 job = (opj_dwt_decode_v_job_t*) opj_malloc(sizeof(opj_dwt_decode_v_job_t));
2200*3ac0a46fSAndroid Build Coastguard Worker                 if (!job) {
2201*3ac0a46fSAndroid Build Coastguard Worker                     /* It would be nice to fallback to single thread case, but */
2202*3ac0a46fSAndroid Build Coastguard Worker                     /* unfortunately some jobs may be launched and have modified */
2203*3ac0a46fSAndroid Build Coastguard Worker                     /* tiledp, so it is not practical to recover from that error */
2204*3ac0a46fSAndroid Build Coastguard Worker                     /* FIXME event manager error callback */
2205*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
2206*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(v.mem);
2207*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
2208*3ac0a46fSAndroid Build Coastguard Worker                 }
2209*3ac0a46fSAndroid Build Coastguard Worker                 job->v = v;
2210*3ac0a46fSAndroid Build Coastguard Worker                 job->rh = rh;
2211*3ac0a46fSAndroid Build Coastguard Worker                 job->w = w;
2212*3ac0a46fSAndroid Build Coastguard Worker                 job->tiledp = tiledp;
2213*3ac0a46fSAndroid Build Coastguard Worker                 job->min_j = j * step_j;
2214*3ac0a46fSAndroid Build Coastguard Worker                 job->max_j = (j + 1U) * step_j; /* this can overflow */
2215*3ac0a46fSAndroid Build Coastguard Worker                 if (j == (num_jobs - 1U)) {  /* this will take care of the overflow */
2216*3ac0a46fSAndroid Build Coastguard Worker                     job->max_j = rw;
2217*3ac0a46fSAndroid Build Coastguard Worker                 }
2218*3ac0a46fSAndroid Build Coastguard Worker                 job->v.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
2219*3ac0a46fSAndroid Build Coastguard Worker                 if (!job->v.mem) {
2220*3ac0a46fSAndroid Build Coastguard Worker                     /* FIXME event manager error callback */
2221*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
2222*3ac0a46fSAndroid Build Coastguard Worker                     opj_free(job);
2223*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(v.mem);
2224*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
2225*3ac0a46fSAndroid Build Coastguard Worker                 }
2226*3ac0a46fSAndroid Build Coastguard Worker                 opj_thread_pool_submit_job(tp, opj_dwt_decode_v_func, job);
2227*3ac0a46fSAndroid Build Coastguard Worker             }
2228*3ac0a46fSAndroid Build Coastguard Worker             opj_thread_pool_wait_completion(tp, 0);
2229*3ac0a46fSAndroid Build Coastguard Worker         }
2230*3ac0a46fSAndroid Build Coastguard Worker     }
2231*3ac0a46fSAndroid Build Coastguard Worker     opj_aligned_free(h.mem);
2232*3ac0a46fSAndroid Build Coastguard Worker     return OPJ_TRUE;
2233*3ac0a46fSAndroid Build Coastguard Worker }
2234*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Reads two column windows of line sa_line from the sparse array into dest:
 *  - columns [win_l_x0, win_l_x1) go to dest starting at index
 *    cas + 2 * win_l_x0;
 *  - columns [sn + win_h_x0, sn + win_h_x1) go to dest starting at index
 *    1 - cas + 2 * win_h_x0.
 * Both reads pass 2 and 0 as the two arguments that follow the destination
 * (presumably column and line strides - confirm against
 * opj_sparse_array_int32_read). Failure of either read is only caught by
 * assert().
 */
static void opj_dwt_interleave_partial_h(OPJ_INT32 *dest,
        OPJ_INT32 cas,
        opj_sparse_array_int32_t* sa,
        OPJ_UINT32 sa_line,
        OPJ_UINT32 sn,
        OPJ_UINT32 win_l_x0,
        OPJ_UINT32 win_l_x1,
        OPJ_UINT32 win_h_x0,
        OPJ_UINT32 win_h_x1)
{
    const OPJ_UINT32 next_line = sa_line + 1;
    OPJ_INT32 *win_l_dest;
    OPJ_INT32 *win_h_dest;
    OPJ_BOOL ret;

    /* win_l window */
    win_l_dest = dest + cas + 2 * win_l_x0;
    ret = opj_sparse_array_int32_read(sa,
                                      win_l_x0, sa_line,
                                      win_l_x1, next_line,
                                      win_l_dest,
                                      2, 0, OPJ_TRUE);
    assert(ret);

    /* win_h window, located sn columns further in the sparse array */
    win_h_dest = dest + 1 - cas + 2 * win_h_x0;
    ret = opj_sparse_array_int32_read(sa,
                                      sn + win_h_x0, sa_line,
                                      sn + win_h_x1, next_line,
                                      win_h_dest,
                                      2, 0, OPJ_TRUE);
    assert(ret);
    OPJ_UNUSED(ret);
}
2260*3ac0a46fSAndroid Build Coastguard Worker 
2261*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Reads two line windows of columns [sa_col, sa_col + nb_cols) from the
 * sparse array into dest:
 *  - lines [win_l_y0, win_l_y1) go to dest starting at index
 *    cas * 4 + 2 * 4 * win_l_y0;
 *  - lines [sn + win_h_y0, sn + win_h_y1) go to dest starting at index
 *    (1 - cas) * 4 + 2 * 4 * win_h_y0.
 * Both reads pass 1 and 2 * 4 as the two arguments that follow the
 * destination (presumably column and line strides - confirm against
 * opj_sparse_array_int32_read). Failure of either read is only caught by
 * assert().
 */
static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
        OPJ_INT32 cas,
        opj_sparse_array_int32_t* sa,
        OPJ_UINT32 sa_col,
        OPJ_UINT32 nb_cols,
        OPJ_UINT32 sn,
        OPJ_UINT32 win_l_y0,
        OPJ_UINT32 win_l_y1,
        OPJ_UINT32 win_h_y0,
        OPJ_UINT32 win_h_y1)
{
    const OPJ_UINT32 end_col = sa_col + nb_cols;
    OPJ_INT32 *win_l_dest;
    OPJ_INT32 *win_h_dest;
    OPJ_BOOL ret;

    /* win_l window */
    win_l_dest = dest + cas * 4 + 2 * 4 * win_l_y0;
    ret = opj_sparse_array_int32_read(sa,
                                      sa_col, win_l_y0,
                                      end_col, win_l_y1,
                                      win_l_dest,
                                      1, 2 * 4, OPJ_TRUE);
    assert(ret);

    /* win_h window, located sn lines further in the sparse array */
    win_h_dest = dest + (1 - cas) * 4 + 2 * 4 * win_h_y0;
    ret = opj_sparse_array_int32_read(sa,
                                      sa_col, sn + win_h_y0,
                                      end_col, sn + win_h_y1,
                                      win_h_dest,
                                      1, 2 * 4, OPJ_TRUE);
    assert(ret);
    OPJ_UNUSED(ret);
}
2288*3ac0a46fSAndroid Build Coastguard Worker 
opj_dwt_decode_partial_1(OPJ_INT32 * a,OPJ_SIZE_T a_count,OPJ_INT32 dn,OPJ_INT32 sn,OPJ_INT32 cas,OPJ_INT32 win_l_x0,OPJ_INT32 win_l_x1,OPJ_INT32 win_h_x0,OPJ_INT32 win_h_x1)2289*3ac0a46fSAndroid Build Coastguard Worker static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_SIZE_T a_count,
2290*3ac0a46fSAndroid Build Coastguard Worker                                      OPJ_INT32 dn, OPJ_INT32 sn,
2291*3ac0a46fSAndroid Build Coastguard Worker                                      OPJ_INT32 cas,
2292*3ac0a46fSAndroid Build Coastguard Worker                                      OPJ_INT32 win_l_x0,
2293*3ac0a46fSAndroid Build Coastguard Worker                                      OPJ_INT32 win_l_x1,
2294*3ac0a46fSAndroid Build Coastguard Worker                                      OPJ_INT32 win_h_x0,
2295*3ac0a46fSAndroid Build Coastguard Worker                                      OPJ_INT32 win_h_x1)
2296*3ac0a46fSAndroid Build Coastguard Worker {
2297*3ac0a46fSAndroid Build Coastguard Worker     OPJ_INT32 i;
2298*3ac0a46fSAndroid Build Coastguard Worker 
2299*3ac0a46fSAndroid Build Coastguard Worker     if (!cas) {
2300*3ac0a46fSAndroid Build Coastguard Worker         if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
2301*3ac0a46fSAndroid Build Coastguard Worker 
2302*3ac0a46fSAndroid Build Coastguard Worker             /* Naive version is :
2303*3ac0a46fSAndroid Build Coastguard Worker             for (i = win_l_x0; i < i_max; i++) {
2304*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
2305*3ac0a46fSAndroid Build Coastguard Worker             }
2306*3ac0a46fSAndroid Build Coastguard Worker             for (i = win_h_x0; i < win_h_x1; i++) {
2307*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
2308*3ac0a46fSAndroid Build Coastguard Worker             }
2309*3ac0a46fSAndroid Build Coastguard Worker             but the compiler doesn't manage to unroll it to avoid bound
2310*3ac0a46fSAndroid Build Coastguard Worker             checking in OPJ_S_ and OPJ_D_ macros
2311*3ac0a46fSAndroid Build Coastguard Worker             */
2312*3ac0a46fSAndroid Build Coastguard Worker 
2313*3ac0a46fSAndroid Build Coastguard Worker             i = win_l_x0;
2314*3ac0a46fSAndroid Build Coastguard Worker             if (i < win_l_x1) {
2315*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_INT32 i_max;
2316*3ac0a46fSAndroid Build Coastguard Worker 
2317*3ac0a46fSAndroid Build Coastguard Worker                 /* Left-most case */
2318*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
2319*3ac0a46fSAndroid Build Coastguard Worker                 i ++;
2320*3ac0a46fSAndroid Build Coastguard Worker 
2321*3ac0a46fSAndroid Build Coastguard Worker                 i_max = win_l_x1;
2322*3ac0a46fSAndroid Build Coastguard Worker                 if (i_max > dn) {
2323*3ac0a46fSAndroid Build Coastguard Worker                     i_max = dn;
2324*3ac0a46fSAndroid Build Coastguard Worker                 }
2325*3ac0a46fSAndroid Build Coastguard Worker                 for (; i < i_max; i++) {
2326*3ac0a46fSAndroid Build Coastguard Worker                     /* No bound checking */
2327*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_S(i) -= (OPJ_D(i - 1) + OPJ_D(i) + 2) >> 2;
2328*3ac0a46fSAndroid Build Coastguard Worker                 }
2329*3ac0a46fSAndroid Build Coastguard Worker                 for (; i < win_l_x1; i++) {
2330*3ac0a46fSAndroid Build Coastguard Worker                     /* Right-most case */
2331*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
2332*3ac0a46fSAndroid Build Coastguard Worker                 }
2333*3ac0a46fSAndroid Build Coastguard Worker             }
2334*3ac0a46fSAndroid Build Coastguard Worker 
2335*3ac0a46fSAndroid Build Coastguard Worker             i = win_h_x0;
2336*3ac0a46fSAndroid Build Coastguard Worker             if (i < win_h_x1) {
2337*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_INT32 i_max = win_h_x1;
2338*3ac0a46fSAndroid Build Coastguard Worker                 if (i_max >= sn) {
2339*3ac0a46fSAndroid Build Coastguard Worker                     i_max = sn - 1;
2340*3ac0a46fSAndroid Build Coastguard Worker                 }
2341*3ac0a46fSAndroid Build Coastguard Worker                 for (; i < i_max; i++) {
2342*3ac0a46fSAndroid Build Coastguard Worker                     /* No bound checking */
2343*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_D(i) += (OPJ_S(i) + OPJ_S(i + 1)) >> 1;
2344*3ac0a46fSAndroid Build Coastguard Worker                 }
2345*3ac0a46fSAndroid Build Coastguard Worker                 for (; i < win_h_x1; i++) {
2346*3ac0a46fSAndroid Build Coastguard Worker                     /* Right-most case */
2347*3ac0a46fSAndroid Build Coastguard Worker                     OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
2348*3ac0a46fSAndroid Build Coastguard Worker                 }
2349*3ac0a46fSAndroid Build Coastguard Worker             }
2350*3ac0a46fSAndroid Build Coastguard Worker         }
2351*3ac0a46fSAndroid Build Coastguard Worker     } else {
2352*3ac0a46fSAndroid Build Coastguard Worker         if (!sn  && dn == 1) {        /* NEW :  CASE ONE ELEMENT */
2353*3ac0a46fSAndroid Build Coastguard Worker             OPJ_S(0) /= 2;
2354*3ac0a46fSAndroid Build Coastguard Worker         } else {
2355*3ac0a46fSAndroid Build Coastguard Worker             for (i = win_l_x0; i < win_l_x1; i++) {
2356*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_D(i) = opj_int_sub_no_overflow(OPJ_D(i),
2357*3ac0a46fSAndroid Build Coastguard Worker                                                    opj_int_add_no_overflow(opj_int_add_no_overflow(OPJ_SS_(i), OPJ_SS_(i + 1)),
2358*3ac0a46fSAndroid Build Coastguard Worker                                                            2) >> 2);
2359*3ac0a46fSAndroid Build Coastguard Worker             }
2360*3ac0a46fSAndroid Build Coastguard Worker             for (i = win_h_x0; i < win_h_x1; i++) {
2361*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_S(i) = opj_int_add_no_overflow(OPJ_S(i),
2362*3ac0a46fSAndroid Build Coastguard Worker                                                    opj_int_add_no_overflow(OPJ_DD_(i), OPJ_DD_(i - 1)) >> 1);
2363*3ac0a46fSAndroid Build Coastguard Worker             }
2364*3ac0a46fSAndroid Build Coastguard Worker         }
2365*3ac0a46fSAndroid Build Coastguard Worker     }
2366*3ac0a46fSAndroid Build Coastguard Worker }
2367*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Accessors for a buffer `a` that interleaves S and D samples, each sample
 * being a group of 4 consecutive values (one per processed column):
 *   S sample i, column off -> a[(2 * i) * 4 + off]
 *   D sample i, column off -> a[(2 * i + 1) * 4 + off]
 *
 * The double-underscore variants clamp the sample index i to a valid range
 * before the access; they expect `sn` and `dn` to be in scope at the point of
 * use:
 *   OPJ_S__off  : S sample, i clamped to [0, sn - 1]
 *   OPJ_D__off  : D sample, i clamped to [0, dn - 1]
 *   OPJ_SS__off : S sample, i clamped to [0, dn - 1]
 *   OPJ_DD__off : D sample, i clamped to [0, sn - 1]
 *
 * Both macro parameters are parenthesized so that arbitrary expressions can
 * be passed safely. Note that the clamping variants evaluate `i` more than
 * once, so arguments must be free of side effects.
 */
#define OPJ_S_off(i,off) a[(OPJ_UINT32)(i)*2*4+(off)]
#define OPJ_D_off(i,off) a[(1+(OPJ_UINT32)(i)*2)*4+(off)]
#define OPJ_S__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=sn?OPJ_S_off(sn-1,off):OPJ_S_off(i,off)))
#define OPJ_D__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=dn?OPJ_D_off(dn-1,off):OPJ_D_off(i,off)))
#define OPJ_SS__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=dn?OPJ_S_off(dn-1,off):OPJ_S_off(i,off)))
#define OPJ_DD__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=sn?OPJ_D_off(sn-1,off):OPJ_D_off(i,off)))
2374*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Applies the two integer lifting steps to a window of an interleaved buffer,
 * processing 4 columns at once.
 *
 * For cas == 0 the steps are, for every column `off`:
 *     S(i) -= (D(i - 1) + D(i) + 2) >> 2    for i in [win_l_x0, win_l_x1)
 *     D(i) += (S(i) + S(i + 1)) >> 1        for i in [win_h_x0, win_h_x1)
 * with out-of-range neighbours clamped to the first/last valid sample.
 * For cas != 0 the same steps are applied with the S/D positions swapped
 * (see the OPJ_SS__off / OPJ_DD__off accessors).
 *
 * All scalar additions and subtractions go through opj_int_add_no_overflow /
 * opj_int_sub_no_overflow so that out-of-range coefficients wrap instead of
 * triggering signed-overflow undefined behaviour. This matches what the SSE2
 * path (_mm_add_epi32 / _mm_sub_epi32) does and what the cas != 0 branch
 * already did.
 *
 * @param a         interleaved S/D buffer, 4 values per sample (see the
 *                  OPJ_S_off / OPJ_D_off macros). The SSE2 path uses aligned
 *                  128-bit loads/stores, so it must be 16-byte aligned.
 * @param nb_cols   unused.
 * @param dn        number of D samples (upper clamp bound for D accesses).
 * @param sn        number of S samples (upper clamp bound for S accesses).
 * @param cas       0 when the S samples come first in the interleaving,
 *                  non-zero otherwise.
 * @param win_l_x0  first index (inclusive) handled by the first lifting step.
 * @param win_l_x1  last index (exclusive) handled by the first lifting step.
 * @param win_h_x0  first index (inclusive) handled by the second lifting step.
 * @param win_h_x1  last index (exclusive) handled by the second lifting step.
 */
static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a,
        OPJ_UINT32 nb_cols,
        OPJ_INT32 dn, OPJ_INT32 sn,
        OPJ_INT32 cas,
        OPJ_INT32 win_l_x0,
        OPJ_INT32 win_l_x1,
        OPJ_INT32 win_h_x0,
        OPJ_INT32 win_h_x1)
{
    OPJ_INT32 i;
    OPJ_UINT32 off;

    (void)nb_cols;

    if (!cas) {
        if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */

            /* Each step is split into a left-most iteration, a middle run */
            /* that needs no bound checking (optionally vectorized), and a */
            /* right-most run that uses the clamping accessors. A single   */
            /* loop using only the clamping accessors would be equivalent  */
            /* but the compiler does not manage to hoist the bound checks. */

            i = win_l_x0;
            if (i < win_l_x1) {
                OPJ_INT32 i_max;

                /* Left-most case: D(i - 1) may need clamping */
                for (off = 0; off < 4; off++) {
                    OPJ_S_off(i, off) = opj_int_sub_no_overflow(
                                            OPJ_S_off(i, off),
                                            opj_int_add_no_overflow(
                                                opj_int_add_no_overflow(OPJ_D__off(i - 1, off), OPJ_D__off(i, off)), 2) >> 2);
                }
                i ++;

                /* D(i - 1) and D(i) are both valid while i < dn */
                i_max = win_l_x1;
                if (i_max > dn) {
                    i_max = dn;
                }

#ifdef __SSE2__
                if (i + 1 < i_max) {
                    const __m128i two = _mm_set1_epi32(2);
                    __m128i Dm1 = _mm_load_si128((const __m128i *)(a + 4 + (i - 1) * 8));
                    /* Two samples per iteration; D1 is carried over as the */
                    /* next iteration's D(i - 1). */
                    for (; i + 1 < i_max; i += 2) {
                        /* No bound checking */
                        __m128i S = _mm_load_si128((const __m128i *)(a + i * 8));
                        __m128i D = _mm_load_si128((const __m128i *)(a + 4 + i * 8));
                        __m128i S1 = _mm_load_si128((const __m128i *)(a + (i + 1) * 8));
                        __m128i D1 = _mm_load_si128((const __m128i *)(a + 4 + (i + 1) * 8));
                        S = _mm_sub_epi32(S,
                                          _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(Dm1, D), two), 2));
                        S1 = _mm_sub_epi32(S1,
                                           _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(D, D1), two), 2));
                        _mm_store_si128((__m128i*)(a + i * 8), S);
                        _mm_store_si128((__m128i*)(a + (i + 1) * 8), S1);
                        Dm1 = D1;
                    }
                }
#endif

                for (; i < i_max; i++) {
                    /* No bound checking */
                    for (off = 0; off < 4; off++) {
                        OPJ_S_off(i, off) = opj_int_sub_no_overflow(
                                                OPJ_S_off(i, off),
                                                opj_int_add_no_overflow(
                                                    opj_int_add_no_overflow(OPJ_D_off(i - 1, off), OPJ_D_off(i, off)), 2) >> 2);
                    }
                }
                for (; i < win_l_x1; i++) {
                    /* Right-most case: D(i) may need clamping */
                    for (off = 0; off < 4; off++) {
                        OPJ_S_off(i, off) = opj_int_sub_no_overflow(
                                                OPJ_S_off(i, off),
                                                opj_int_add_no_overflow(
                                                    opj_int_add_no_overflow(OPJ_D__off(i - 1, off), OPJ_D__off(i, off)), 2) >> 2);
                    }
                }
            }

            i = win_h_x0;
            if (i < win_h_x1) {
                /* S(i) and S(i + 1) are both valid while i < sn - 1 */
                OPJ_INT32 i_max = win_h_x1;
                if (i_max >= sn) {
                    i_max = sn - 1;
                }

#ifdef __SSE2__
                if (i + 1 < i_max) {
                    __m128i S =  _mm_load_si128((const __m128i *)(a + i * 8));
                    /* Two samples per iteration; S2 is carried over as the */
                    /* next iteration's S(i). */
                    for (; i + 1 < i_max; i += 2) {
                        /* No bound checking */
                        __m128i D = _mm_load_si128((const __m128i *)(a + 4 + i * 8));
                        __m128i S1 = _mm_load_si128((const __m128i *)(a + (i + 1) * 8));
                        __m128i D1 = _mm_load_si128((const __m128i *)(a + 4 + (i + 1) * 8));
                        __m128i S2 = _mm_load_si128((const __m128i *)(a + (i + 2) * 8));
                        D = _mm_add_epi32(D, _mm_srai_epi32(_mm_add_epi32(S, S1), 1));
                        D1 = _mm_add_epi32(D1, _mm_srai_epi32(_mm_add_epi32(S1, S2), 1));
                        _mm_store_si128((__m128i*)(a + 4 + i * 8), D);
                        _mm_store_si128((__m128i*)(a + 4 + (i + 1) * 8), D1);
                        S = S2;
                    }
                }
#endif

                for (; i < i_max; i++) {
                    /* No bound checking */
                    for (off = 0; off < 4; off++) {
                        OPJ_D_off(i, off) = opj_int_add_no_overflow(
                                                OPJ_D_off(i, off),
                                                opj_int_add_no_overflow(OPJ_S_off(i, off), OPJ_S_off(i + 1, off)) >> 1);
                    }
                }
                for (; i < win_h_x1; i++) {
                    /* Right-most case: S(i + 1) may need clamping */
                    for (off = 0; off < 4; off++) {
                        OPJ_D_off(i, off) = opj_int_add_no_overflow(
                                                OPJ_D_off(i, off),
                                                opj_int_add_no_overflow(OPJ_S__off(i, off), OPJ_S__off(i + 1, off)) >> 1);
                    }
                }
            }
        }
    } else {
        if (!sn  && dn == 1) {        /* NEW :  CASE ONE ELEMENT */
            for (off = 0; off < 4; off++) {
                OPJ_S_off(0, off) /= 2;
            }
        } else {
            for (i = win_l_x0; i < win_l_x1; i++) {
                for (off = 0; off < 4; off++) {
                    OPJ_D_off(i, off) = opj_int_sub_no_overflow(
                                            OPJ_D_off(i, off),
                                            opj_int_add_no_overflow(
                                                opj_int_add_no_overflow(OPJ_SS__off(i, off), OPJ_SS__off(i + 1, off)), 2) >> 2);
                }
            }
            for (i = win_h_x0; i < win_h_x1; i++) {
                for (off = 0; off < 4; off++) {
                    OPJ_S_off(i, off) = opj_int_add_no_overflow(
                                            OPJ_S_off(i, off),
                                            opj_int_add_no_overflow(OPJ_DD__off(i, off), OPJ_DD__off(i - 1, off)) >> 1);
                }
            }
        }
    }
}
2516*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Converts tile-component coordinates into sub-band coordinates.
 *
 * The number of decompositions nb for the band is numresolutions - 1 for
 * resolution 0 and numresolutions - resno otherwise (table F-1). Each
 * coordinate c is then mapped per equation B-15 of the standard:
 *   - nb == 0                  -> c (unchanged)
 *   - c <= 2^(nb - 1) * b      -> 0
 *   - otherwise                -> ceil((c - 2^(nb - 1) * b) / 2^nb)
 * where b is bit 0 of bandno for x coordinates and bandno >> 1 for y.
 *
 * @param tilec   tile component; only numresolutions is read.
 * @param resno   resolution level number.
 * @param bandno  band number.
 * @param tcx0    tile-component x0 to convert.
 * @param tcy0    tile-component y0 to convert.
 * @param tcx1    tile-component x1 to convert.
 * @param tcy1    tile-component y1 to convert.
 * @param tbx0    receives the converted x0; may be NULL to skip it.
 * @param tby0    receives the converted y0; may be NULL to skip it.
 * @param tbx1    receives the converted x1; may be NULL to skip it.
 * @param tby1    receives the converted y1; may be NULL to skip it.
 */
static void opj_dwt_get_band_coordinates(opj_tcd_tilecomp_t* tilec,
        OPJ_UINT32 resno,
        OPJ_UINT32 bandno,
        OPJ_UINT32 tcx0,
        OPJ_UINT32 tcy0,
        OPJ_UINT32 tcx1,
        OPJ_UINT32 tcy1,
        OPJ_UINT32* tbx0,
        OPJ_UINT32* tby0,
        OPJ_UINT32* tbx1,
        OPJ_UINT32* tby1)
{
    OPJ_UINT32 nb;
    /* Offsets subtracted from the x / y coordinates before the division. */
    /* They are only meaningful (and only used) when nb != 0. */
    OPJ_UINT32 shift_x = 0;
    OPJ_UINT32 shift_y = 0;

    /* Number of decompositions for this band. See table F-1 */
    if (resno == 0) {
        nb = tilec->numresolutions - 1;
    } else {
        nb = tilec->numresolutions - resno;
    }

    if (nb != 0) {
        const OPJ_UINT32 half_step = 1U << (nb - 1);
        shift_x = half_step * (bandno & 1);
        shift_y = half_step * (bandno >> 1);
    }

    /* Equation B-15 of the standard, applied to each requested output */
    if (tbx0) {
        if (nb == 0) {
            *tbx0 = tcx0;
        } else if (tcx0 <= shift_x) {
            *tbx0 = 0;
        } else {
            *tbx0 = opj_uint_ceildivpow2(tcx0 - shift_x, nb);
        }
    }
    if (tby0) {
        if (nb == 0) {
            *tby0 = tcy0;
        } else if (tcy0 <= shift_y) {
            *tby0 = 0;
        } else {
            *tby0 = opj_uint_ceildivpow2(tcy0 - shift_y, nb);
        }
    }
    if (tbx1) {
        if (nb == 0) {
            *tbx1 = tcx1;
        } else if (tcx1 <= shift_x) {
            *tbx1 = 0;
        } else {
            *tbx1 = opj_uint_ceildivpow2(tcx1 - shift_x, nb);
        }
    }
    if (tby1) {
        if (nb == 0) {
            *tby1 = tcy1;
        } else if (tcy1 <= shift_y) {
            *tby1 = 0;
        } else {
            *tby1 = opj_uint_ceildivpow2(tcy1 - shift_y, nb);
        }
    }
}
2558*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Widens the segment [*start, *end) by filter_width on both sides.
 *
 * The lower bound is moved down with opj_uint_subs and the upper bound is
 * moved up with opj_uint_adds, then capped at max_size.
 *
 * @param filter_width  amount by which each side of the segment is extended.
 * @param max_size      upper limit for the resulting *end.
 * @param start         in/out: lower bound of the segment.
 * @param end           in/out: upper bound of the segment.
 */
static void opj_dwt_segment_grow(OPJ_UINT32 filter_width,
                                 OPJ_UINT32 max_size,
                                 OPJ_UINT32* start,
                                 OPJ_UINT32* end)
{
    OPJ_UINT32 widened_end;

    *start = opj_uint_subs(*start, filter_width);

    widened_end = opj_uint_adds(*end, filter_width);
    *end = opj_uint_min(widened_end, max_size);
}
2568*3ac0a46fSAndroid Build Coastguard Worker 
2569*3ac0a46fSAndroid Build Coastguard Worker 
opj_dwt_init_sparse_array(opj_tcd_tilecomp_t * tilec,OPJ_UINT32 numres)2570*3ac0a46fSAndroid Build Coastguard Worker static opj_sparse_array_int32_t* opj_dwt_init_sparse_array(
2571*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_tilecomp_t* tilec,
2572*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 numres)
2573*3ac0a46fSAndroid Build Coastguard Worker {
2574*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
2575*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w = (OPJ_UINT32)(tr_max->x1 - tr_max->x0);
2576*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 h = (OPJ_UINT32)(tr_max->y1 - tr_max->y0);
2577*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 resno, bandno, precno, cblkno;
2578*3ac0a46fSAndroid Build Coastguard Worker     opj_sparse_array_int32_t* sa = opj_sparse_array_int32_create(
2579*3ac0a46fSAndroid Build Coastguard Worker                                        w, h, opj_uint_min(w, 64), opj_uint_min(h, 64));
2580*3ac0a46fSAndroid Build Coastguard Worker     if (sa == NULL) {
2581*3ac0a46fSAndroid Build Coastguard Worker         return NULL;
2582*3ac0a46fSAndroid Build Coastguard Worker     }
2583*3ac0a46fSAndroid Build Coastguard Worker 
2584*3ac0a46fSAndroid Build Coastguard Worker     for (resno = 0; resno < numres; ++resno) {
2585*3ac0a46fSAndroid Build Coastguard Worker         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
2586*3ac0a46fSAndroid Build Coastguard Worker 
2587*3ac0a46fSAndroid Build Coastguard Worker         for (bandno = 0; bandno < res->numbands; ++bandno) {
2588*3ac0a46fSAndroid Build Coastguard Worker             opj_tcd_band_t* band = &res->bands[bandno];
2589*3ac0a46fSAndroid Build Coastguard Worker 
2590*3ac0a46fSAndroid Build Coastguard Worker             for (precno = 0; precno < res->pw * res->ph; ++precno) {
2591*3ac0a46fSAndroid Build Coastguard Worker                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
2592*3ac0a46fSAndroid Build Coastguard Worker                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
2593*3ac0a46fSAndroid Build Coastguard Worker                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
2594*3ac0a46fSAndroid Build Coastguard Worker                     if (cblk->decoded_data != NULL) {
2595*3ac0a46fSAndroid Build Coastguard Worker                         OPJ_UINT32 x = (OPJ_UINT32)(cblk->x0 - band->x0);
2596*3ac0a46fSAndroid Build Coastguard Worker                         OPJ_UINT32 y = (OPJ_UINT32)(cblk->y0 - band->y0);
2597*3ac0a46fSAndroid Build Coastguard Worker                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
2598*3ac0a46fSAndroid Build Coastguard Worker                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
2599*3ac0a46fSAndroid Build Coastguard Worker 
2600*3ac0a46fSAndroid Build Coastguard Worker                         if (band->bandno & 1) {
2601*3ac0a46fSAndroid Build Coastguard Worker                             opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
2602*3ac0a46fSAndroid Build Coastguard Worker                             x += (OPJ_UINT32)(pres->x1 - pres->x0);
2603*3ac0a46fSAndroid Build Coastguard Worker                         }
2604*3ac0a46fSAndroid Build Coastguard Worker                         if (band->bandno & 2) {
2605*3ac0a46fSAndroid Build Coastguard Worker                             opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
2606*3ac0a46fSAndroid Build Coastguard Worker                             y += (OPJ_UINT32)(pres->y1 - pres->y0);
2607*3ac0a46fSAndroid Build Coastguard Worker                         }
2608*3ac0a46fSAndroid Build Coastguard Worker 
2609*3ac0a46fSAndroid Build Coastguard Worker                         if (!opj_sparse_array_int32_write(sa, x, y,
2610*3ac0a46fSAndroid Build Coastguard Worker                                                           x + cblk_w, y + cblk_h,
2611*3ac0a46fSAndroid Build Coastguard Worker                                                           cblk->decoded_data,
2612*3ac0a46fSAndroid Build Coastguard Worker                                                           1, cblk_w, OPJ_TRUE)) {
2613*3ac0a46fSAndroid Build Coastguard Worker                             opj_sparse_array_int32_free(sa);
2614*3ac0a46fSAndroid Build Coastguard Worker                             return NULL;
2615*3ac0a46fSAndroid Build Coastguard Worker                         }
2616*3ac0a46fSAndroid Build Coastguard Worker                     }
2617*3ac0a46fSAndroid Build Coastguard Worker                 }
2618*3ac0a46fSAndroid Build Coastguard Worker             }
2619*3ac0a46fSAndroid Build Coastguard Worker         }
2620*3ac0a46fSAndroid Build Coastguard Worker     }
2621*3ac0a46fSAndroid Build Coastguard Worker 
2622*3ac0a46fSAndroid Build Coastguard Worker     return sa;
2623*3ac0a46fSAndroid Build Coastguard Worker }
2624*3ac0a46fSAndroid Build Coastguard Worker 
2625*3ac0a46fSAndroid Build Coastguard Worker 
opj_dwt_decode_partial_tile(opj_tcd_tilecomp_t * tilec,OPJ_UINT32 numres)2626*3ac0a46fSAndroid Build Coastguard Worker static OPJ_BOOL opj_dwt_decode_partial_tile(
2627*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_tilecomp_t* tilec,
2628*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 numres)
2629*3ac0a46fSAndroid Build Coastguard Worker {
2630*3ac0a46fSAndroid Build Coastguard Worker     opj_sparse_array_int32_t* sa;
2631*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t h;
2632*3ac0a46fSAndroid Build Coastguard Worker     opj_dwt_t v;
2633*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 resno;
2634*3ac0a46fSAndroid Build Coastguard Worker     /* This value matches the maximum left/right extension given in tables */
2635*3ac0a46fSAndroid Build Coastguard Worker     /* F.2 and F.3 of the standard. */
2636*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_UINT32 filter_width = 2U;
2637*3ac0a46fSAndroid Build Coastguard Worker 
2638*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* tr = tilec->resolutions;
2639*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
2640*3ac0a46fSAndroid Build Coastguard Worker 
2641*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 -
2642*3ac0a46fSAndroid Build Coastguard Worker                                  tr->x0);  /* width of the resolution level computed */
2643*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
2644*3ac0a46fSAndroid Build Coastguard Worker                                  tr->y0);  /* height of the resolution level computed */
2645*3ac0a46fSAndroid Build Coastguard Worker 
2646*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T h_mem_size;
2647*3ac0a46fSAndroid Build Coastguard Worker 
2648*3ac0a46fSAndroid Build Coastguard Worker     /* Compute the intersection of the area of interest, expressed in tile coordinates */
2649*3ac0a46fSAndroid Build Coastguard Worker     /* with the tile coordinates */
2650*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcx0 = tilec->win_x0;
2651*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcy0 = tilec->win_y0;
2652*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcx1 = tilec->win_x1;
2653*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcy1 = tilec->win_y1;
2654*3ac0a46fSAndroid Build Coastguard Worker 
2655*3ac0a46fSAndroid Build Coastguard Worker     if (tr_max->x0 == tr_max->x1 || tr_max->y0 == tr_max->y1) {
2656*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_TRUE;
2657*3ac0a46fSAndroid Build Coastguard Worker     }
2658*3ac0a46fSAndroid Build Coastguard Worker 
2659*3ac0a46fSAndroid Build Coastguard Worker     sa = opj_dwt_init_sparse_array(tilec, numres);
2660*3ac0a46fSAndroid Build Coastguard Worker     if (sa == NULL) {
2661*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
2662*3ac0a46fSAndroid Build Coastguard Worker     }
2663*3ac0a46fSAndroid Build Coastguard Worker 
2664*3ac0a46fSAndroid Build Coastguard Worker     if (numres == 1U) {
2665*3ac0a46fSAndroid Build Coastguard Worker         OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
2666*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
2667*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
2668*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
2669*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
2670*3ac0a46fSAndroid Build Coastguard Worker                        tilec->data_win,
2671*3ac0a46fSAndroid Build Coastguard Worker                        1, tr_max->win_x1 - tr_max->win_x0,
2672*3ac0a46fSAndroid Build Coastguard Worker                        OPJ_TRUE);
2673*3ac0a46fSAndroid Build Coastguard Worker         assert(ret);
2674*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UNUSED(ret);
2675*3ac0a46fSAndroid Build Coastguard Worker         opj_sparse_array_int32_free(sa);
2676*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_TRUE;
2677*3ac0a46fSAndroid Build Coastguard Worker     }
2678*3ac0a46fSAndroid Build Coastguard Worker     h.mem_count = opj_dwt_max_resolution(tr, numres);
2679*3ac0a46fSAndroid Build Coastguard Worker     /* overflow check */
2680*3ac0a46fSAndroid Build Coastguard Worker     /* in vertical pass, we process 4 columns at a time */
2681*3ac0a46fSAndroid Build Coastguard Worker     if (h.mem_count > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) {
2682*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
2683*3ac0a46fSAndroid Build Coastguard Worker         opj_sparse_array_int32_free(sa);
2684*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
2685*3ac0a46fSAndroid Build Coastguard Worker     }
2686*3ac0a46fSAndroid Build Coastguard Worker 
2687*3ac0a46fSAndroid Build Coastguard Worker     h_mem_size = h.mem_count * 4 * sizeof(OPJ_INT32);
2688*3ac0a46fSAndroid Build Coastguard Worker     h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
2689*3ac0a46fSAndroid Build Coastguard Worker     if (! h.mem) {
2690*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
2691*3ac0a46fSAndroid Build Coastguard Worker         opj_sparse_array_int32_free(sa);
2692*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
2693*3ac0a46fSAndroid Build Coastguard Worker     }
2694*3ac0a46fSAndroid Build Coastguard Worker 
2695*3ac0a46fSAndroid Build Coastguard Worker     v.mem_count = h.mem_count;
2696*3ac0a46fSAndroid Build Coastguard Worker     v.mem = h.mem;
2697*3ac0a46fSAndroid Build Coastguard Worker 
2698*3ac0a46fSAndroid Build Coastguard Worker     for (resno = 1; resno < numres; resno ++) {
2699*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 i, j;
2700*3ac0a46fSAndroid Build Coastguard Worker         /* Window of interest subband-based coordinates */
2701*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1;
2702*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_hl_x0, win_hl_x1;
2703*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_lh_y0, win_lh_y1;
2704*3ac0a46fSAndroid Build Coastguard Worker         /* Window of interest tile-resolution-based coordinates */
2705*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_tr_x0, win_tr_x1, win_tr_y0, win_tr_y1;
2706*3ac0a46fSAndroid Build Coastguard Worker         /* Tile-resolution subband-based coordinates */
2707*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 tr_ll_x0, tr_ll_y0, tr_hl_x0, tr_lh_y0;
2708*3ac0a46fSAndroid Build Coastguard Worker 
2709*3ac0a46fSAndroid Build Coastguard Worker         ++tr;
2710*3ac0a46fSAndroid Build Coastguard Worker 
2711*3ac0a46fSAndroid Build Coastguard Worker         h.sn = (OPJ_INT32)rw;
2712*3ac0a46fSAndroid Build Coastguard Worker         v.sn = (OPJ_INT32)rh;
2713*3ac0a46fSAndroid Build Coastguard Worker 
2714*3ac0a46fSAndroid Build Coastguard Worker         rw = (OPJ_UINT32)(tr->x1 - tr->x0);
2715*3ac0a46fSAndroid Build Coastguard Worker         rh = (OPJ_UINT32)(tr->y1 - tr->y0);
2716*3ac0a46fSAndroid Build Coastguard Worker 
2717*3ac0a46fSAndroid Build Coastguard Worker         h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
2718*3ac0a46fSAndroid Build Coastguard Worker         h.cas = tr->x0 % 2;
2719*3ac0a46fSAndroid Build Coastguard Worker 
2720*3ac0a46fSAndroid Build Coastguard Worker         v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
2721*3ac0a46fSAndroid Build Coastguard Worker         v.cas = tr->y0 % 2;
2722*3ac0a46fSAndroid Build Coastguard Worker 
2723*3ac0a46fSAndroid Build Coastguard Worker         /* Get the subband coordinates for the window of interest */
2724*3ac0a46fSAndroid Build Coastguard Worker         /* LL band */
2725*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_get_band_coordinates(tilec, resno, 0,
2726*3ac0a46fSAndroid Build Coastguard Worker                                      win_tcx0, win_tcy0, win_tcx1, win_tcy1,
2727*3ac0a46fSAndroid Build Coastguard Worker                                      &win_ll_x0, &win_ll_y0,
2728*3ac0a46fSAndroid Build Coastguard Worker                                      &win_ll_x1, &win_ll_y1);
2729*3ac0a46fSAndroid Build Coastguard Worker 
2730*3ac0a46fSAndroid Build Coastguard Worker         /* HL band */
2731*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_get_band_coordinates(tilec, resno, 1,
2732*3ac0a46fSAndroid Build Coastguard Worker                                      win_tcx0, win_tcy0, win_tcx1, win_tcy1,
2733*3ac0a46fSAndroid Build Coastguard Worker                                      &win_hl_x0, NULL, &win_hl_x1, NULL);
2734*3ac0a46fSAndroid Build Coastguard Worker 
2735*3ac0a46fSAndroid Build Coastguard Worker         /* LH band */
2736*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_get_band_coordinates(tilec, resno, 2,
2737*3ac0a46fSAndroid Build Coastguard Worker                                      win_tcx0, win_tcy0, win_tcx1, win_tcy1,
2738*3ac0a46fSAndroid Build Coastguard Worker                                      NULL, &win_lh_y0, NULL, &win_lh_y1);
2739*3ac0a46fSAndroid Build Coastguard Worker 
2740*3ac0a46fSAndroid Build Coastguard Worker         /* Beware: band index for non-LL0 resolution are 0=HL, 1=LH and 2=HH */
2741*3ac0a46fSAndroid Build Coastguard Worker         tr_ll_x0 = (OPJ_UINT32)tr->bands[1].x0;
2742*3ac0a46fSAndroid Build Coastguard Worker         tr_ll_y0 = (OPJ_UINT32)tr->bands[0].y0;
2743*3ac0a46fSAndroid Build Coastguard Worker         tr_hl_x0 = (OPJ_UINT32)tr->bands[0].x0;
2744*3ac0a46fSAndroid Build Coastguard Worker         tr_lh_y0 = (OPJ_UINT32)tr->bands[1].y0;
2745*3ac0a46fSAndroid Build Coastguard Worker 
2746*3ac0a46fSAndroid Build Coastguard Worker         /* Subtract the origin of the bands for this tile, to the subwindow */
2747*3ac0a46fSAndroid Build Coastguard Worker         /* of interest band coordinates, so as to get them relative to the */
2748*3ac0a46fSAndroid Build Coastguard Worker         /* tile */
2749*3ac0a46fSAndroid Build Coastguard Worker         win_ll_x0 = opj_uint_subs(win_ll_x0, tr_ll_x0);
2750*3ac0a46fSAndroid Build Coastguard Worker         win_ll_y0 = opj_uint_subs(win_ll_y0, tr_ll_y0);
2751*3ac0a46fSAndroid Build Coastguard Worker         win_ll_x1 = opj_uint_subs(win_ll_x1, tr_ll_x0);
2752*3ac0a46fSAndroid Build Coastguard Worker         win_ll_y1 = opj_uint_subs(win_ll_y1, tr_ll_y0);
2753*3ac0a46fSAndroid Build Coastguard Worker         win_hl_x0 = opj_uint_subs(win_hl_x0, tr_hl_x0);
2754*3ac0a46fSAndroid Build Coastguard Worker         win_hl_x1 = opj_uint_subs(win_hl_x1, tr_hl_x0);
2755*3ac0a46fSAndroid Build Coastguard Worker         win_lh_y0 = opj_uint_subs(win_lh_y0, tr_lh_y0);
2756*3ac0a46fSAndroid Build Coastguard Worker         win_lh_y1 = opj_uint_subs(win_lh_y1, tr_lh_y0);
2757*3ac0a46fSAndroid Build Coastguard Worker 
2758*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)h.sn, &win_ll_x0, &win_ll_x1);
2759*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)h.dn, &win_hl_x0, &win_hl_x1);
2760*3ac0a46fSAndroid Build Coastguard Worker 
2761*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)v.sn, &win_ll_y0, &win_ll_y1);
2762*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)v.dn, &win_lh_y0, &win_lh_y1);
2763*3ac0a46fSAndroid Build Coastguard Worker 
2764*3ac0a46fSAndroid Build Coastguard Worker         /* Compute the tile-resolution-based coordinates for the window of interest */
2765*3ac0a46fSAndroid Build Coastguard Worker         if (h.cas == 0) {
2766*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x0 = opj_uint_min(2 * win_ll_x0, 2 * win_hl_x0 + 1);
2767*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x1 = opj_uint_min(opj_uint_max(2 * win_ll_x1, 2 * win_hl_x1 + 1), rw);
2768*3ac0a46fSAndroid Build Coastguard Worker         } else {
2769*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x0 = opj_uint_min(2 * win_hl_x0, 2 * win_ll_x0 + 1);
2770*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x1 = opj_uint_min(opj_uint_max(2 * win_hl_x1, 2 * win_ll_x1 + 1), rw);
2771*3ac0a46fSAndroid Build Coastguard Worker         }
2772*3ac0a46fSAndroid Build Coastguard Worker 
2773*3ac0a46fSAndroid Build Coastguard Worker         if (v.cas == 0) {
2774*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y0 = opj_uint_min(2 * win_ll_y0, 2 * win_lh_y0 + 1);
2775*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y1 = opj_uint_min(opj_uint_max(2 * win_ll_y1, 2 * win_lh_y1 + 1), rh);
2776*3ac0a46fSAndroid Build Coastguard Worker         } else {
2777*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y0 = opj_uint_min(2 * win_lh_y0, 2 * win_ll_y0 + 1);
2778*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y1 = opj_uint_min(opj_uint_max(2 * win_lh_y1, 2 * win_ll_y1 + 1), rh);
2779*3ac0a46fSAndroid Build Coastguard Worker         }
2780*3ac0a46fSAndroid Build Coastguard Worker 
2781*3ac0a46fSAndroid Build Coastguard Worker         for (j = 0; j < rh; ++j) {
2782*3ac0a46fSAndroid Build Coastguard Worker             if ((j >= win_ll_y0 && j < win_ll_y1) ||
2783*3ac0a46fSAndroid Build Coastguard Worker                     (j >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) {
2784*3ac0a46fSAndroid Build Coastguard Worker 
2785*3ac0a46fSAndroid Build Coastguard Worker                 /* Avoids dwt.c:1584:44 (in opj_dwt_decode_partial_1): runtime error: */
2786*3ac0a46fSAndroid Build Coastguard Worker                 /* signed integer overflow: -1094795586 + -1094795586 cannot be represented in type 'int' */
2787*3ac0a46fSAndroid Build Coastguard Worker                 /* on opj_decompress -i  ../../openjpeg/MAPA.jp2 -o out.tif -d 0,0,256,256 */
2788*3ac0a46fSAndroid Build Coastguard Worker                 /* This is less extreme than memsetting the whole buffer to 0 */
2789*3ac0a46fSAndroid Build Coastguard Worker                 /* although we could potentially do better with better handling of edge conditions */
2790*3ac0a46fSAndroid Build Coastguard Worker                 if (win_tr_x1 >= 1 && win_tr_x1 < rw) {
2791*3ac0a46fSAndroid Build Coastguard Worker                     h.mem[win_tr_x1 - 1] = 0;
2792*3ac0a46fSAndroid Build Coastguard Worker                 }
2793*3ac0a46fSAndroid Build Coastguard Worker                 if (win_tr_x1 < rw) {
2794*3ac0a46fSAndroid Build Coastguard Worker                     h.mem[win_tr_x1] = 0;
2795*3ac0a46fSAndroid Build Coastguard Worker                 }
2796*3ac0a46fSAndroid Build Coastguard Worker 
2797*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt_interleave_partial_h(h.mem,
2798*3ac0a46fSAndroid Build Coastguard Worker                                              h.cas,
2799*3ac0a46fSAndroid Build Coastguard Worker                                              sa,
2800*3ac0a46fSAndroid Build Coastguard Worker                                              j,
2801*3ac0a46fSAndroid Build Coastguard Worker                                              (OPJ_UINT32)h.sn,
2802*3ac0a46fSAndroid Build Coastguard Worker                                              win_ll_x0,
2803*3ac0a46fSAndroid Build Coastguard Worker                                              win_ll_x1,
2804*3ac0a46fSAndroid Build Coastguard Worker                                              win_hl_x0,
2805*3ac0a46fSAndroid Build Coastguard Worker                                              win_hl_x1);
2806*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt_decode_partial_1(h.mem, h.mem_count, h.dn, h.sn, h.cas,
2807*3ac0a46fSAndroid Build Coastguard Worker                                          (OPJ_INT32)win_ll_x0,
2808*3ac0a46fSAndroid Build Coastguard Worker                                          (OPJ_INT32)win_ll_x1,
2809*3ac0a46fSAndroid Build Coastguard Worker                                          (OPJ_INT32)win_hl_x0,
2810*3ac0a46fSAndroid Build Coastguard Worker                                          (OPJ_INT32)win_hl_x1);
2811*3ac0a46fSAndroid Build Coastguard Worker                 if (!opj_sparse_array_int32_write(sa,
2812*3ac0a46fSAndroid Build Coastguard Worker                                                   win_tr_x0, j,
2813*3ac0a46fSAndroid Build Coastguard Worker                                                   win_tr_x1, j + 1,
2814*3ac0a46fSAndroid Build Coastguard Worker                                                   h.mem + win_tr_x0,
2815*3ac0a46fSAndroid Build Coastguard Worker                                                   1, 0, OPJ_TRUE)) {
2816*3ac0a46fSAndroid Build Coastguard Worker                     /* FIXME event manager error callback */
2817*3ac0a46fSAndroid Build Coastguard Worker                     opj_sparse_array_int32_free(sa);
2818*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.mem);
2819*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
2820*3ac0a46fSAndroid Build Coastguard Worker                 }
2821*3ac0a46fSAndroid Build Coastguard Worker             }
2822*3ac0a46fSAndroid Build Coastguard Worker         }
2823*3ac0a46fSAndroid Build Coastguard Worker 
2824*3ac0a46fSAndroid Build Coastguard Worker         for (i = win_tr_x0; i < win_tr_x1;) {
2825*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 nb_cols = opj_uint_min(4U, win_tr_x1 - i);
2826*3ac0a46fSAndroid Build Coastguard Worker             opj_dwt_interleave_partial_v(v.mem,
2827*3ac0a46fSAndroid Build Coastguard Worker                                          v.cas,
2828*3ac0a46fSAndroid Build Coastguard Worker                                          sa,
2829*3ac0a46fSAndroid Build Coastguard Worker                                          i,
2830*3ac0a46fSAndroid Build Coastguard Worker                                          nb_cols,
2831*3ac0a46fSAndroid Build Coastguard Worker                                          (OPJ_UINT32)v.sn,
2832*3ac0a46fSAndroid Build Coastguard Worker                                          win_ll_y0,
2833*3ac0a46fSAndroid Build Coastguard Worker                                          win_ll_y1,
2834*3ac0a46fSAndroid Build Coastguard Worker                                          win_lh_y0,
2835*3ac0a46fSAndroid Build Coastguard Worker                                          win_lh_y1);
2836*3ac0a46fSAndroid Build Coastguard Worker             opj_dwt_decode_partial_1_parallel(v.mem, nb_cols, v.dn, v.sn, v.cas,
2837*3ac0a46fSAndroid Build Coastguard Worker                                               (OPJ_INT32)win_ll_y0,
2838*3ac0a46fSAndroid Build Coastguard Worker                                               (OPJ_INT32)win_ll_y1,
2839*3ac0a46fSAndroid Build Coastguard Worker                                               (OPJ_INT32)win_lh_y0,
2840*3ac0a46fSAndroid Build Coastguard Worker                                               (OPJ_INT32)win_lh_y1);
2841*3ac0a46fSAndroid Build Coastguard Worker             if (!opj_sparse_array_int32_write(sa,
2842*3ac0a46fSAndroid Build Coastguard Worker                                               i, win_tr_y0,
2843*3ac0a46fSAndroid Build Coastguard Worker                                               i + nb_cols, win_tr_y1,
2844*3ac0a46fSAndroid Build Coastguard Worker                                               v.mem + 4 * win_tr_y0,
2845*3ac0a46fSAndroid Build Coastguard Worker                                               1, 4, OPJ_TRUE)) {
2846*3ac0a46fSAndroid Build Coastguard Worker                 /* FIXME event manager error callback */
2847*3ac0a46fSAndroid Build Coastguard Worker                 opj_sparse_array_int32_free(sa);
2848*3ac0a46fSAndroid Build Coastguard Worker                 opj_aligned_free(h.mem);
2849*3ac0a46fSAndroid Build Coastguard Worker                 return OPJ_FALSE;
2850*3ac0a46fSAndroid Build Coastguard Worker             }
2851*3ac0a46fSAndroid Build Coastguard Worker 
2852*3ac0a46fSAndroid Build Coastguard Worker             i += nb_cols;
2853*3ac0a46fSAndroid Build Coastguard Worker         }
2854*3ac0a46fSAndroid Build Coastguard Worker     }
2855*3ac0a46fSAndroid Build Coastguard Worker     opj_aligned_free(h.mem);
2856*3ac0a46fSAndroid Build Coastguard Worker 
2857*3ac0a46fSAndroid Build Coastguard Worker     {
2858*3ac0a46fSAndroid Build Coastguard Worker         OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
2859*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
2860*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
2861*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
2862*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
2863*3ac0a46fSAndroid Build Coastguard Worker                        tilec->data_win,
2864*3ac0a46fSAndroid Build Coastguard Worker                        1, tr_max->win_x1 - tr_max->win_x0,
2865*3ac0a46fSAndroid Build Coastguard Worker                        OPJ_TRUE);
2866*3ac0a46fSAndroid Build Coastguard Worker         assert(ret);
2867*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UNUSED(ret);
2868*3ac0a46fSAndroid Build Coastguard Worker     }
2869*3ac0a46fSAndroid Build Coastguard Worker     opj_sparse_array_int32_free(sa);
2870*3ac0a46fSAndroid Build Coastguard Worker     return OPJ_TRUE;
2871*3ac0a46fSAndroid Build Coastguard Worker }
2872*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Gathers samples from up to NB_ELTS_V8 rows of the buffer a into the
 * interleaved work buffer dwt->wavelet.
 *
 * Two passes are made:
 *  - pass 0 copies columns [win_l_x0, win_l_x1) of a to the slots based at
 *    dwt->wavelet + dwt->cas;
 *  - pass 1 copies columns [win_h_x0, win_h_x1) of a + dwt->sn to the slots
 *    based at dwt->wavelet + 1 - dwt->cas.
 * For column i, the destination slot starts 2 * NB_ELTS_V8 * i floats after
 * the pass base, and lane r of that slot receives a[i + r * width].
 *
 * When fewer than NB_ELTS_V8 rows are gathered, the remaining lanes of each
 * slot are left untouched.
 *
 * @param dwt               transform state; wavelet, cas, sn and the
 *                          win_l_x0 / win_l_x1 / win_h_x0 / win_h_x1 fields are read
 * @param a                 first sample of the first row to gather
 * @param width             distance, in samples, between two consecutive rows of a
 * @param remaining_height  number of rows to gather. Values above NB_ELTS_V8
 *                          are treated as NB_ELTS_V8; 0 gathers nothing.
 */
static void opj_v8dwt_interleave_h(opj_v8dwt_t* OPJ_RESTRICT dwt,
                                   OPJ_FLOAT32* OPJ_RESTRICT a,
                                   OPJ_UINT32 width,
                                   OPJ_UINT32 remaining_height)
{
    OPJ_FLOAT32* OPJ_RESTRICT bi = (OPJ_FLOAT32*)(dwt->wavelet + dwt->cas);
    /* Offsets are computed in OPJ_SIZE_T so that column + row * width and */
    /* column * 2 * NB_ELTS_V8 cannot wrap around at 32 bits on wide images */
    const OPJ_SIZE_T stride = (OPJ_SIZE_T)width;
    const OPJ_UINT32 nb_rows = (remaining_height < (OPJ_UINT32)NB_ELTS_V8) ?
                               remaining_height : (OPJ_UINT32)NB_ELTS_V8;
    OPJ_UINT32 i, k;
    OPJ_UINT32 x0 = dwt->win_l_x0;
    OPJ_UINT32 x1 = dwt->win_l_x1;

    for (k = 0; k < 2; ++k) {
        if (remaining_height >= NB_ELTS_V8 && ((OPJ_SIZE_T) a & 0x0f) == 0 &&
                ((OPJ_SIZE_T) bi & 0x0f) == 0) {
            /* Fast code path: a full group of NB_ELTS_V8 rows, no per-row test */
            /* To be adapted if NB_ELTS_V8 changes */
            for (i = x0; i < x1; ++i) {
                const OPJ_FLOAT32* src = a + i;
                OPJ_FLOAT32* OPJ_RESTRICT dst = bi + (OPJ_SIZE_T)i * 2 * NB_ELTS_V8;
                dst[0] = src[0];
                dst[1] = src[stride];
                dst[2] = src[2 * stride];
                dst[3] = src[3 * stride];
                dst[4] = src[4 * stride];
                dst[5] = src[5 * stride];
                dst[6] = src[6 * stride];
                dst[7] = src[7 * stride];
            }
        } else {
            /* Slow code path: partial group of rows and/or unaligned buffers */
            for (i = x0; i < x1; ++i) {
                const OPJ_FLOAT32* src = a + i;
                OPJ_FLOAT32* OPJ_RESTRICT dst = bi + (OPJ_SIZE_T)i * 2 * NB_ELTS_V8;
                OPJ_UINT32 r;
                for (r = 0; r < nb_rows; ++r) {
                    dst[r] = src[r * stride];
                }
            }
        }

        /* Second pass: switch to the other interleaving phase and to the */
        /* samples located dwt->sn columns further in each row */
        bi = (OPJ_FLOAT32*)(dwt->wavelet + 1 - dwt->cas);
        a += dwt->sn;
        x0 = dwt->win_h_x0;
        x1 = dwt->win_h_x1;
    }
}
2956*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Fills the interleaved work buffer dwt->wavelet from a sparse array, one
 * line of the sparse array per lane.
 *
 * For each line sa_line + row (row in [0, remaining_height)), two reads are
 * issued:
 *  - columns [win_l_x0, win_l_x1) go to
 *    (OPJ_INT32*)(wavelet + cas + 2 * win_l_x0) + row;
 *  - columns [sn + win_h_x0, sn + win_h_x1) go to
 *    (OPJ_INT32*)(wavelet + 1 - cas + 2 * win_h_x0) + row.
 * Both reads pass 2 * NB_ELTS_V8 and 0 as the two stride arguments
 * (presumably destination column stride and line stride - confirm against
 * opj_sparse_array_int32_read()).
 *
 * The result of each read is only checked through assert().
 *
 * @param dwt               transform state; wavelet, cas, sn and win_* are read
 * @param sa                sparse array to read from
 * @param sa_line           first line of the sparse array to read
 * @param remaining_height  number of consecutive lines to read
 */
static void opj_v8dwt_interleave_partial_h(opj_v8dwt_t* dwt,
        opj_sparse_array_int32_t* sa,
        OPJ_UINT32 sa_line,
        OPJ_UINT32 remaining_height)
{
    OPJ_UINT32 row;

    for (row = 0; row < remaining_height; ++row) {
        const OPJ_UINT32 y = sa_line + row;
        /* Nasty casts from float* to int32* */
        OPJ_INT32* const low_dst =
            (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + row;
        OPJ_INT32* const high_dst =
            (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + row;
        OPJ_BOOL ok;

        ok = opj_sparse_array_int32_read(sa,
                                         dwt->win_l_x0, y,
                                         dwt->win_l_x1, y + 1,
                                         low_dst,
                                         2 * NB_ELTS_V8, 0, OPJ_TRUE);
        assert(ok);

        ok = opj_sparse_array_int32_read(sa,
                                         (OPJ_UINT32)dwt->sn + dwt->win_h_x0, y,
                                         (OPJ_UINT32)dwt->sn + dwt->win_h_x1, y + 1,
                                         high_dst,
                                         2 * NB_ELTS_V8, 0, OPJ_TRUE);
        assert(ok);

        /* Keeps release builds (assert compiled out) warning-free */
        OPJ_UNUSED(ok);
    }
}
2982*3ac0a46fSAndroid Build Coastguard Worker 
opj_v8dwt_interleave_v(opj_v8dwt_t * OPJ_RESTRICT dwt,OPJ_FLOAT32 * OPJ_RESTRICT a,OPJ_UINT32 width,OPJ_UINT32 nb_elts_read)2983*3ac0a46fSAndroid Build Coastguard Worker static INLINE void opj_v8dwt_interleave_v(opj_v8dwt_t* OPJ_RESTRICT dwt,
2984*3ac0a46fSAndroid Build Coastguard Worker         OPJ_FLOAT32* OPJ_RESTRICT a,
2985*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 width,
2986*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 nb_elts_read)
2987*3ac0a46fSAndroid Build Coastguard Worker {
2988*3ac0a46fSAndroid Build Coastguard Worker     opj_v8_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas;
2989*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 i;
2990*3ac0a46fSAndroid Build Coastguard Worker 
2991*3ac0a46fSAndroid Build Coastguard Worker     for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) {
2992*3ac0a46fSAndroid Build Coastguard Worker         memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width],
2993*3ac0a46fSAndroid Build Coastguard Worker                (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32));
2994*3ac0a46fSAndroid Build Coastguard Worker     }
2995*3ac0a46fSAndroid Build Coastguard Worker 
2996*3ac0a46fSAndroid Build Coastguard Worker     a += (OPJ_UINT32)dwt->sn * (OPJ_SIZE_T)width;
2997*3ac0a46fSAndroid Build Coastguard Worker     bi = dwt->wavelet + 1 - dwt->cas;
2998*3ac0a46fSAndroid Build Coastguard Worker 
2999*3ac0a46fSAndroid Build Coastguard Worker     for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) {
3000*3ac0a46fSAndroid Build Coastguard Worker         memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width],
3001*3ac0a46fSAndroid Build Coastguard Worker                (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32));
3002*3ac0a46fSAndroid Build Coastguard Worker     }
3003*3ac0a46fSAndroid Build Coastguard Worker }
3004*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Fills the interleaved work buffer dwt->wavelet from nb_elts_read columns of
 * a sparse array.
 *
 * Two reads are issued over columns [sa_col, sa_col + nb_elts_read):
 *  - lines [win_l_x0, win_l_x1) go to
 *    (OPJ_INT32*)(wavelet + cas + 2 * win_l_x0);
 *  - lines [sn + win_h_x0, sn + win_h_x1) go to
 *    (OPJ_INT32*)(wavelet + 1 - cas + 2 * win_h_x0).
 * Both reads pass 1 and 2 * NB_ELTS_V8 as the two stride arguments
 * (presumably destination column stride and line stride - confirm against
 * opj_sparse_array_int32_read()).
 *
 * The result of each read is only checked through assert().
 *
 * @param dwt           transform state; wavelet, cas, sn and win_* are read
 * @param sa            sparse array to read from
 * @param sa_col        first column of the sparse array to read
 * @param nb_elts_read  number of consecutive columns to read
 */
static void opj_v8dwt_interleave_partial_v(opj_v8dwt_t* OPJ_RESTRICT dwt,
        opj_sparse_array_int32_t* sa,
        OPJ_UINT32 sa_col,
        OPJ_UINT32 nb_elts_read)
{
    const OPJ_UINT32 col_end = sa_col + nb_elts_read;
    const OPJ_UINT32 high_origin = (OPJ_UINT32)dwt->sn;
    /* Casts from the float work buffer to int32*, as the sparse array API */
    /* deals with OPJ_INT32 */
    OPJ_INT32* const low_dst =
        (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0);
    OPJ_INT32* const high_dst =
        (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0);
    OPJ_BOOL ok;

    ok = opj_sparse_array_int32_read(sa,
                                     sa_col, dwt->win_l_x0,
                                     col_end, dwt->win_l_x1,
                                     low_dst,
                                     1, 2 * NB_ELTS_V8, OPJ_TRUE);
    assert(ok);

    ok = opj_sparse_array_int32_read(sa,
                                     sa_col, high_origin + dwt->win_h_x0,
                                     col_end, high_origin + dwt->win_h_x1,
                                     high_dst,
                                     1, 2 * NB_ELTS_V8, OPJ_TRUE);
    assert(ok);

    /* Keeps release builds (assert compiled out) warning-free */
    OPJ_UNUSED(ok);
}
3025*3ac0a46fSAndroid Build Coastguard Worker 
3026*3ac0a46fSAndroid Build Coastguard Worker #ifdef __SSE__
3027*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Multiplies in place, by the vector c, two consecutive __m128 values out of
 * every four, for positions start to end - 1.
 *
 * Position i designates the __m128 pair located at ((__m128*) w)[4 * i] and
 * ((__m128*) w)[4 * i + 1]; the two following __m128 values are skipped.
 *
 * @param w      base of the buffer to scale
 * @param start  first position to process
 * @param end    one past the last position to process
 * @param c      multiplier applied to every processed __m128
 */
static void opj_v8dwt_decode_step1_sse(opj_v8_t* w,
                                       OPJ_UINT32 start,
                                       OPJ_UINT32 end,
                                       const __m128 c)
{
    /* To be adapted if NB_ELTS_V8 changes */
    __m128* OPJ_RESTRICT cur = (__m128*) w + 4 * start;
    OPJ_UINT32 pos;

    /* Note: attempt at loop unrolling x2 doesn't help */
    for (pos = start; pos < end; ++pos) {
        cur[0] = _mm_mul_ps(cur[0], c);
        cur[1] = _mm_mul_ps(cur[1], c);
        cur += 4;
    }
}
3043*3ac0a46fSAndroid Build Coastguard Worker 
/**
 * Adds in place, to the __m128 pair that precedes each processed position,
 * c times the sum of its two neighbouring pairs.
 *
 * With vw = (__m128*) w + 4 * i for position i, the update is
 *   vw[-2] += (vw[-4] + vw[0]) * c
 *   vw[-1] += (vw[-3] + vw[1]) * c
 * for i in [start, min(end, m)). At position 0 the left neighbour
 * (vw[-4], vw[-3]) is replaced by the pair read from l.
 *
 * When m < end (asserted to mean end == m + 1), one more pair is updated
 * using only its left neighbour, weighted by 2 * c:
 *   vw[-2] += (c + c) * vw[-4]
 *   vw[-1] += (c + c) * vw[-3]
 *
 * @param l      pair used as left neighbour for position 0
 * @param w      base of the buffer; the pair before each position is updated
 * @param start  first position to process
 * @param end    one past the last position to process
 * @param m      number of positions that have a right neighbour
 * @param c      weight applied to the neighbour sum
 */
static void opj_v8dwt_decode_step2_sse(opj_v8_t* l, opj_v8_t* w,
                                       OPJ_UINT32 start,
                                       OPJ_UINT32 end,
                                       OPJ_UINT32 m,
                                       __m128 c)
{
    __m128* OPJ_RESTRICT vl = (__m128*) l;
    __m128* OPJ_RESTRICT vw = (__m128*) w;
    /* To be adapted if NB_ELTS_V8 changes */
    const OPJ_UINT32 stop = opj_uint_min(end, m);
    OPJ_UINT32 pos = start;

    if (pos != 0) {
        vw += pos * 4;
    } else if (stop >= 1) {
        /* Position 0: the left neighbour comes from l */
        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), c));
        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), c));
        vw += 4;
        pos = 1;
    }

    /* Note: attempt at loop unrolling x2 doesn't help */
    while (pos < stop) {
        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), c));
        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), c));
        vw += 4;
        ++pos;
    }

    if (m < end) {
        __m128 twice_c;
        assert(m + 1 == end);
        /* No right neighbour: the left one is counted twice */
        twice_c = _mm_add_ps(c, c);
        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(twice_c, vw[-4]));
        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(twice_c, vw[-3]));
    }
}
3080*3ac0a46fSAndroid Build Coastguard Worker 
3081*3ac0a46fSAndroid Build Coastguard Worker #else
3082*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Scalar scaling pass: multiplies by c the 8 floats of every second opj_v8_t
 * element reachable from w, for element indices start (inclusive) to end
 * (exclusive). Element i lives at float offset i * 2 * 8 from w.
 *
 * To be adapted if NB_ELTS_V8 changes.
 */
static void opj_v8dwt_decode_step1(opj_v8_t* w,
                                   OPJ_UINT32 start,
                                   OPJ_UINT32 end,
                                   const OPJ_FLOAT32 c)
{
    OPJ_FLOAT32* OPJ_RESTRICT base = (OPJ_FLOAT32*) w;
    OPJ_UINT32 idx;
    OPJ_UINT32 lane;

    for (idx = start; idx < end; ++idx) {
        /* First float of the idx-th scaled vector (stride of two vectors). */
        OPJ_FLOAT32* vec = base + idx * 2 * 8;
        for (lane = 0; lane < 8; ++lane) {
            vec[lane] = vec[lane] * c;
        }
    }
}
3102*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Scalar lifting pass over 8-float vectors.
 *
 * For every index i in [start, min(end, m)) the 8 floats immediately before
 * the current position are updated lane by lane as
 *     target += (previous + current) * c
 * "previous" starts as l when start == 0, or as the vector two opj_v8_t
 * before the starting position otherwise, and then follows the current
 * position, which advances by 2 * NB_ELTS_V8 floats per iteration.
 *
 * When m < end (m + 1 == end is asserted) the last target has no "current"
 * neighbour and is updated as
 *     target += previous * (2 * c)
 *
 * To be adapted if NB_ELTS_V8 changes.
 */
static void opj_v8dwt_decode_step2(opj_v8_t* l, opj_v8_t* w,
                                   OPJ_UINT32 start,
                                   OPJ_UINT32 end,
                                   OPJ_UINT32 m,
                                   OPJ_FLOAT32 c)
{
    OPJ_FLOAT32* prev = (OPJ_FLOAT32*) l;
    OPJ_FLOAT32* next = (OPJ_FLOAT32*) w;
    const OPJ_UINT32 limit = opj_uint_min(end, m);
    OPJ_UINT32 i;
    OPJ_UINT32 lane;

    if (start > 0) {
        /* Skip to the first requested position; l is not used in that case. */
        next += 2 * NB_ELTS_V8 * start;
        prev = next - 2 * NB_ELTS_V8;
    }

    for (i = start; i < limit; ++i) {
        OPJ_FLOAT32* target = next - 8;
        for (lane = 0; lane < 8; ++lane) {
            target[lane] = target[lane] + ((prev[lane] + next[lane]) * c);
        }
        prev = next;
        next += 2 * NB_ELTS_V8;
    }

    if (m < end) {
        /* Trailing element: only the previous neighbour exists. */
        OPJ_FLOAT32* target = next - 8;
        assert(m + 1 == end);
        c += c;
        for (lane = 0; lane < 8; ++lane) {
            target[lane] = target[lane] + prev[lane] * c;
        }
    }
}
3143*3ac0a46fSAndroid Build Coastguard Worker 
3144*3ac0a46fSAndroid Build Coastguard Worker #endif
3145*3ac0a46fSAndroid Build Coastguard Worker 
3146*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
3147*3ac0a46fSAndroid Build Coastguard Worker /* Inverse 9-7 wavelet transform in 1-D. */
3148*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
/*
 * Runs the 1-D inverse transform in place on dwt->wavelet, 8 floats per
 * element.
 *
 * dwt->cas selects which of the two interleaved sample sets starts at
 * wavelet[0]: offset a is scaled over the win_l_* window, offset b over the
 * win_h_* window. The function returns without touching the buffer when the
 * (sn, dn) pair leaves nothing to do for the selected case.
 *
 * Sequence: two scaling passes (opj_K, then two_invK) followed by four
 * lifting passes using -opj_dwt_delta, -opj_dwt_gamma, -opj_dwt_beta and
 * -opj_dwt_alpha, alternating between the two sample sets.
 */
static void opj_v8dwt_decode(opj_v8dwt_t* OPJ_RESTRICT dwt)
{
    /* BUG_WEIRD_TWO_INVK (look for this identifier in tcd.c) */
    /* Historic value for 2 / opj_invK */
    /* Normally, we should use invK, but if we do so, we have failures in the */
    /* conformance test, due to MSE and peak errors significantly higher than */
    /* accepted value */
    /* Due to using two_invK instead of invK, we have to compensate in tcd.c */
    /* the computation of the stepsize for the non LL subbands */
    const float two_invK = 1.625732422f;
    OPJ_INT32 a, b;
    opj_v8_t* base_a;
    opj_v8_t* base_b;
    OPJ_UINT32 m_l;
    OPJ_UINT32 m_h;

    if (dwt->cas == 0) {
        if ((dwt->dn <= 0) && (dwt->sn <= 1)) {
            return;
        }
        a = 0;
        b = 1;
    } else {
        if ((dwt->sn <= 0) && (dwt->dn <= 1)) {
            return;
        }
        a = 1;
        b = 0;
    }

    /* Starting elements of the two interleaved sample sets. */
    base_a = dwt->wavelet + a;
    base_b = dwt->wavelet + b;

    /* Number of positions that have neighbours on both sides, per window. */
    m_l = (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a);
    m_h = (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b);

#ifdef __SSE__
    opj_v8dwt_decode_step1_sse(base_a, dwt->win_l_x0, dwt->win_l_x1,
                               _mm_set1_ps(opj_K));
    opj_v8dwt_decode_step1_sse(base_b, dwt->win_h_x0, dwt->win_h_x1,
                               _mm_set1_ps(two_invK));

    opj_v8dwt_decode_step2_sse(base_b, base_a + 1,
                               dwt->win_l_x0, dwt->win_l_x1, m_l,
                               _mm_set1_ps(-opj_dwt_delta));
    opj_v8dwt_decode_step2_sse(base_a, base_b + 1,
                               dwt->win_h_x0, dwt->win_h_x1, m_h,
                               _mm_set1_ps(-opj_dwt_gamma));
    opj_v8dwt_decode_step2_sse(base_b, base_a + 1,
                               dwt->win_l_x0, dwt->win_l_x1, m_l,
                               _mm_set1_ps(-opj_dwt_beta));
    opj_v8dwt_decode_step2_sse(base_a, base_b + 1,
                               dwt->win_h_x0, dwt->win_h_x1, m_h,
                               _mm_set1_ps(-opj_dwt_alpha));
#else
    opj_v8dwt_decode_step1(base_a, dwt->win_l_x0, dwt->win_l_x1,
                           opj_K);
    opj_v8dwt_decode_step1(base_b, dwt->win_h_x0, dwt->win_h_x1,
                           two_invK);

    opj_v8dwt_decode_step2(base_b, base_a + 1,
                           dwt->win_l_x0, dwt->win_l_x1, m_l,
                           -opj_dwt_delta);
    opj_v8dwt_decode_step2(base_a, base_b + 1,
                           dwt->win_h_x0, dwt->win_h_x1, m_h,
                           -opj_dwt_gamma);
    opj_v8dwt_decode_step2(base_b, base_a + 1,
                           dwt->win_l_x0, dwt->win_l_x1, m_l,
                           -opj_dwt_beta);
    opj_v8dwt_decode_step2(base_a, base_b + 1,
                           dwt->win_h_x0, dwt->win_h_x1, m_h,
                           -opj_dwt_alpha);
#endif
}
3217*3ac0a46fSAndroid Build Coastguard Worker 
3218*3ac0a46fSAndroid Build Coastguard Worker typedef struct {
3219*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_t h;
3220*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw;
3221*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
3222*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32 * OPJ_RESTRICT aj;
3223*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 nb_rows;
3224*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt97_decode_h_job_t;
3225*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Thread-pool worker for the horizontal pass of the inverse 9-7 transform.
 *
 * user_data is an opj_dwt97_decode_h_job_t*. Rows are processed in groups of
 * NB_ELTS_V8: each group is interleaved into job->h.wavelet, decoded, and the
 * first job->rw samples of every row are written back to job->aj.
 * job->nb_rows must be a multiple of NB_ELTS_V8 (asserted).
 *
 * The worker takes ownership of the job: job->h.wavelet and the job itself
 * are released before returning. tls is unused.
 */
static void opj_dwt97_decode_h_func(void* user_data, opj_tls_t* tls)
{
    opj_dwt97_decode_h_job_t* job = (opj_dwt97_decode_h_job_t*)user_data;
    OPJ_FLOAT32 * OPJ_RESTRICT aj = job->aj;
    /* Row stride widened once, so that no offset below is computed in 32-bit
     * unsigned arithmetic (which could wrap for very wide tiles). */
    const OPJ_SIZE_T stride = (OPJ_SIZE_T)job->w;
    OPJ_UINT32 j;
    (void)tls;

    assert((job->nb_rows % NB_ELTS_V8) == 0);

    for (j = 0; j + NB_ELTS_V8 <= job->nb_rows; j += NB_ELTS_V8) {
        OPJ_UINT32 k;
        opj_v8dwt_interleave_h(&job->h, aj, job->w, NB_ELTS_V8);
        opj_v8dwt_decode(&job->h);

        /* To be adapted if NB_ELTS_V8 changes */
        /* De-interleave in two passes of four rows each. */
        for (k = 0; k < job->rw; k++) {
            aj[k             ] = job->h.wavelet[k].f[0];
            aj[k + stride    ] = job->h.wavelet[k].f[1];
            aj[k + stride * 2] = job->h.wavelet[k].f[2];
            aj[k + stride * 3] = job->h.wavelet[k].f[3];
        }
        for (k = 0; k < job->rw; k++) {
            aj[k + stride * 4] = job->h.wavelet[k].f[4];
            aj[k + stride * 5] = job->h.wavelet[k].f[5];
            aj[k + stride * 6] = job->h.wavelet[k].f[6];
            aj[k + stride * 7] = job->h.wavelet[k].f[7];
        }

        /* Advance to the next group of rows (size_t arithmetic). */
        aj += stride * NB_ELTS_V8;
    }

    opj_aligned_free(job->h.wavelet);
    opj_free(job);
}
3265*3ac0a46fSAndroid Build Coastguard Worker 
3266*3ac0a46fSAndroid Build Coastguard Worker 
3267*3ac0a46fSAndroid Build Coastguard Worker typedef struct {
3268*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_t v;
3269*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh;
3270*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w;
3271*3ac0a46fSAndroid Build Coastguard Worker     OPJ_FLOAT32 * OPJ_RESTRICT aj;
3272*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 nb_columns;
3273*3ac0a46fSAndroid Build Coastguard Worker } opj_dwt97_decode_v_job_t;
3274*3ac0a46fSAndroid Build Coastguard Worker 
/*
 * Thread-pool worker for the vertical pass of the inverse 9-7 transform.
 *
 * user_data is an opj_dwt97_decode_v_job_t*. Columns are processed in groups
 * of NB_ELTS_V8: each group is interleaved into job->v.wavelet, decoded, and
 * then copied back row by row (NB_ELTS_V8 floats per row, job->rh rows).
 * job->nb_columns must be a multiple of NB_ELTS_V8 (asserted).
 *
 * The worker takes ownership of the job: job->v.wavelet and the job itself
 * are released before returning. tls is unused.
 */
static void opj_dwt97_decode_v_func(void* user_data, opj_tls_t* tls)
{
    opj_dwt97_decode_v_job_t* job = (opj_dwt97_decode_v_job_t*)user_data;
    OPJ_FLOAT32* OPJ_RESTRICT col = job->aj;
    OPJ_UINT32 done;
    (void)tls;

    assert((job->nb_columns % NB_ELTS_V8) == 0);

    for (done = 0; done + NB_ELTS_V8 <= job->nb_columns; done += NB_ELTS_V8) {
        OPJ_UINT32 row;

        opj_v8dwt_interleave_v(&job->v, col, job->w, NB_ELTS_V8);
        opj_v8dwt_decode(&job->v);

        /* Each decoded element holds one row's worth of this column group. */
        for (row = 0; row < job->rh; ++row) {
            memcpy(&col[row * (OPJ_SIZE_T)job->w], &job->v.wavelet[row],
                   NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
        }

        /* Move on to the next group of columns. */
        col += NB_ELTS_V8;
    }

    opj_aligned_free(job->v.wavelet);
    opj_free(job);
}
3303*3ac0a46fSAndroid Build Coastguard Worker 
3304*3ac0a46fSAndroid Build Coastguard Worker 
3305*3ac0a46fSAndroid Build Coastguard Worker /* <summary>                             */
3306*3ac0a46fSAndroid Build Coastguard Worker /* Inverse 9-7 wavelet transform in 2-D. */
3307*3ac0a46fSAndroid Build Coastguard Worker /* </summary>                            */
3308*3ac0a46fSAndroid Build Coastguard Worker static
opj_dwt_decode_tile_97(opj_thread_pool_t * tp,opj_tcd_tilecomp_t * OPJ_RESTRICT tilec,OPJ_UINT32 numres)3309*3ac0a46fSAndroid Build Coastguard Worker OPJ_BOOL opj_dwt_decode_tile_97(opj_thread_pool_t* tp,
3310*3ac0a46fSAndroid Build Coastguard Worker                                 opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
3311*3ac0a46fSAndroid Build Coastguard Worker                                 OPJ_UINT32 numres)
3312*3ac0a46fSAndroid Build Coastguard Worker {
3313*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_t h;
3314*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_t v;
3315*3ac0a46fSAndroid Build Coastguard Worker 
3316*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* res = tilec->resolutions;
3317*3ac0a46fSAndroid Build Coastguard Worker 
3318*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw = (OPJ_UINT32)(res->x1 -
3319*3ac0a46fSAndroid Build Coastguard Worker                                  res->x0);    /* width of the resolution level computed */
3320*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 -
3321*3ac0a46fSAndroid Build Coastguard Worker                                  res->y0);    /* height of the resolution level computed */
3322*3ac0a46fSAndroid Build Coastguard Worker 
3323*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions -
3324*3ac0a46fSAndroid Build Coastguard Worker                                                                1].x1 -
3325*3ac0a46fSAndroid Build Coastguard Worker                                 tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
3326*3ac0a46fSAndroid Build Coastguard Worker 
3327*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T l_data_size;
3328*3ac0a46fSAndroid Build Coastguard Worker     const int num_threads = opj_thread_pool_get_thread_count(tp);
3329*3ac0a46fSAndroid Build Coastguard Worker 
3330*3ac0a46fSAndroid Build Coastguard Worker     if (numres == 1) {
3331*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_TRUE;
3332*3ac0a46fSAndroid Build Coastguard Worker     }
3333*3ac0a46fSAndroid Build Coastguard Worker 
3334*3ac0a46fSAndroid Build Coastguard Worker     l_data_size = opj_dwt_max_resolution(res, numres);
3335*3ac0a46fSAndroid Build Coastguard Worker     /* overflow check */
3336*3ac0a46fSAndroid Build Coastguard Worker     if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) {
3337*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
3338*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
3339*3ac0a46fSAndroid Build Coastguard Worker     }
3340*3ac0a46fSAndroid Build Coastguard Worker     h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
3341*3ac0a46fSAndroid Build Coastguard Worker     if (!h.wavelet) {
3342*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
3343*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
3344*3ac0a46fSAndroid Build Coastguard Worker     }
3345*3ac0a46fSAndroid Build Coastguard Worker     v.wavelet = h.wavelet;
3346*3ac0a46fSAndroid Build Coastguard Worker 
3347*3ac0a46fSAndroid Build Coastguard Worker     while (--numres) {
3348*3ac0a46fSAndroid Build Coastguard Worker         OPJ_FLOAT32 * OPJ_RESTRICT aj = (OPJ_FLOAT32*) tilec->data;
3349*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 j;
3350*3ac0a46fSAndroid Build Coastguard Worker 
3351*3ac0a46fSAndroid Build Coastguard Worker         h.sn = (OPJ_INT32)rw;
3352*3ac0a46fSAndroid Build Coastguard Worker         v.sn = (OPJ_INT32)rh;
3353*3ac0a46fSAndroid Build Coastguard Worker 
3354*3ac0a46fSAndroid Build Coastguard Worker         ++res;
3355*3ac0a46fSAndroid Build Coastguard Worker 
3356*3ac0a46fSAndroid Build Coastguard Worker         rw = (OPJ_UINT32)(res->x1 -
3357*3ac0a46fSAndroid Build Coastguard Worker                           res->x0);   /* width of the resolution level computed */
3358*3ac0a46fSAndroid Build Coastguard Worker         rh = (OPJ_UINT32)(res->y1 -
3359*3ac0a46fSAndroid Build Coastguard Worker                           res->y0);   /* height of the resolution level computed */
3360*3ac0a46fSAndroid Build Coastguard Worker 
3361*3ac0a46fSAndroid Build Coastguard Worker         h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
3362*3ac0a46fSAndroid Build Coastguard Worker         h.cas = res->x0 % 2;
3363*3ac0a46fSAndroid Build Coastguard Worker 
3364*3ac0a46fSAndroid Build Coastguard Worker         h.win_l_x0 = 0;
3365*3ac0a46fSAndroid Build Coastguard Worker         h.win_l_x1 = (OPJ_UINT32)h.sn;
3366*3ac0a46fSAndroid Build Coastguard Worker         h.win_h_x0 = 0;
3367*3ac0a46fSAndroid Build Coastguard Worker         h.win_h_x1 = (OPJ_UINT32)h.dn;
3368*3ac0a46fSAndroid Build Coastguard Worker 
3369*3ac0a46fSAndroid Build Coastguard Worker         if (num_threads <= 1 || rh < 2 * NB_ELTS_V8) {
3370*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) {
3371*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_UINT32 k;
3372*3ac0a46fSAndroid Build Coastguard Worker                 opj_v8dwt_interleave_h(&h, aj, w, NB_ELTS_V8);
3373*3ac0a46fSAndroid Build Coastguard Worker                 opj_v8dwt_decode(&h);
3374*3ac0a46fSAndroid Build Coastguard Worker 
3375*3ac0a46fSAndroid Build Coastguard Worker                 /* To be adapted if NB_ELTS_V8 changes */
3376*3ac0a46fSAndroid Build Coastguard Worker                 for (k = 0; k < rw; k++) {
3377*3ac0a46fSAndroid Build Coastguard Worker                     aj[k      ] = h.wavelet[k].f[0];
3378*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
3379*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
3380*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
3381*3ac0a46fSAndroid Build Coastguard Worker                 }
3382*3ac0a46fSAndroid Build Coastguard Worker                 for (k = 0; k < rw; k++) {
3383*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4];
3384*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5];
3385*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6];
3386*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7];
3387*3ac0a46fSAndroid Build Coastguard Worker                 }
3388*3ac0a46fSAndroid Build Coastguard Worker 
3389*3ac0a46fSAndroid Build Coastguard Worker                 aj += w * NB_ELTS_V8;
3390*3ac0a46fSAndroid Build Coastguard Worker             }
3391*3ac0a46fSAndroid Build Coastguard Worker         } else {
3392*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
3393*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 step_j;
3394*3ac0a46fSAndroid Build Coastguard Worker 
3395*3ac0a46fSAndroid Build Coastguard Worker             if ((rh / NB_ELTS_V8) < num_jobs) {
3396*3ac0a46fSAndroid Build Coastguard Worker                 num_jobs = rh / NB_ELTS_V8;
3397*3ac0a46fSAndroid Build Coastguard Worker             }
3398*3ac0a46fSAndroid Build Coastguard Worker             step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
3399*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < num_jobs; j++) {
3400*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt97_decode_h_job_t* job;
3401*3ac0a46fSAndroid Build Coastguard Worker 
3402*3ac0a46fSAndroid Build Coastguard Worker                 job = (opj_dwt97_decode_h_job_t*) opj_malloc(sizeof(opj_dwt97_decode_h_job_t));
3403*3ac0a46fSAndroid Build Coastguard Worker                 if (!job) {
3404*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
3405*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.wavelet);
3406*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
3407*3ac0a46fSAndroid Build Coastguard Worker                 }
3408*3ac0a46fSAndroid Build Coastguard Worker                 job->h.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
3409*3ac0a46fSAndroid Build Coastguard Worker                 if (!job->h.wavelet) {
3410*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
3411*3ac0a46fSAndroid Build Coastguard Worker                     opj_free(job);
3412*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.wavelet);
3413*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
3414*3ac0a46fSAndroid Build Coastguard Worker                 }
3415*3ac0a46fSAndroid Build Coastguard Worker                 job->h.dn = h.dn;
3416*3ac0a46fSAndroid Build Coastguard Worker                 job->h.sn = h.sn;
3417*3ac0a46fSAndroid Build Coastguard Worker                 job->h.cas = h.cas;
3418*3ac0a46fSAndroid Build Coastguard Worker                 job->h.win_l_x0 = h.win_l_x0;
3419*3ac0a46fSAndroid Build Coastguard Worker                 job->h.win_l_x1 = h.win_l_x1;
3420*3ac0a46fSAndroid Build Coastguard Worker                 job->h.win_h_x0 = h.win_h_x0;
3421*3ac0a46fSAndroid Build Coastguard Worker                 job->h.win_h_x1 = h.win_h_x1;
3422*3ac0a46fSAndroid Build Coastguard Worker                 job->rw = rw;
3423*3ac0a46fSAndroid Build Coastguard Worker                 job->w = w;
3424*3ac0a46fSAndroid Build Coastguard Worker                 job->aj = aj;
3425*3ac0a46fSAndroid Build Coastguard Worker                 job->nb_rows = (j + 1 == num_jobs) ? (rh & (OPJ_UINT32)~
3426*3ac0a46fSAndroid Build Coastguard Worker                                                       (NB_ELTS_V8 - 1)) - j * step_j : step_j;
3427*3ac0a46fSAndroid Build Coastguard Worker                 aj += w * job->nb_rows;
3428*3ac0a46fSAndroid Build Coastguard Worker                 opj_thread_pool_submit_job(tp, opj_dwt97_decode_h_func, job);
3429*3ac0a46fSAndroid Build Coastguard Worker             }
3430*3ac0a46fSAndroid Build Coastguard Worker             opj_thread_pool_wait_completion(tp, 0);
3431*3ac0a46fSAndroid Build Coastguard Worker             j = rh & (OPJ_UINT32)~(NB_ELTS_V8 - 1);
3432*3ac0a46fSAndroid Build Coastguard Worker         }
3433*3ac0a46fSAndroid Build Coastguard Worker 
3434*3ac0a46fSAndroid Build Coastguard Worker         if (j < rh) {
3435*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 k;
3436*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_interleave_h(&h, aj, w, rh - j);
3437*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_decode(&h);
3438*3ac0a46fSAndroid Build Coastguard Worker             for (k = 0; k < rw; k++) {
3439*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_UINT32 l;
3440*3ac0a46fSAndroid Build Coastguard Worker                 for (l = 0; l < rh - j; l++) {
3441*3ac0a46fSAndroid Build Coastguard Worker                     aj[k + (OPJ_SIZE_T)w  * l ] = h.wavelet[k].f[l];
3442*3ac0a46fSAndroid Build Coastguard Worker                 }
3443*3ac0a46fSAndroid Build Coastguard Worker             }
3444*3ac0a46fSAndroid Build Coastguard Worker         }
3445*3ac0a46fSAndroid Build Coastguard Worker 
3446*3ac0a46fSAndroid Build Coastguard Worker         v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
3447*3ac0a46fSAndroid Build Coastguard Worker         v.cas = res->y0 % 2;
3448*3ac0a46fSAndroid Build Coastguard Worker         v.win_l_x0 = 0;
3449*3ac0a46fSAndroid Build Coastguard Worker         v.win_l_x1 = (OPJ_UINT32)v.sn;
3450*3ac0a46fSAndroid Build Coastguard Worker         v.win_h_x0 = 0;
3451*3ac0a46fSAndroid Build Coastguard Worker         v.win_h_x1 = (OPJ_UINT32)v.dn;
3452*3ac0a46fSAndroid Build Coastguard Worker 
3453*3ac0a46fSAndroid Build Coastguard Worker         aj = (OPJ_FLOAT32*) tilec->data;
3454*3ac0a46fSAndroid Build Coastguard Worker         if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
3455*3ac0a46fSAndroid Build Coastguard Worker             for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) {
3456*3ac0a46fSAndroid Build Coastguard Worker                 OPJ_UINT32 k;
3457*3ac0a46fSAndroid Build Coastguard Worker 
3458*3ac0a46fSAndroid Build Coastguard Worker                 opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8);
3459*3ac0a46fSAndroid Build Coastguard Worker                 opj_v8dwt_decode(&v);
3460*3ac0a46fSAndroid Build Coastguard Worker 
3461*3ac0a46fSAndroid Build Coastguard Worker                 for (k = 0; k < rh; ++k) {
3462*3ac0a46fSAndroid Build Coastguard Worker                     memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
3463*3ac0a46fSAndroid Build Coastguard Worker                 }
3464*3ac0a46fSAndroid Build Coastguard Worker                 aj += NB_ELTS_V8;
3465*3ac0a46fSAndroid Build Coastguard Worker             }
3466*3ac0a46fSAndroid Build Coastguard Worker         } else {
3467*3ac0a46fSAndroid Build Coastguard Worker             /* "bench_dwt -I" shows that scaling is poor, likely due to RAM
3468*3ac0a46fSAndroid Build Coastguard Worker                 transfer being the limiting factor. So limit the number of
3469*3ac0a46fSAndroid Build Coastguard Worker                 threads.
3470*3ac0a46fSAndroid Build Coastguard Worker              */
3471*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 num_jobs = opj_uint_max((OPJ_UINT32)num_threads / 2, 2U);
3472*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 step_j;
3473*3ac0a46fSAndroid Build Coastguard Worker 
3474*3ac0a46fSAndroid Build Coastguard Worker             if ((rw / NB_ELTS_V8) < num_jobs) {
3475*3ac0a46fSAndroid Build Coastguard Worker                 num_jobs = rw / NB_ELTS_V8;
3476*3ac0a46fSAndroid Build Coastguard Worker             }
3477*3ac0a46fSAndroid Build Coastguard Worker             step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
3478*3ac0a46fSAndroid Build Coastguard Worker             for (j = 0; j < num_jobs; j++) {
3479*3ac0a46fSAndroid Build Coastguard Worker                 opj_dwt97_decode_v_job_t* job;
3480*3ac0a46fSAndroid Build Coastguard Worker 
3481*3ac0a46fSAndroid Build Coastguard Worker                 job = (opj_dwt97_decode_v_job_t*) opj_malloc(sizeof(opj_dwt97_decode_v_job_t));
3482*3ac0a46fSAndroid Build Coastguard Worker                 if (!job) {
3483*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
3484*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.wavelet);
3485*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
3486*3ac0a46fSAndroid Build Coastguard Worker                 }
3487*3ac0a46fSAndroid Build Coastguard Worker                 job->v.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
3488*3ac0a46fSAndroid Build Coastguard Worker                 if (!job->v.wavelet) {
3489*3ac0a46fSAndroid Build Coastguard Worker                     opj_thread_pool_wait_completion(tp, 0);
3490*3ac0a46fSAndroid Build Coastguard Worker                     opj_free(job);
3491*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.wavelet);
3492*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
3493*3ac0a46fSAndroid Build Coastguard Worker                 }
3494*3ac0a46fSAndroid Build Coastguard Worker                 job->v.dn = v.dn;
3495*3ac0a46fSAndroid Build Coastguard Worker                 job->v.sn = v.sn;
3496*3ac0a46fSAndroid Build Coastguard Worker                 job->v.cas = v.cas;
3497*3ac0a46fSAndroid Build Coastguard Worker                 job->v.win_l_x0 = v.win_l_x0;
3498*3ac0a46fSAndroid Build Coastguard Worker                 job->v.win_l_x1 = v.win_l_x1;
3499*3ac0a46fSAndroid Build Coastguard Worker                 job->v.win_h_x0 = v.win_h_x0;
3500*3ac0a46fSAndroid Build Coastguard Worker                 job->v.win_h_x1 = v.win_h_x1;
3501*3ac0a46fSAndroid Build Coastguard Worker                 job->rh = rh;
3502*3ac0a46fSAndroid Build Coastguard Worker                 job->w = w;
3503*3ac0a46fSAndroid Build Coastguard Worker                 job->aj = aj;
3504*3ac0a46fSAndroid Build Coastguard Worker                 job->nb_columns = (j + 1 == num_jobs) ? (rw & (OPJ_UINT32)~
3505*3ac0a46fSAndroid Build Coastguard Worker                                   (NB_ELTS_V8 - 1)) - j * step_j : step_j;
3506*3ac0a46fSAndroid Build Coastguard Worker                 aj += job->nb_columns;
3507*3ac0a46fSAndroid Build Coastguard Worker                 opj_thread_pool_submit_job(tp, opj_dwt97_decode_v_func, job);
3508*3ac0a46fSAndroid Build Coastguard Worker             }
3509*3ac0a46fSAndroid Build Coastguard Worker             opj_thread_pool_wait_completion(tp, 0);
3510*3ac0a46fSAndroid Build Coastguard Worker         }
3511*3ac0a46fSAndroid Build Coastguard Worker 
3512*3ac0a46fSAndroid Build Coastguard Worker         if (rw & (NB_ELTS_V8 - 1)) {
3513*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 k;
3514*3ac0a46fSAndroid Build Coastguard Worker 
3515*3ac0a46fSAndroid Build Coastguard Worker             j = rw & (NB_ELTS_V8 - 1);
3516*3ac0a46fSAndroid Build Coastguard Worker 
3517*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_interleave_v(&v, aj, w, j);
3518*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_decode(&v);
3519*3ac0a46fSAndroid Build Coastguard Worker 
3520*3ac0a46fSAndroid Build Coastguard Worker             for (k = 0; k < rh; ++k) {
3521*3ac0a46fSAndroid Build Coastguard Worker                 memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k],
3522*3ac0a46fSAndroid Build Coastguard Worker                        (OPJ_SIZE_T)j * sizeof(OPJ_FLOAT32));
3523*3ac0a46fSAndroid Build Coastguard Worker             }
3524*3ac0a46fSAndroid Build Coastguard Worker         }
3525*3ac0a46fSAndroid Build Coastguard Worker     }
3526*3ac0a46fSAndroid Build Coastguard Worker 
3527*3ac0a46fSAndroid Build Coastguard Worker     opj_aligned_free(h.wavelet);
3528*3ac0a46fSAndroid Build Coastguard Worker     return OPJ_TRUE;
3529*3ac0a46fSAndroid Build Coastguard Worker }
3530*3ac0a46fSAndroid Build Coastguard Worker 
3531*3ac0a46fSAndroid Build Coastguard Worker static
opj_dwt_decode_partial_97(opj_tcd_tilecomp_t * OPJ_RESTRICT tilec,OPJ_UINT32 numres)3532*3ac0a46fSAndroid Build Coastguard Worker OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
3533*3ac0a46fSAndroid Build Coastguard Worker                                    OPJ_UINT32 numres)
3534*3ac0a46fSAndroid Build Coastguard Worker {
3535*3ac0a46fSAndroid Build Coastguard Worker     opj_sparse_array_int32_t* sa;
3536*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_t h;
3537*3ac0a46fSAndroid Build Coastguard Worker     opj_v8dwt_t v;
3538*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 resno;
3539*3ac0a46fSAndroid Build Coastguard Worker     /* This value matches the maximum left/right extension given in tables */
3540*3ac0a46fSAndroid Build Coastguard Worker     /* F.2 and F.3 of the standard. Note: in opj_tcd_is_subband_area_of_interest() */
3541*3ac0a46fSAndroid Build Coastguard Worker     /* we currently use 3. */
3542*3ac0a46fSAndroid Build Coastguard Worker     const OPJ_UINT32 filter_width = 4U;
3543*3ac0a46fSAndroid Build Coastguard Worker 
3544*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* tr = tilec->resolutions;
3545*3ac0a46fSAndroid Build Coastguard Worker     opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
3546*3ac0a46fSAndroid Build Coastguard Worker 
3547*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 -
3548*3ac0a46fSAndroid Build Coastguard Worker                                  tr->x0);    /* width of the resolution level computed */
3549*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
3550*3ac0a46fSAndroid Build Coastguard Worker                                  tr->y0);    /* height of the resolution level computed */
3551*3ac0a46fSAndroid Build Coastguard Worker 
3552*3ac0a46fSAndroid Build Coastguard Worker     OPJ_SIZE_T l_data_size;
3553*3ac0a46fSAndroid Build Coastguard Worker 
3554*3ac0a46fSAndroid Build Coastguard Worker     /* Compute the intersection of the area of interest, expressed in tile coordinates */
3555*3ac0a46fSAndroid Build Coastguard Worker     /* with the tile coordinates */
3556*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcx0 = tilec->win_x0;
3557*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcy0 = tilec->win_y0;
3558*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcx1 = tilec->win_x1;
3559*3ac0a46fSAndroid Build Coastguard Worker     OPJ_UINT32 win_tcy1 = tilec->win_y1;
3560*3ac0a46fSAndroid Build Coastguard Worker 
3561*3ac0a46fSAndroid Build Coastguard Worker     if (tr_max->x0 == tr_max->x1 || tr_max->y0 == tr_max->y1) {
3562*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_TRUE;
3563*3ac0a46fSAndroid Build Coastguard Worker     }
3564*3ac0a46fSAndroid Build Coastguard Worker 
3565*3ac0a46fSAndroid Build Coastguard Worker     sa = opj_dwt_init_sparse_array(tilec, numres);
3566*3ac0a46fSAndroid Build Coastguard Worker     if (sa == NULL) {
3567*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
3568*3ac0a46fSAndroid Build Coastguard Worker     }
3569*3ac0a46fSAndroid Build Coastguard Worker 
3570*3ac0a46fSAndroid Build Coastguard Worker     if (numres == 1U) {
3571*3ac0a46fSAndroid Build Coastguard Worker         OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
3572*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
3573*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
3574*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
3575*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
3576*3ac0a46fSAndroid Build Coastguard Worker                        tilec->data_win,
3577*3ac0a46fSAndroid Build Coastguard Worker                        1, tr_max->win_x1 - tr_max->win_x0,
3578*3ac0a46fSAndroid Build Coastguard Worker                        OPJ_TRUE);
3579*3ac0a46fSAndroid Build Coastguard Worker         assert(ret);
3580*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UNUSED(ret);
3581*3ac0a46fSAndroid Build Coastguard Worker         opj_sparse_array_int32_free(sa);
3582*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_TRUE;
3583*3ac0a46fSAndroid Build Coastguard Worker     }
3584*3ac0a46fSAndroid Build Coastguard Worker 
3585*3ac0a46fSAndroid Build Coastguard Worker     l_data_size = opj_dwt_max_resolution(tr, numres);
3586*3ac0a46fSAndroid Build Coastguard Worker     /* overflow check */
3587*3ac0a46fSAndroid Build Coastguard Worker     if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) {
3588*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
3589*3ac0a46fSAndroid Build Coastguard Worker         opj_sparse_array_int32_free(sa);
3590*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
3591*3ac0a46fSAndroid Build Coastguard Worker     }
3592*3ac0a46fSAndroid Build Coastguard Worker     h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
3593*3ac0a46fSAndroid Build Coastguard Worker     if (!h.wavelet) {
3594*3ac0a46fSAndroid Build Coastguard Worker         /* FIXME event manager error callback */
3595*3ac0a46fSAndroid Build Coastguard Worker         opj_sparse_array_int32_free(sa);
3596*3ac0a46fSAndroid Build Coastguard Worker         return OPJ_FALSE;
3597*3ac0a46fSAndroid Build Coastguard Worker     }
3598*3ac0a46fSAndroid Build Coastguard Worker     v.wavelet = h.wavelet;
3599*3ac0a46fSAndroid Build Coastguard Worker 
3600*3ac0a46fSAndroid Build Coastguard Worker     for (resno = 1; resno < numres; resno ++) {
3601*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 j;
3602*3ac0a46fSAndroid Build Coastguard Worker         /* Window of interest subband-based coordinates */
3603*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1;
3604*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_hl_x0, win_hl_x1;
3605*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_lh_y0, win_lh_y1;
3606*3ac0a46fSAndroid Build Coastguard Worker         /* Window of interest tile-resolution-based coordinates */
3607*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 win_tr_x0, win_tr_x1, win_tr_y0, win_tr_y1;
3608*3ac0a46fSAndroid Build Coastguard Worker         /* Tile-resolution subband-based coordinates */
3609*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UINT32 tr_ll_x0, tr_ll_y0, tr_hl_x0, tr_lh_y0;
3610*3ac0a46fSAndroid Build Coastguard Worker 
3611*3ac0a46fSAndroid Build Coastguard Worker         ++tr;
3612*3ac0a46fSAndroid Build Coastguard Worker 
3613*3ac0a46fSAndroid Build Coastguard Worker         h.sn = (OPJ_INT32)rw;
3614*3ac0a46fSAndroid Build Coastguard Worker         v.sn = (OPJ_INT32)rh;
3615*3ac0a46fSAndroid Build Coastguard Worker 
3616*3ac0a46fSAndroid Build Coastguard Worker         rw = (OPJ_UINT32)(tr->x1 - tr->x0);
3617*3ac0a46fSAndroid Build Coastguard Worker         rh = (OPJ_UINT32)(tr->y1 - tr->y0);
3618*3ac0a46fSAndroid Build Coastguard Worker 
3619*3ac0a46fSAndroid Build Coastguard Worker         h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
3620*3ac0a46fSAndroid Build Coastguard Worker         h.cas = tr->x0 % 2;
3621*3ac0a46fSAndroid Build Coastguard Worker 
3622*3ac0a46fSAndroid Build Coastguard Worker         v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
3623*3ac0a46fSAndroid Build Coastguard Worker         v.cas = tr->y0 % 2;
3624*3ac0a46fSAndroid Build Coastguard Worker 
3625*3ac0a46fSAndroid Build Coastguard Worker         /* Get the subband coordinates for the window of interest */
3626*3ac0a46fSAndroid Build Coastguard Worker         /* LL band */
3627*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_get_band_coordinates(tilec, resno, 0,
3628*3ac0a46fSAndroid Build Coastguard Worker                                      win_tcx0, win_tcy0, win_tcx1, win_tcy1,
3629*3ac0a46fSAndroid Build Coastguard Worker                                      &win_ll_x0, &win_ll_y0,
3630*3ac0a46fSAndroid Build Coastguard Worker                                      &win_ll_x1, &win_ll_y1);
3631*3ac0a46fSAndroid Build Coastguard Worker 
3632*3ac0a46fSAndroid Build Coastguard Worker         /* HL band */
3633*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_get_band_coordinates(tilec, resno, 1,
3634*3ac0a46fSAndroid Build Coastguard Worker                                      win_tcx0, win_tcy0, win_tcx1, win_tcy1,
3635*3ac0a46fSAndroid Build Coastguard Worker                                      &win_hl_x0, NULL, &win_hl_x1, NULL);
3636*3ac0a46fSAndroid Build Coastguard Worker 
3637*3ac0a46fSAndroid Build Coastguard Worker         /* LH band */
3638*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_get_band_coordinates(tilec, resno, 2,
3639*3ac0a46fSAndroid Build Coastguard Worker                                      win_tcx0, win_tcy0, win_tcx1, win_tcy1,
3640*3ac0a46fSAndroid Build Coastguard Worker                                      NULL, &win_lh_y0, NULL, &win_lh_y1);
3641*3ac0a46fSAndroid Build Coastguard Worker 
3642*3ac0a46fSAndroid Build Coastguard Worker         /* Beware: band index for non-LL0 resolution are 0=HL, 1=LH and 2=HH */
3643*3ac0a46fSAndroid Build Coastguard Worker         tr_ll_x0 = (OPJ_UINT32)tr->bands[1].x0;
3644*3ac0a46fSAndroid Build Coastguard Worker         tr_ll_y0 = (OPJ_UINT32)tr->bands[0].y0;
3645*3ac0a46fSAndroid Build Coastguard Worker         tr_hl_x0 = (OPJ_UINT32)tr->bands[0].x0;
3646*3ac0a46fSAndroid Build Coastguard Worker         tr_lh_y0 = (OPJ_UINT32)tr->bands[1].y0;
3647*3ac0a46fSAndroid Build Coastguard Worker 
3648*3ac0a46fSAndroid Build Coastguard Worker         /* Subtract the origin of the bands for this tile, to the subwindow */
3649*3ac0a46fSAndroid Build Coastguard Worker         /* of interest band coordinates, so as to get them relative to the */
3650*3ac0a46fSAndroid Build Coastguard Worker         /* tile */
3651*3ac0a46fSAndroid Build Coastguard Worker         win_ll_x0 = opj_uint_subs(win_ll_x0, tr_ll_x0);
3652*3ac0a46fSAndroid Build Coastguard Worker         win_ll_y0 = opj_uint_subs(win_ll_y0, tr_ll_y0);
3653*3ac0a46fSAndroid Build Coastguard Worker         win_ll_x1 = opj_uint_subs(win_ll_x1, tr_ll_x0);
3654*3ac0a46fSAndroid Build Coastguard Worker         win_ll_y1 = opj_uint_subs(win_ll_y1, tr_ll_y0);
3655*3ac0a46fSAndroid Build Coastguard Worker         win_hl_x0 = opj_uint_subs(win_hl_x0, tr_hl_x0);
3656*3ac0a46fSAndroid Build Coastguard Worker         win_hl_x1 = opj_uint_subs(win_hl_x1, tr_hl_x0);
3657*3ac0a46fSAndroid Build Coastguard Worker         win_lh_y0 = opj_uint_subs(win_lh_y0, tr_lh_y0);
3658*3ac0a46fSAndroid Build Coastguard Worker         win_lh_y1 = opj_uint_subs(win_lh_y1, tr_lh_y0);
3659*3ac0a46fSAndroid Build Coastguard Worker 
3660*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)h.sn, &win_ll_x0, &win_ll_x1);
3661*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)h.dn, &win_hl_x0, &win_hl_x1);
3662*3ac0a46fSAndroid Build Coastguard Worker 
3663*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)v.sn, &win_ll_y0, &win_ll_y1);
3664*3ac0a46fSAndroid Build Coastguard Worker         opj_dwt_segment_grow(filter_width, (OPJ_UINT32)v.dn, &win_lh_y0, &win_lh_y1);
3665*3ac0a46fSAndroid Build Coastguard Worker 
3666*3ac0a46fSAndroid Build Coastguard Worker         /* Compute the tile-resolution-based coordinates for the window of interest */
3667*3ac0a46fSAndroid Build Coastguard Worker         if (h.cas == 0) {
3668*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x0 = opj_uint_min(2 * win_ll_x0, 2 * win_hl_x0 + 1);
3669*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x1 = opj_uint_min(opj_uint_max(2 * win_ll_x1, 2 * win_hl_x1 + 1), rw);
3670*3ac0a46fSAndroid Build Coastguard Worker         } else {
3671*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x0 = opj_uint_min(2 * win_hl_x0, 2 * win_ll_x0 + 1);
3672*3ac0a46fSAndroid Build Coastguard Worker             win_tr_x1 = opj_uint_min(opj_uint_max(2 * win_hl_x1, 2 * win_ll_x1 + 1), rw);
3673*3ac0a46fSAndroid Build Coastguard Worker         }
3674*3ac0a46fSAndroid Build Coastguard Worker 
3675*3ac0a46fSAndroid Build Coastguard Worker         if (v.cas == 0) {
3676*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y0 = opj_uint_min(2 * win_ll_y0, 2 * win_lh_y0 + 1);
3677*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y1 = opj_uint_min(opj_uint_max(2 * win_ll_y1, 2 * win_lh_y1 + 1), rh);
3678*3ac0a46fSAndroid Build Coastguard Worker         } else {
3679*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y0 = opj_uint_min(2 * win_lh_y0, 2 * win_ll_y0 + 1);
3680*3ac0a46fSAndroid Build Coastguard Worker             win_tr_y1 = opj_uint_min(opj_uint_max(2 * win_lh_y1, 2 * win_ll_y1 + 1), rh);
3681*3ac0a46fSAndroid Build Coastguard Worker         }
3682*3ac0a46fSAndroid Build Coastguard Worker 
3683*3ac0a46fSAndroid Build Coastguard Worker         h.win_l_x0 = win_ll_x0;
3684*3ac0a46fSAndroid Build Coastguard Worker         h.win_l_x1 = win_ll_x1;
3685*3ac0a46fSAndroid Build Coastguard Worker         h.win_h_x0 = win_hl_x0;
3686*3ac0a46fSAndroid Build Coastguard Worker         h.win_h_x1 = win_hl_x1;
3687*3ac0a46fSAndroid Build Coastguard Worker         for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) {
3688*3ac0a46fSAndroid Build Coastguard Worker             if ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) ||
3689*3ac0a46fSAndroid Build Coastguard Worker                     (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn &&
3690*3ac0a46fSAndroid Build Coastguard Worker                      j < win_lh_y1 + (OPJ_UINT32)v.sn)) {
3691*3ac0a46fSAndroid Build Coastguard Worker                 opj_v8dwt_interleave_partial_h(&h, sa, j, opj_uint_min(NB_ELTS_V8, rh - j));
3692*3ac0a46fSAndroid Build Coastguard Worker                 opj_v8dwt_decode(&h);
3693*3ac0a46fSAndroid Build Coastguard Worker                 if (!opj_sparse_array_int32_write(sa,
3694*3ac0a46fSAndroid Build Coastguard Worker                                                   win_tr_x0, j,
3695*3ac0a46fSAndroid Build Coastguard Worker                                                   win_tr_x1, j + NB_ELTS_V8,
3696*3ac0a46fSAndroid Build Coastguard Worker                                                   (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
3697*3ac0a46fSAndroid Build Coastguard Worker                                                   NB_ELTS_V8, 1, OPJ_TRUE)) {
3698*3ac0a46fSAndroid Build Coastguard Worker                     /* FIXME event manager error callback */
3699*3ac0a46fSAndroid Build Coastguard Worker                     opj_sparse_array_int32_free(sa);
3700*3ac0a46fSAndroid Build Coastguard Worker                     opj_aligned_free(h.wavelet);
3701*3ac0a46fSAndroid Build Coastguard Worker                     return OPJ_FALSE;
3702*3ac0a46fSAndroid Build Coastguard Worker                 }
3703*3ac0a46fSAndroid Build Coastguard Worker             }
3704*3ac0a46fSAndroid Build Coastguard Worker         }
3705*3ac0a46fSAndroid Build Coastguard Worker 
3706*3ac0a46fSAndroid Build Coastguard Worker         if (j < rh &&
3707*3ac0a46fSAndroid Build Coastguard Worker                 ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) ||
3708*3ac0a46fSAndroid Build Coastguard Worker                  (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn &&
3709*3ac0a46fSAndroid Build Coastguard Worker                   j < win_lh_y1 + (OPJ_UINT32)v.sn))) {
3710*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_interleave_partial_h(&h, sa, j, rh - j);
3711*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_decode(&h);
3712*3ac0a46fSAndroid Build Coastguard Worker             if (!opj_sparse_array_int32_write(sa,
3713*3ac0a46fSAndroid Build Coastguard Worker                                               win_tr_x0, j,
3714*3ac0a46fSAndroid Build Coastguard Worker                                               win_tr_x1, rh,
3715*3ac0a46fSAndroid Build Coastguard Worker                                               (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
3716*3ac0a46fSAndroid Build Coastguard Worker                                               NB_ELTS_V8, 1, OPJ_TRUE)) {
3717*3ac0a46fSAndroid Build Coastguard Worker                 /* FIXME event manager error callback */
3718*3ac0a46fSAndroid Build Coastguard Worker                 opj_sparse_array_int32_free(sa);
3719*3ac0a46fSAndroid Build Coastguard Worker                 opj_aligned_free(h.wavelet);
3720*3ac0a46fSAndroid Build Coastguard Worker                 return OPJ_FALSE;
3721*3ac0a46fSAndroid Build Coastguard Worker             }
3722*3ac0a46fSAndroid Build Coastguard Worker         }
3723*3ac0a46fSAndroid Build Coastguard Worker 
3724*3ac0a46fSAndroid Build Coastguard Worker         v.win_l_x0 = win_ll_y0;
3725*3ac0a46fSAndroid Build Coastguard Worker         v.win_l_x1 = win_ll_y1;
3726*3ac0a46fSAndroid Build Coastguard Worker         v.win_h_x0 = win_lh_y0;
3727*3ac0a46fSAndroid Build Coastguard Worker         v.win_h_x1 = win_lh_y1;
3728*3ac0a46fSAndroid Build Coastguard Worker         for (j = win_tr_x0; j < win_tr_x1; j += NB_ELTS_V8) {
3729*3ac0a46fSAndroid Build Coastguard Worker             OPJ_UINT32 nb_elts = opj_uint_min(NB_ELTS_V8, win_tr_x1 - j);
3730*3ac0a46fSAndroid Build Coastguard Worker 
3731*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_interleave_partial_v(&v, sa, j, nb_elts);
3732*3ac0a46fSAndroid Build Coastguard Worker             opj_v8dwt_decode(&v);
3733*3ac0a46fSAndroid Build Coastguard Worker 
3734*3ac0a46fSAndroid Build Coastguard Worker             if (!opj_sparse_array_int32_write(sa,
3735*3ac0a46fSAndroid Build Coastguard Worker                                               j, win_tr_y0,
3736*3ac0a46fSAndroid Build Coastguard Worker                                               j + nb_elts, win_tr_y1,
3737*3ac0a46fSAndroid Build Coastguard Worker                                               (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0],
3738*3ac0a46fSAndroid Build Coastguard Worker                                               1, NB_ELTS_V8, OPJ_TRUE)) {
3739*3ac0a46fSAndroid Build Coastguard Worker                 /* FIXME event manager error callback */
3740*3ac0a46fSAndroid Build Coastguard Worker                 opj_sparse_array_int32_free(sa);
3741*3ac0a46fSAndroid Build Coastguard Worker                 opj_aligned_free(h.wavelet);
3742*3ac0a46fSAndroid Build Coastguard Worker                 return OPJ_FALSE;
3743*3ac0a46fSAndroid Build Coastguard Worker             }
3744*3ac0a46fSAndroid Build Coastguard Worker         }
3745*3ac0a46fSAndroid Build Coastguard Worker     }
3746*3ac0a46fSAndroid Build Coastguard Worker 
3747*3ac0a46fSAndroid Build Coastguard Worker     {
3748*3ac0a46fSAndroid Build Coastguard Worker         OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
3749*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
3750*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
3751*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
3752*3ac0a46fSAndroid Build Coastguard Worker                        tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
3753*3ac0a46fSAndroid Build Coastguard Worker                        tilec->data_win,
3754*3ac0a46fSAndroid Build Coastguard Worker                        1, tr_max->win_x1 - tr_max->win_x0,
3755*3ac0a46fSAndroid Build Coastguard Worker                        OPJ_TRUE);
3756*3ac0a46fSAndroid Build Coastguard Worker         assert(ret);
3757*3ac0a46fSAndroid Build Coastguard Worker         OPJ_UNUSED(ret);
3758*3ac0a46fSAndroid Build Coastguard Worker     }
3759*3ac0a46fSAndroid Build Coastguard Worker     opj_sparse_array_int32_free(sa);
3760*3ac0a46fSAndroid Build Coastguard Worker 
3761*3ac0a46fSAndroid Build Coastguard Worker     opj_aligned_free(h.wavelet);
3762*3ac0a46fSAndroid Build Coastguard Worker     return OPJ_TRUE;
3763*3ac0a46fSAndroid Build Coastguard Worker }
3764*3ac0a46fSAndroid Build Coastguard Worker 
3765*3ac0a46fSAndroid Build Coastguard Worker 
opj_dwt_decode_real(opj_tcd_t * p_tcd,opj_tcd_tilecomp_t * OPJ_RESTRICT tilec,OPJ_UINT32 numres)3766*3ac0a46fSAndroid Build Coastguard Worker OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd,
3767*3ac0a46fSAndroid Build Coastguard Worker                              opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
3768*3ac0a46fSAndroid Build Coastguard Worker                              OPJ_UINT32 numres)
3769*3ac0a46fSAndroid Build Coastguard Worker {
3770*3ac0a46fSAndroid Build Coastguard Worker     if (p_tcd->whole_tile_decoding) {
3771*3ac0a46fSAndroid Build Coastguard Worker         return opj_dwt_decode_tile_97(p_tcd->thread_pool, tilec, numres);
3772*3ac0a46fSAndroid Build Coastguard Worker     } else {
3773*3ac0a46fSAndroid Build Coastguard Worker         return opj_dwt_decode_partial_97(tilec, numres);
3774*3ac0a46fSAndroid Build Coastguard Worker     }
3775*3ac0a46fSAndroid Build Coastguard Worker }
3776