xref: /aosp_15_r20/external/libopenapv/src/oapv.c (revision abb65b4b03b69e1d508d4d9a44dcf199df16e7c3)
1 /*
2  * Copyright (c) 2022 Samsung Electronics Co., Ltd.
3  * All Rights Reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * - Redistributions of source code must retain the above copyright notice,
9  *   this list of conditions and the following disclaimer.
10  *
11  * - Redistributions in binary form must reproduce the above copyright notice,
12  *   this list of conditions and the following disclaimer in the documentation
13  *   and/or other materials provided with the distribution.
14  *
15  * - Neither the name of the copyright owner, nor the names of its contributors
16  *   may be used to endorse or promote products derived from this software
17  *   without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "oapv_def.h"
33 
/*
 * Copy a rectangle of component 'c' out of an image buffer into 'block'.
 *
 * x_l / y_l / w_l / h_l describe the rectangle; for c != 0 the start
 * position is scaled down by the chroma shifts derived from the image color
 * format. Rows are written back-to-back into 'block', each row being
 * w_l * (byte depth) bytes long.
 */
static void imgb_to_block(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    int byte_depth = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int shift_x = 0;
    int shift_y = 0;

    // only non-zero components are subject to chroma sub-sampling shifts
    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        shift_x = get_chroma_sft_w(cfi);
        shift_y = get_chroma_sft_h(cfi);
    }

    int row_bytes = w_l * byte_depth;
    u8 *in = ((u8 *)imgb->a[c]) + ((y_l >> shift_y) * imgb->s[c]) + ((x_l * byte_depth) >> shift_x);
    u8 *out = (u8 *)block;

    for(int row = 0; row < h_l; row++) {
        oapv_mcpy(out, in, row_bytes);
        in += imgb->s[c]; // source advances by the plane stride
        out += row_bytes; // destination is tightly packed
    }
}
59 
imgb_to_block_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)60 static void imgb_to_block_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
61 {
62     const int mid_val = (1 << (10 - 1));
63     s16      *s = (s16 *)src;
64     s16      *d = (s16 *)dst;
65 
66     for(int h = 0; h < blk_h; h++) {
67         for(int w = 0; w < blk_w; w++) {
68             d[w] = s[w] - mid_val;
69         }
70         s = (s16 *)(((u8 *)s) + s_src);
71         d = (s16 *)(((u8 *)d) + s_dst);
72     }
73 }
74 
imgb_to_block_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)75 static void imgb_to_block_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
76 {
77     const int mid_val = (1 << (10 - 1));
78     u16      *s = (s16 *)src;
79     s16      *d = (s16 *)dst;
80 
81     for(int h = 0; h < blk_h; h++) {
82         for(int w = 0; w < blk_w; w++) {
83             d[w] = (s16)(s[w] >> 6) - mid_val;
84         }
85         s = (u16 *)(((u8 *)s) + s_src);
86         d = (s16 *)(((u8 *)d) + s_dst);
87     }
88 }
89 
imgb_to_block_p210_uv(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)90 static void imgb_to_block_p210_uv(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
91 {
92     const int mid_val = (1 << (10 - 1));
93     u16      *s = (u16 *)src + offset_src;
94     s16      *d = (s16 *)dst;
95 
96     for(int h = 0; h < blk_h; h++) {
97         for(int w = 0; w < blk_w; w++) {
98             d[w] = (s16)(s[w * 2] >> 6) - mid_val;
99         }
100         s = (u16 *)(((u8 *)s) + s_src);
101         d = (s16 *)(((u8 *)d) + s_dst);
102     }
103 }
104 
/*
 * Extract a w_l x h_l rectangle of component 'c' from a P210 image buffer
 * into 'block', shifting every 16-bit sample right by 6.
 *
 * Component 0 is read from plane 0 with consecutive samples. Any other
 * component is read from plane 1, where samples are taken two elements apart
 * and (c >> 1) selects the element within each pair. No mid value is
 * subtracted by this function.
 */
static void imgb_to_block_p210(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    int byte_depth = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int shift_x = 0;
    int shift_y = 0;
    int step = 1;  // distance (in u16 elements) between two samples of the component
    int plane = c; // index of the plane actually read

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        shift_x = get_chroma_sft_w(cfi);
        shift_y = get_chroma_sft_h(cfi);
        step = 2;
        plane = 1;
    }

    // stride converted from bytes to u16 elements when samples are wider than one byte
    int  stride = imgb->s[plane] >> (byte_depth > 1 ? 1 : 0);
    int  phase = c >> 1;
    u16 *in = ((u16 *)imgb->a[plane]) + ((y_l >> shift_y) * stride) + ((x_l * step) >> shift_x);
    u16 *out = (u16 *)block;

    for(int row = 0; row < h_l; row++) {
        for(int col = 0; col < w_l; col++) {
            out[col] = (in[col * step + phase] >> 6);
        }
        in += stride;
        out += w_l;
    }
}
136 
block_to_imgb_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)137 static void block_to_imgb_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
138 {
139     const int max_val = (1 << 10) - 1;
140     const int mid_val = (1 << (10 - 1));
141     s16      *s = (s16 *)src;
142     u16      *d = (u16 *)dst;
143 
144     for(int h = 0; h < blk_h; h++) {
145         for(int w = 0; w < blk_w; w++) {
146             d[w] = oapv_clip3(0, max_val, s[w] + mid_val);
147         }
148         s = (s16 *)(((u8 *)s) + s_src);
149         d = (u16 *)(((u8 *)d) + s_dst);
150     }
151 }
152 
block_to_imgb_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)153 static void block_to_imgb_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
154 {
155     const int max_val = (1 << 10) - 1;
156     const int mid_val = (1 << (10 - 1));
157     s16      *s = (s16 *)src;
158     u16      *d = (u16 *)dst;
159 
160     for(int h = 0; h < blk_h; h++) {
161         for(int w = 0; w < blk_w; w++) {
162             d[w] = oapv_clip3(0, max_val, s[w] + mid_val) << 6;
163         }
164         s = (s16 *)(((u8 *)s) + s_src);
165         d = (u16 *)(((u8 *)d) + s_dst);
166     }
167 }
168 
block_to_imgb_p210_uv(void * src,int blk_w,int blk_h,int s_src,int x_pel,int s_dst,void * dst)169 static void block_to_imgb_p210_uv(void *src, int blk_w, int blk_h, int s_src, int x_pel, int s_dst, void *dst)
170 {
171     const int max_val = (1 << 10) - 1;
172     const int mid_val = (1 << (10 - 1));
173     s16      *s = (s16 *)src;
174 
175     // x_pel is x-offset value from left boundary of picture in unit of pixel.
176     // the 'dst' address has calculated by
177     // dst = (s16*)((u8*)origin + y_pel*s_dst) + x_pel;
178     // in case of P210 color format,
179     // since 's_dst' is byte size of stride including all U and V pixel values,
180     // y-offset calculation is correct.
181     // however, the adding only x_pel is not enough to address the correct pixel
182     // position of U or V because U & V use the same buffer plane
183     // in interleaved way,
184     // so, the 'dst' address should be increased by 'x_pel' to address pixel
185     // position correctly.
186     u16      *d = (u16 *)dst + x_pel; // p210 pixel value needs 0~65535 range
187 
188     for(int h = 0; h < blk_h; h++) {
189         for(int w = 0; w < blk_w; w++) {
190             d[w * 2] = ((u16)oapv_clip3(0, max_val, s[w] + mid_val)) << 6;
191         }
192         s = (s16 *)(((u8 *)s) + s_src);
193         d = (u16 *)(((u8 *)d) + s_dst);
194     }
195 }
196 
/*
 * Add the mid value (1 << (bit_depth - 1)) to each of the b_w * b_h samples
 * in 'coef' in place, clipping every result to [0, (1 << bit_depth) - 1].
 */
static void plus_mid_val(s16 *coef, int b_w, int b_h, int bit_depth)
{
    const int offset = 1 << (bit_depth - 1);
    const int ceiling = (1 << bit_depth) - 1;
    const int count = b_h * b_w;

    for(int k = 0; k < count; k++) {
        const int shifted = coef[k] + offset;
        coef[k] = oapv_clip3(0, ceiling, shifted);
    }
}
204 
/*
 * Fill a frame-information record 'finfo' from frame info 'fi' plus the
 * PBU type and group id supplied by the caller.
 *
 * The color space field is built from the chroma format idc and bit depth
 * found in 'fi'.
 */
static void copy_fi_to_finfo(oapv_fi_t *fi, int pbu_type, int group_id, oapv_frm_info_t *finfo)
{
    // values supplied directly by the caller
    finfo->pbu_type = pbu_type;
    finfo->group_id = group_id;

    // picture geometry; casting to 'int' would be fine here
    finfo->w = (int)fi->frame_width;
    finfo->h = (int)fi->frame_height;

    // sample format
    finfo->chroma_format_idc = fi->chroma_format_idc;
    finfo->bit_depth = fi->bit_depth;
    finfo->cs = OAPV_CS_SET(chroma_format_idc_to_color_format(fi->chroma_format_idc), fi->bit_depth, 0);

    // profile / level / band and timing fields are copied unchanged
    finfo->profile_idc = fi->profile_idc;
    finfo->level_idc = fi->level_idc;
    finfo->band_idc = fi->band_idc;
    finfo->capture_time_distance = fi->capture_time_distance;
}
219 
220 ///////////////////////////////////////////////////////////////////////////////
221 // start of encoder code
222 #if ENABLE_ENCODER
223 ///////////////////////////////////////////////////////////////////////////////
224 
enc_id_to_ctx(oapve_t id)225 static oapve_ctx_t *enc_id_to_ctx(oapve_t id)
226 {
227     oapve_ctx_t *ctx;
228     oapv_assert_rv(id, NULL);
229     ctx = (oapve_ctx_t *)id;
230     oapv_assert_rv((ctx)->magic == OAPVE_MAGIC_CODE, NULL);
231     return ctx;
232 }
233 
enc_ctx_alloc(void)234 static oapve_ctx_t *enc_ctx_alloc(void)
235 {
236     oapve_ctx_t *ctx;
237     ctx = (oapve_ctx_t *)oapv_malloc_fast(sizeof(oapve_ctx_t));
238     oapv_assert_rv(ctx, NULL);
239     oapv_mset_x64a(ctx, 0, sizeof(oapve_ctx_t));
240     return ctx;
241 }
242 
/*
 * Release the memory of an encoder context.
 * Only the context structure itself is freed here.
 */
static void enc_ctx_free(oapve_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
247 
enc_core_alloc()248 static oapve_core_t *enc_core_alloc()
249 {
250     oapve_core_t *core;
251     core = (oapve_core_t *)oapv_malloc_fast(sizeof(oapve_core_t));
252 
253     oapv_assert_rv(core, NULL);
254     oapv_mset_x64a(core, 0, sizeof(oapve_core_t));
255 
256     return core;
257 }
258 
/*
 * Release the memory of an encoder core structure.
 * Only the core structure itself is freed here.
 */
static void enc_core_free(oapve_core_t *core)
{
    oapv_mfree_fast(core);
}
263 
/*
 * Bind a core to its owning context and to the tile it will process.
 *
 * 'thread_idx' is accepted but not used. Always returns OAPV_OK.
 */
static int enc_core_init(oapve_core_t *core, oapve_ctx_t *ctx, int tile_idx, int thread_idx)
{
    core->ctx = ctx;
    core->tile_idx = tile_idx;

    return OAPV_OK;
}
270 
/*
 * Subtract the mid value (1 << (bit_depth - 1)) from each of the
 * w_blk * h_blk samples in 'coef', in place.
 */
static void enc_minus_mid_val(s16 *coef, int w_blk, int h_blk, int bit_depth)
{
    const int offset = 1 << (bit_depth - 1);
    const int count = h_blk * w_blk;

    for(int k = 0; k < count; k++) {
        coef[k] -= offset;
    }
}
278 
/*
 * Partition a w_pel x h_pel picture into tiles of tile_w x tile_h pixels.
 *
 * The number of tile columns, rows and the total tile count are written
 * through the three output pointers. 'ti' receives, in raster order, the
 * position and size of each tile; tiles on the right and bottom edges are
 * shortened so that they do not extend past the picture.
 *
 * Always returns OAPV_OK. The caller must make sure that tile_w and tile_h
 * are non-zero, since both are used as divisors.
 */
static int enc_set_tile_info(oapve_tile_t *ti, int w_pel, int h_pel, int tile_w,
                             int tile_h, int *num_tile_cols, int *num_tile_rows, int *num_tiles)
{
    // round up so that a partial tile at the edge is counted
    const int cols = (w_pel + (tile_w - 1)) / tile_w;
    const int rows = (h_pel + (tile_h - 1)) / tile_h;
    const int total = cols * rows;

    *num_tile_cols = cols;
    *num_tile_rows = rows;
    *num_tiles = total;

    for(int idx = 0; idx < total; idx++) {
        oapve_tile_t *t = &ti[idx];
        const int     left = (idx % cols) * tile_w;
        const int     top = (idx / cols) * tile_h;

        t->x = left;
        t->y = top;

        if(left + tile_w > w_pel) {
            t->w = w_pel - left; // last column: keep inside the picture
        }
        else {
            t->w = tile_w;
        }

        if(top + tile_h > h_pel) {
            t->h = h_pel - top; // last row: keep inside the picture
        }
        else {
            t->h = tile_h;
        }
    }

    return OAPV_OK;
}
296 
/*
 * Encode one block without any rate-distortion search.
 *
 * The samples in core->coef are transformed and quantized in place, then the
 * DC coefficient is replaced by its difference from the previous DC of the
 * same component (core->prev_dc[c] is updated to the new DC). When the
 * context has a reconstruction target (ctx->rec), core->coef_rec is filled
 * with the dequantized, inverse-transformed block.
 *
 * Always returns 0.
 */
static double enc_block(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    const int depth = ctx->bit_depth;
    const int deadzone = c ? 128 : 212;

    oapv_trans(ctx, core->coef, log2_w, log2_h, depth);
    ctx->fn_quant[0](core->coef, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, depth, deadzone);

    // DC prediction: transmit only the difference from the previous block's DC
    int dc_pred = core->prev_dc[c];
    core->prev_dc[c] = core->coef[0];
    core->coef[0] = core->coef[0] - dc_pred;

    if(ctx->rec) {
        oapv_mcpy(core->coef_rec, core->coef, sizeof(s16) * OAPV_BLK_D);
        // undo the DC prediction before dequantization
        core->coef_rec[0] = core->coef_rec[0] + dc_pred;
        ctx->fn_dquant[0](core->coef_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](core->coef_rec, ITX_SHIFT1, ITX_SHIFT2(depth), 1 << log2_w);
    }

    return 0;
}
317 
/*
 * Encode one block with a distortion-driven refinement of the quantized
 * levels ("slow" preset).
 *
 * Steps:
 *  1. keep a copy of the input samples, transform core->coef and quantize a
 *     working copy of the coefficients;
 *  2. reconstruct (dequantize + inverse transform) and measure the distortion
 *     against the input samples; this is the initial best result;
 *  3. walk the coefficients in scan order (two passes for component 0, one
 *     pass otherwise) and, for each one, try neighbouring level values; a
 *     candidate is kept only when it strictly lowers the distortion;
 *  4. replace the DC level by its difference from core->prev_dc[c] and update
 *     core->prev_dc[c].
 *
 * The chosen levels end up in core->coef; when ctx->rec is set the matching
 * reconstruction is stored in core->coef_rec. The search stops as soon as a
 * distortion of zero is reached.
 *
 * Returns the best distortion found.
 */
static double enc_block_rdo_slow(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    // candidate offsets, ordered 0, -1, +1, -2, +2, ...:
    // odd indices lower the level, even indices raise it
    const int  delta_tbl[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scan = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  depth = ctx->bit_depth;
    const int  num_pass = (c == 0 ? 2 : 1);

    ALIGNED_16(s16 trial_rec[OAPV_BLK_D]) = { 0 };
    ALIGNED_16(s16 levels[OAPV_BLK_D]) = { 0 };
    s16  source[OAPV_BLK_D] = { 0 };
    s16 *out_coef = core->coef;
    s16 *out_rec = core->coef_rec;
    int  min_cost;
    int  done;

    /* forward path: transform in place, quantize a working copy */
    oapv_mcpy(source, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, depth);
    oapv_mcpy(levels, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](levels, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, depth, c ? 112 : 212);

    /* initial candidate: the plain quantizer output */
    oapv_mcpy(trial_rec, levels, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](trial_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](trial_rec, ITX_SHIFT1, ITX_SHIFT2(depth), 1 << log2_w);
    min_cost = (int)ctx->fn_ssd[0](width, height, source, trial_rec, width, width, depth);
    oapv_mcpy(out_coef, levels, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(out_rec, trial_rec, sizeof(s16) * OAPV_BLK_D);
    }
    done = (min_cost == 0);

    /* refinement of individual levels */
    for(int pass = 0; pass < num_pass && !done; pass++) {
        for(int pos = 0; pos < OAPV_BLK_D && !done; pos++) {
            const int idx = scan[pos];
            const s16 base = levels[idx];
            int       limit = (c == 0 ? 13 : 5);
            int       pick = 0; // index into delta_tbl of the best offset so far

            if(base == 0) {
                // zero levels are only revisited for the first three
                // coefficient positions of component 0
                if(c != 0 || idx >= 3) {
                    continue;
                }
                limit = 3;
            }

            for(int k = 1; k < limit && !done; k++) {
                // beyond +/-1, continue only in the direction that already
                // helped (same parity as the best index)
                if(k > 2 && (pick == 0 || (pick % 2) != (k % 2))) {
                    continue;
                }

                const s16 cand = base + delta_tbl[k];
                levels[idx] = cand;

                oapv_mcpy(trial_rec, levels, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](trial_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](trial_rec, ITX_SHIFT1, ITX_SHIFT2(depth), 1 << log2_w);
                const int cost = (int)ctx->fn_ssd[0](width, height, source, trial_rec, width, width, depth);

                if(cost >= min_cost) {
                    // no gain: restore the best level found so far
                    levels[idx] = base + delta_tbl[pick];
                    continue;
                }

                min_cost = cost;
                out_coef[idx] = cand;
                if(ctx->rec) {
                    oapv_mcpy(out_rec, trial_rec, sizeof(s16) * OAPV_BLK_D);
                }
                pick = k;
                done = (cost == 0);
            }
        }
    }

    /* DC prediction against the previous block of the same component */
    int dc_level = out_coef[0];
    out_coef[0] -= core->prev_dc[c];
    core->prev_dc[c] = dc_level;

    return min_cost;
}
413 
/*
 * Encode one block with a distortion-driven refinement of the quantized
 * levels ("medium" preset).
 *
 * The search follows the same scheme as the other RDO variants (initial
 * quantization, then per-coefficient trials in scan order, two passes for
 * component 0 and one otherwise), but a candidate is not reconstructed with
 * a full dequantization and inverse transform. Instead, an integer
 * reconstruction buffer obtained once from oapv_itx_get_wo_sft() is updated
 * per candidate through ctx->fn_itx_adj[0] using the scan position and the
 * change in dequantized value, and then rounded with (+512) >> 10.
 *
 * The chosen levels end up in core->coef. When ctx->rec is set,
 * core->coef_rec is recomputed from the final levels with the regular
 * dequantization and inverse transform. Finally the DC level is replaced by
 * its difference from core->prev_dc[c], which is then updated.
 *
 * Returns the best distortion found.
 */
static double enc_block_rdo_medium(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 source[OAPV_BLK_D]);
    ALIGNED_16(s16 trial_rec[OAPV_BLK_D]);
    ALIGNED_16(s16 levels[OAPV_BLK_D]);
    ALIGNED_16(s16 itx_tmp[OAPV_BLK_D]);

    ALIGNED_32(int rec_acc[OAPV_BLK_D]);     // integer reconstruction of the best levels
    ALIGNED_32(int rec_acc_try[OAPV_BLK_D]); // same, for the candidate under test

    // candidate offsets, ordered 0, -1, +1, -2, +2, ...:
    // odd indices lower the level, even indices raise it
    const int  delta_tbl[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scan = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  depth = ctx->bit_depth;
    const int  num_pass = (c == 0 ? 2 : 1);

    s16 *out_coef = core->coef;
    s16 *out_rec = core->coef_rec;
    int  min_cost;
    int  done;

    /* forward path: transform in place, quantize a working copy */
    oapv_mcpy(source, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, depth);
    oapv_mcpy(levels, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](levels, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, depth, c ? 112 : 212);

    /* initial candidate; also produces the integer buffer used for updates */
    oapv_mcpy(trial_rec, levels, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](trial_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx_part[0](trial_rec, itx_tmp, ITX_SHIFT1, 1 << log2_w);
    oapv_itx_get_wo_sft(itx_tmp, trial_rec, rec_acc, ITX_SHIFT2(depth), 1 << log2_h);

    min_cost = (int)ctx->fn_ssd[0](width, height, source, trial_rec, width, width, depth);
    oapv_mcpy(out_coef, levels, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(out_rec, trial_rec, sizeof(s16) * OAPV_BLK_D);
    }
    done = (min_cost == 0);

    /* refinement of individual levels */
    for(int pass = 0; pass < num_pass && !done; pass++) {
        for(int pos = 0; pos < OAPV_BLK_D && !done; pos++) {
            const int idx = scan[pos];
            const s16 base = levels[idx];
            int       limit = (c == 0 ? 13 : 5);
            int       pick = 0; // index into delta_tbl of the best offset so far

            if(base == 0) {
                // zero levels are only revisited for the first three
                // coefficient positions of component 0
                if(c != 0 || idx >= 3) {
                    continue;
                }
                limit = 3;
            }

            // dequantized size of one level step at this position
            int q_step = 0;
            if(core->dq_shift[c] > 0) {
                q_step = (core->q_mat_dec[c][idx] + (1 << (core->dq_shift[c] - 1))) >> core->dq_shift[c];
            }
            else {
                q_step = (core->q_mat_dec[c][idx]) << (-core->dq_shift[c]);
            }

            for(int k = 1; k < limit && !done; k++) {
                // beyond +/-1, continue only in the direction that already
                // helped (same parity as the best index)
                if(k > 2 && (pick == 0 || (pick % 2) != (k % 2))) {
                    continue;
                }

                const s16 cand = base + delta_tbl[k];
                levels[idx] = cand;

                // update the integer reconstruction instead of redoing the
                // whole inverse transform
                int step_diff = q_step * delta_tbl[k];
                ctx->fn_itx_adj[0](rec_acc, rec_acc_try, pos, step_diff, 9);
                for(int n = 0; n < 64; n++) {
                    trial_rec[n] = (rec_acc_try[n] + 512) >> 10;
                }

                const int cost = (int)ctx->fn_ssd[0](width, height, source, trial_rec, width, width, depth);

                if(cost >= min_cost) {
                    // no gain: restore the best level found so far
                    levels[idx] = base + delta_tbl[pick];
                    continue;
                }

                oapv_mcpy(rec_acc, rec_acc_try, sizeof(int) * OAPV_BLK_D);
                min_cost = cost;
                out_coef[idx] = cand;
                pick = k;
                done = (cost == 0);
            }
        }
    }

    /* the reconstruction is rebuilt from the final levels */
    if(ctx->rec) {
        oapv_mcpy(out_rec, out_coef, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](out_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](out_rec, ITX_SHIFT1, ITX_SHIFT2(depth), 1 << log2_w);
    }

    /* DC prediction against the previous block of the same component */
    int dc_level = out_coef[0];
    out_coef[0] -= core->prev_dc[c];
    core->prev_dc[c] = dc_level;

    return min_cost;
}
531 
/*
 * Encode one block with a distortion-driven refinement of the quantized
 * levels ("placebo" preset).
 *
 * Same scheme as the "slow" variant, with a wider search: up to seven passes
 * over the block for component 0 (three otherwise) and up to fourteen
 * candidate offsets per coefficient for component 0 (four otherwise). Every
 * candidate is reconstructed with a full dequantization and inverse
 * transform, and kept only when it strictly lowers the distortion against
 * the input samples.
 *
 * The chosen levels end up in core->coef; when ctx->rec is set the matching
 * reconstruction is stored in core->coef_rec. Finally the DC level is
 * replaced by its difference from core->prev_dc[c], which is then updated.
 *
 * Returns the best distortion found.
 */
static double enc_block_rdo_placebo(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    // candidate offsets, ordered 0, -1, +1, -2, +2, ...:
    // odd indices lower the level, even indices raise it
    const int  delta_tbl[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scan = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  depth = ctx->bit_depth;
    const int  num_pass = (c == 0 ? 7 : 3);

    ALIGNED_16(s16 source[OAPV_BLK_D]);
    ALIGNED_16(s16 trial_rec[OAPV_BLK_D]);
    ALIGNED_16(s16 levels[OAPV_BLK_D]);
    s16 *out_coef = core->coef;
    s16 *out_rec = core->coef_rec;
    int  min_cost;
    int  done;

    /* forward path: transform in place, quantize a working copy */
    oapv_mcpy(source, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, depth);
    oapv_mcpy(levels, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](levels, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, depth, c ? 112 : 212);

    /* initial candidate: the plain quantizer output */
    oapv_mcpy(trial_rec, levels, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](trial_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](trial_rec, ITX_SHIFT1, ITX_SHIFT2(depth), 1 << log2_w);
    min_cost = (int)ctx->fn_ssd[0](width, height, source, trial_rec, width, width, depth);
    oapv_mcpy(out_coef, levels, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(out_rec, trial_rec, sizeof(s16) * OAPV_BLK_D);
    }
    done = (min_cost == 0);

    /* refinement of individual levels */
    for(int pass = 0; pass < num_pass && !done; pass++) {
        for(int pos = 0; pos < OAPV_BLK_D && !done; pos++) {
            const int idx = scan[pos];
            const s16 base = levels[idx];
            int       limit = (c == 0 ? 15 : 5);
            int       pick = 0; // index into delta_tbl of the best offset so far

            if(base == 0) {
                // zero levels are only revisited for the first three
                // coefficient positions of component 0
                if(c != 0 || idx >= 3) {
                    continue;
                }
                limit = 3;
            }

            for(int k = 1; k < limit && !done; k++) {
                // beyond +/-1, continue only in the direction that already
                // helped (same parity as the best index)
                if(k > 2 && (pick == 0 || (pick % 2) != (k % 2))) {
                    continue;
                }

                const s16 cand = base + delta_tbl[k];
                levels[idx] = cand;

                oapv_mcpy(trial_rec, levels, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](trial_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](trial_rec, ITX_SHIFT1, ITX_SHIFT2(depth), 1 << log2_w);
                const int cost = (int)ctx->fn_ssd[0](width, height, source, trial_rec, width, width, depth);

                if(cost >= min_cost) {
                    // no gain: restore the best level found so far
                    levels[idx] = base + delta_tbl[pick];
                    continue;
                }

                min_cost = cost;
                out_coef[idx] = cand;
                if(ctx->rec) {
                    oapv_mcpy(out_rec, trial_rec, sizeof(s16) * OAPV_BLK_D);
                }
                pick = k;
                done = (cost == 0);
            }
        }
    }

    /* DC prediction against the previous block of the same component */
    int dc_level = out_coef[0];
    out_coef[0] -= core->prev_dc[c];
    core->prev_dc[c] = dc_level;

    return min_cost;
}
628 
/*
 * Validate encoder parameters and derive the per-context settings from them.
 *
 * Sets the per-component QPs, the number of components, the block coding
 * function selected by the preset, the picture size rounded up to a whole
 * number of macroblocks, and the tile layout.
 *
 * Returns OAPV_OK on success, or OAPV_ERR_INVALID_ARGUMENT when the picture
 * size, the QP, or the tile size is out of range. Nothing in 'ctx' is
 * modified when an error is returned.
 *
 * NOTE(review): the picture and tile dimensions are read through ctx->param
 * while the other values are read through 'param'; this presumably relies on
 * both referring to the same parameter set - confirm against the caller.
 */
static int enc_read_param(oapve_ctx_t *ctx, oapve_param_t *param)
{
    /* check input parameters */
    oapv_assert_rv(param->w > 0 && param->h > 0, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(param->qp >= MIN_QUANT && param->qp <= MAX_QUANT, OAPV_ERR_INVALID_ARGUMENT);

    /* tile size in pixels; it is used as a divisor by enc_set_tile_info(),
       so it must be strictly positive */
    int tile_w = ctx->param->tile_w_mb * OAPV_MB_W;
    int tile_h = ctx->param->tile_h_mb * OAPV_MB_H;
    oapv_assert_rv(tile_w > 0 && tile_h > 0, OAPV_ERR_INVALID_ARGUMENT);

    ctx->qp[Y_C] = param->qp;
    ctx->qp[U_C] = param->qp + param->qp_cb_offset;
    ctx->qp[V_C] = param->qp + param->qp_cr_offset;
    ctx->qp[X_C] = param->qp;

    ctx->num_comp = get_num_comp(param->csp);

    /* choose the block coding routine according to the preset;
       any preset not listed falls back to the plain (non-RDO) routine */
    if(param->preset == OAPV_PRESET_SLOW) {
        ctx->fn_block = enc_block_rdo_slow;
    }
    else if(param->preset == OAPV_PRESET_PLACEBO) {
        ctx->fn_block = enc_block_rdo_placebo;
    }
    else if(param->preset == OAPV_PRESET_MEDIUM) {
        ctx->fn_block = enc_block_rdo_medium;
    }
    else {
        ctx->fn_block = enc_block;
    }

    ctx->log2_block = OAPV_LOG2_BLK;

    /* set various value */
    /* picture size rounded up to a multiple of the macroblock size */
    ctx->w = ((ctx->param->w + (OAPV_MB_W - 1)) >> OAPV_LOG2_MB_W) << OAPV_LOG2_MB_W;
    ctx->h = ((ctx->param->h + (OAPV_MB_H - 1)) >> OAPV_LOG2_MB_H) << OAPV_LOG2_MB_H;

    enc_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, &ctx->num_tile_cols, &ctx->num_tile_rows, &ctx->num_tiles);

    return OAPV_OK;
}
667 
/*
 * Release the resources held by an encoder context: worker threads and the
 * thread pool, the synchronization object, the per-thread core structures,
 * and the bitstream buffer of tile 0.
 *
 * Pointers owned by the context that are freed here are reset to NULL so
 * that a repeated flush of the same context does not free them twice.
 * The context structure itself is not freed.
 */
static void enc_flush(oapve_ctx_t *ctx)
{
    // Release thread pool controller and created threads
    if(ctx->cdesc.threads >= 1) {
        if(ctx->tpool) {
            // thread controller instance is present
            // terminate the created thread
            for(int i = 0; i < ctx->cdesc.threads; i++) {
                if(ctx->thread_id[i]) {
                    // valid thread instance
                    ctx->tpool->release(&ctx->thread_id[i]);
                }
            }
            // deinitialize the thread pool controller
            oapv_tpool_deinit(ctx->tpool);
            oapv_mfree_fast(ctx->tpool);
            ctx->tpool = NULL;
        }
    }

    oapv_tpool_sync_obj_delete(&ctx->sync_obj);
    for(int i = 0; i < ctx->cdesc.threads; i++) {
        enc_core_free(ctx->core[i]);
        ctx->core[i] = NULL;
    }

    oapv_mfree_fast(ctx->tile[0].bs_buf);
    ctx->tile[0].bs_buf = NULL; // avoid a dangling pointer / double free
}
696 
/*
 * Allocate and initialize the run-time resources of an encoder context:
 * one core structure per configured thread, the synchronization object, the
 * thread pool with its worker threads, and the bitstream buffer of tile 0.
 * All tiles are marked as not yet encoded and the rate-control alpha/beta
 * values are set to their defaults.
 *
 * Returns OAPV_OK on success. On any failure everything acquired so far is
 * released through enc_flush() and an error code is returned
 * (OAPV_ERR_OUT_OF_MEMORY for a failed core or thread-pool allocation,
 * OAPV_ERR_UNKNOWN otherwise).
 */
static int enc_ready(oapve_ctx_t *ctx)
{
    oapve_core_t *core = NULL;
    int           ret = OAPV_OK;
    oapv_assert(ctx->core[0] == NULL);

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        core = enc_core_alloc();
        oapv_assert_gv(core != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        ctx->core[i] = core;
    }

    // initialize the threads to NULL
    for(int i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 1) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // the pool is dereferenced right below, so a failed allocation must
        // be caught here
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads);
        for(int i = 0; i < ctx->cdesc.threads; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }

    for(int i = 0; i < OAPV_MAX_TILES; i++) {
        ctx->tile[i].stat = ENC_TILE_STAT_NOT_ENCODED;
    }
    ctx->tile[0].bs_buf = (u8 *)oapv_malloc(ctx->cdesc.max_bs_buf_size);
    oapv_assert_gv(ctx->tile[0].bs_buf, ret, OAPV_ERR_UNKNOWN, ERR);

    ctx->rc_param.alpha = OAPV_RC_ALPHA;
    ctx->rc_param.beta = OAPV_RC_BETA;

    return OAPV_OK;
ERR:

    // undo whatever has been set up so far
    enc_flush(ctx);

    return ret;
}
743 
/*
 * Encode all blocks of component 'c' inside one tile and write them to the
 * bitstream 'bs'.
 *
 * The tile is walked macroblock by macroblock and, inside each macroblock,
 * block by block in raster order. For every block the source samples are
 * loaded into core->coef, the block is coded through ctx->fn_block, and the
 * DC and AC coefficients are written. When 'rec' is not NULL, the
 * reconstructed block in core->coef_rec is stored to the reconstruction
 * picture as well.
 *
 * 's_org' and 's_rec' are the byte strides of the source and reconstruction
 * planes. The writer must be byte aligned on entry; otherwise
 * OAPV_ERR_MALFORMED_BITSTREAM is returned. On exit the output is padded with
 * zero bits up to a byte boundary.
 *
 * Returns the number of bytes produced for this component.
 */
static int enc_tile_comp(oapv_bs_t *bs, oapve_tile_t *tile, oapve_ctx_t *ctx, oapve_core_t *core, int c, int s_org, void *org, int s_rec, void *rec)
{
    // position of the writer before anything of this component is written
    u8 *start = oapv_bsw_sink(bs);
    oapv_assert_rv(bsw_is_align8(bs), OAPV_ERR_MALFORMED_BITSTREAM);

    // macroblock size of this component
    int mb_w = OAPV_MB_W >> ctx->comp_sft[c][0];
    int mb_h = OAPV_MB_H >> ctx->comp_sft[c][1];

    // tile rectangle in the sample grid of this component
    int x_begin = tile->x >> ctx->comp_sft[c][0];
    int x_end = (tile->w >> ctx->comp_sft[c][0]) + x_begin;
    int y_begin = tile->y >> ctx->comp_sft[c][1];
    int y_end = (tile->h >> ctx->comp_sft[c][1]) + y_begin;

    for(int my = y_begin; my < y_end; my += mb_h) {
        for(int mx = x_begin; mx < x_end; mx += mb_w) {
            for(int by = my; by < (my + mb_h); by += OAPV_BLK_H) {
                for(int bx = mx; bx < (mx + mb_w); bx += OAPV_BLK_W) {
                    s16 *src_blk = (s16 *)((u8 *)org + by * s_org) + bx;
                    ctx->fn_imgb_to_block[c](src_blk, OAPV_BLK_W, OAPV_BLK_H, s_org, bx, (OAPV_BLK_W << 1), core->coef);

                    ctx->fn_block(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapve_vlc_dc_coeff(ctx, core, bs, core->coef[0], c);
                    oapve_vlc_ac_coeff(ctx, core, bs, core->coef, 0, c);
                    DUMP_COEF(core->coef, OAPV_BLK_D, bx, by, c);

                    if(rec == NULL) {
                        continue; // no reconstruction picture requested
                    }

                    s16 *rec_blk = (s16 *)((u8 *)rec + by * s_rec) + bx;
                    ctx->fn_block_to_imgb[c](core->coef_rec, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), bx, s_rec, rec_blk);
                }
            }
        }
    }

    /* byte align */
    while(!bsw_is_align8(bs)) {
        oapv_bsw_write1(bs, 0);
    }

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    return (int)(bs->cur - start);
}
791 
/*
 * Encodes one tile into its private bitstream buffer (tile->bs_buf).
 *
 * Steps:
 *  1. choose the tile QP (from rate control when ctx->param->rc_type != 0,
 *     otherwise ctx->qp[Y_C]),
 *  2. write a placeholder tile size and the tile header,
 *  3. build the per-component quantization tables (and the dequantization
 *     tables when a reconstruction is produced or the preset requires it),
 *  4. encode every component with enc_tile_comp(),
 *  5. re-write tile size and tile header at the start of the buffer now that
 *     the real sizes are known.
 *
 * return: OAPV_OK on success, OAPV_ERR_OUT_OF_BS_BUF when the produced data
 *         is larger than tile->bs_buf_max, or the negative value returned by
 *         enc_tile_comp()
 */
static int enc_tile(oapve_ctx_t *ctx, oapve_core_t *core, oapve_tile_t *tile)
{
    oapv_bs_t bs;
    oapv_bsw_init(&bs, tile->bs_buf, tile->bs_buf_max, NULL);

    int qp = 0;
    if(ctx->param->rc_type != 0) {
        oapve_rc_get_qp(ctx, tile, ctx->qp[Y_C], &qp);
        oapv_assert(qp != 0);
    }
    else {
        qp = ctx->qp[Y_C];
    }

    // the tile size is not known yet; a placeholder is written and fixed up below
    tile->tile_size = 0;
    DUMP_SAVE(0);
    oapve_vlc_tile_size(&bs, tile->tile_size);
    oapve_set_tile_header(ctx, &tile->th, core->tile_idx, qp);
    oapve_vlc_tile_header(ctx, &bs, &tile->th);

    for(int c = 0; c < ctx->num_comp; c++) {
        core->qp[c] = tile->th.tile_qp[c];

        int idx_enc = 0;
        int qscale = oapv_quant_scale[core->qp[c] % 6];
        s32 scale_multiply_16 = (s32)(qscale << 4); // 15bit + 4bit
        for(int y = 0; y < OAPV_BLK_H; y++) {
            for(int x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat_enc[c][idx_enc++] = scale_multiply_16 / ctx->fh.q_matrix[c][y][x];
            }
        }

        if(ctx->rec || ctx->param->preset > OAPV_PRESET_MEDIUM) {
            core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

            int idx_dec = 0;
            int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
            for(int y = 0; y < OAPV_BLK_H; y++) {
                for(int x = 0; x < OAPV_BLK_W; x++) {
                    core->q_mat_dec[c][idx_dec++] = dq_scale * ctx->fh.q_matrix[c][y][x];
                }
            }
        }
    }

    const int is_planar2 = (OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2);

    for(int c = 0; c < ctx->num_comp; c++) {
        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;
        core->prev_dc[c] = 0;

        // PLANAR2: plane 0 is used for component 0 and plane 1 for the other
        // components, with components above 1 starting one sample further in.
        // Otherwise every component has its own plane.
        const int tc = is_planar2 ? ((c > 0) ? 1 : 0) : c;
        const int off = (is_planar2 && c > 1) ? 1 : 0;

        s16 *org = (s16 *)ctx->imgb->a[tc] + off;
        int  s_org = ctx->imgb->s[tc];

        s16 *rec = NULL;
        int  s_rec = 0;
        if(ctx->rec) {
            rec = (s16 *)ctx->rec->a[tc] + off;
            // the reconstruction buffer is addressed with its own stride,
            // which need not match the stride of the input picture
            s_rec = ctx->rec->s[tc];
        }

        int size = enc_tile_comp(&bs, tile, ctx, core, c, s_org, org, s_rec, rec);
        if(size < 0) {
            // enc_tile_comp() reported an error instead of a byte count
            return size;
        }
        tile->th.tile_data_size[c] = size;
    }

    u32 bs_size = (int)(bs.cur - bs.beg);
    if(bs_size > tile->bs_buf_max) {
        return OAPV_ERR_OUT_OF_BS_BUF;
    }
    tile->bs_size = bs_size;

    // second writer over the same buffer: re-write size and header in place
    oapv_bs_t bs_th;
    bs_th.is_bin_count = 0;
    oapv_bsw_init(&bs_th, tile->bs_buf, tile->bs_size, NULL);
    tile->tile_size = bs_size - OAPV_TILE_SIZE_LEN;

    DUMP_SAVE(1);
    DUMP_LOAD(0);
    oapve_vlc_tile_size(&bs_th, tile->tile_size);
    oapve_vlc_tile_header(ctx, &bs_th, &tile->th);
    DUMP_LOAD(1);
    oapv_bsw_deinit(&bs_th);
    return OAPV_OK;
}
895 
enc_thread_tile(void * arg)896 static int enc_thread_tile(void *arg)
897 {
898     oapve_core_t *core = (oapve_core_t *)arg;
899     oapve_ctx_t  *ctx = core->ctx;
900     oapve_tile_t *tile = ctx->tile;
901     int           ret = OAPV_OK, i;
902 
903     while(1) {
904         // find not encoded tile
905         oapv_tpool_enter_cs(ctx->sync_obj);
906         for(i = 0; i < ctx->num_tiles; i++) {
907             if(tile[i].stat == ENC_TILE_STAT_NOT_ENCODED) {
908                 tile[i].stat = ENC_TILE_STAT_ON_ENCODING;
909                 core->tile_idx = i;
910                 break;
911             }
912         }
913         oapv_tpool_leave_cs(ctx->sync_obj);
914         if(i == ctx->num_tiles) {
915             break;
916         }
917 
918         ret = enc_tile(ctx, core, &tile[core->tile_idx]);
919         oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
920 
921         oapv_tpool_enter_cs(ctx->sync_obj);
922         tile[core->tile_idx].stat = ENC_TILE_STAT_ENCODED;
923         oapv_tpool_leave_cs(ctx->sync_obj);
924     }
925 ERR:
926     return ret;
927 }
928 
/*
 * Pads the input picture from the configured size (ctx->param->w/h) up to
 * the coded size (ctx->w/h); this variant is the one installed for the
 * OAPV_CF_PLANAR2 format and applies no per-plane shift.
 *
 * Columns are padded first by repeating the last valid sample of each row,
 * then rows are padded by copying the last valid row. Nothing is done when
 * both dimensions already match.
 */
static void enc_img_pad_p210(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    /* horizontal padding */
    if(ctx->w != ctx->param->w) {
        for(int c = 0; c < imgb->np; c++) {
            const int col_beg = ctx->param->w;
            const int col_end = ctx->w;
            const int rows = ctx->h;
            const int stride = imgb->s[c] >> 1; // stride is halved to step in pel units
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < rows; y++, row += stride) {
                const pel edge = row[col_beg - 1];
                for(int x = col_beg; x < col_end; x++) {
                    row[x] = edge;
                }
            }
        }
    }

    /* vertical padding */
    if(ctx->h != ctx->param->h) {
        for(int c = 0; c < imgb->np; c++) {
            const int row_beg = ctx->param->h;
            const int row_end = ctx->h;
            const int row_len = ctx->w;
            const int stride = imgb->s[c] >> 1;
            pel      *dst = ((pel *)imgb->a[c]) + row_beg * stride;
            pel      *src = dst - stride; // last valid row

            for(int y = row_beg; y < row_end; y++, dst += stride) {
                oapv_mcpy(dst, src, sizeof(pel) * row_len);
            }
        }
    }
}
/*
 * Pads the input picture from the configured size (ctx->param->w/h) up to
 * the coded size (ctx->w/h). Sizes are scaled per plane with
 * ctx->comp_sft[c].
 *
 * Columns are padded first by repeating the last valid sample of each row,
 * then rows are padded by copying the last valid row. Nothing is done when
 * both dimensions already match.
 */
static void enc_img_pad(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    /* horizontal padding */
    if(ctx->w != ctx->param->w) {
        for(int c = 0; c < imgb->np; c++) {
            const int col_beg = ctx->param->w >> ctx->comp_sft[c][0];
            const int col_end = ctx->w >> ctx->comp_sft[c][0];
            const int rows = ctx->h >> ctx->comp_sft[c][1];
            const int stride = imgb->s[c] >> 1; // stride is halved to step in pel units
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < rows; y++, row += stride) {
                const pel edge = row[col_beg - 1];
                for(int x = col_beg; x < col_end; x++) {
                    row[x] = edge;
                }
            }
        }
    }

    /* vertical padding */
    if(ctx->h != ctx->param->h) {
        for(int c = 0; c < imgb->np; c++) {
            const int row_beg = ctx->param->h >> ctx->comp_sft[c][1];
            const int row_end = ctx->h >> ctx->comp_sft[c][1];
            const int row_len = ctx->w >> ctx->comp_sft[c][0];
            const int stride = imgb->s[c] >> 1;
            pel      *dst = ((pel *)imgb->a[c]) + row_beg * stride;
            pel      *src = dst - stride; // last valid row

            for(int y = row_beg; y < row_end; y++, dst += stride) {
                oapv_mcpy(dst, src, sizeof(pel) * row_len);
            }
        }
    }
}
1013 
/*
 * Sets up the encoder context for one frame.
 *
 * From the color space of imgb_i it derives the chroma format, component
 * count, per-component shifts and bit depth, and installs the block
 * load/store and padding callbacks that match the pixel layout. It then
 * pads the input picture, resets all tile states, takes a reference on the
 * input (and, if given, the reconstruction) picture, splits the bitstream
 * buffer evenly between the tiles and links every core to the context.
 *
 * imgb_i : input picture
 * imgb_r : reconstruction picture, or NULL
 *
 * return : OAPV_OK
 */
static int enc_frm_prepare(oapve_ctx_t *ctx, oapv_imgb_t *imgb_i, oapv_imgb_t *imgb_r)
{
    const int is_planar2 = (OAPV_CS_GET_FORMAT(imgb_i->cs) == OAPV_CF_PLANAR2);

    ctx->cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb_i->cs));
    ctx->num_comp = get_num_comp(ctx->cfi);

    // the first component is never shifted; the others use the chroma shifts
    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;
    for(int c = 1; c < ctx->num_comp; c++) {
        ctx->comp_sft[c][0] = get_chroma_sft_w(ctx->cfi);
        ctx->comp_sft[c][1] = get_chroma_sft_h(ctx->cfi);
    }

    ctx->bit_depth = OAPV_CS_GET_BIT_DEPTH(imgb_i->cs);

    if(!is_planar2) {
        ctx->fn_imgb_to_block_rc = imgb_to_block;
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->fn_imgb_to_block[c] = imgb_to_block_10bit;
            ctx->fn_block_to_imgb[c] = block_to_imgb_10bit;
        }
        ctx->fn_img_pad = enc_img_pad;
    }
    else {
        ctx->fn_imgb_to_block_rc = imgb_to_block_p210;

        ctx->fn_imgb_to_block[Y_C] = imgb_to_block_p210_y;
        ctx->fn_imgb_to_block[U_C] = imgb_to_block_p210_uv;
        ctx->fn_imgb_to_block[V_C] = imgb_to_block_p210_uv;

        ctx->fn_block_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_block_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_block_to_imgb[V_C] = block_to_imgb_p210_uv;
        ctx->fn_img_pad = enc_img_pad_p210;
    }

    /* coded size: the aligned size when the picture provides one */
    ctx->w = (imgb_i->aw[Y_C] > 0) ? imgb_i->aw[Y_C] : imgb_i->w[Y_C];
    ctx->h = (imgb_i->ah[Y_C] > 0) ? imgb_i->ah[Y_C] : imgb_i->h[Y_C];

    ctx->fn_img_pad(ctx, imgb_i);

    for(int t = 0; t < ctx->num_tiles; t++) {
        ctx->tile[t].stat = ENC_TILE_STAT_NOT_ENCODED;
    }

    ctx->imgb = imgb_i;
    imgb_addref(ctx->imgb);
    if(imgb_r != NULL) {
        // the reconstruction inherits the geometry of the input picture
        for(int c = 0; c < ctx->num_comp; c++) {
            imgb_r->w[c] = imgb_i->w[c];
            imgb_r->h[c] = imgb_i->h[c];
            imgb_r->x[c] = imgb_i->x[c];
            imgb_r->y[c] = imgb_i->y[c];
        }
        ctx->rec = imgb_r;
        imgb_addref(ctx->rec);
    }

    /* every tile gets an equal slice, laid out after the slice of tile 0 */
    int buf_size = ctx->cdesc.max_bs_buf_size / ctx->num_tiles;
    ctx->tile[0].bs_buf_max = buf_size;
    for(int t = 1; t < ctx->num_tiles; t++) {
        oapve_tile_t *cur = &ctx->tile[t];
        cur->bs_buf = ctx->tile[t - 1].bs_buf + buf_size;
        cur->bs_buf_max = buf_size;
    }

    for(int t = 0; t < ctx->cdesc.threads; t++) {
        oapve_core_t *core = ctx->core[t];
        core->ctx = ctx;
        core->thread_idx = t;
    }

    return OAPV_OK;
}
1087 
/*
 * Ends the encoding of a frame: releases the context's hold on the input
 * picture and, when one is attached, on the reconstruction picture (whose
 * context pointer is then cleared).
 *
 * stat  : not used by this function
 * return: OAPV_OK
 */
static int enc_frm_finish(oapve_ctx_t *ctx, oapve_stat_t *stat)
{
    (void)stat;

    imgb_release(ctx->imgb);

    if(ctx->rec != NULL) {
        imgb_release(ctx->rec);
        ctx->rec = NULL;
    }

    return OAPV_OK;
}
1097 
/*
 * Encodes one frame into ctx->bs.
 *
 * The frame header is written first, then (when rate control is enabled) the
 * per-tile bit budgets and the picture QP are derived, then all tiles are
 * encoded by the worker threads plus the calling thread. The tile bitstreams
 * are appended to ctx->bs in tile order and, if the header carries tile
 * sizes, the frame header is written again over its first copy.
 *
 * Every worker that was started is joined before this function returns, even
 * when an error occurred, so no worker keeps using ctx after the return. The
 * first error encountered is the one reported.
 *
 * return: OAPV_OK on success, OAPV_ERR_FAILED_SYSCALL when a join fails, or
 *         the failing result of a tile worker
 */
static int enc_frame(oapve_ctx_t *ctx)
{
    oapv_bs_t *bs = &ctx->bs;
    int        ret = OAPV_OK;

    // keep a copy of the writer positioned at the frame header for the re-write
    oapv_bs_t  bs_fh;
    oapv_mcpy(&bs_fh, bs, sizeof(oapv_bs_t));

    /* write frame header */
    oapve_set_frame_header(ctx, &ctx->fh);
    oapve_vlc_frame_header(bs, ctx, &ctx->fh);

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    /* rc init */
    u64 cost_sum = 0;
    if(ctx->param->rc_type != 0) {
        oapve_rc_get_tile_cost_thread(ctx, &cost_sum);

        // bit budget of the picture, shared between tiles in proportion to their cost
        double bits_pic = ((double)ctx->param->bitrate * 1000) / ((double)ctx->param->fps_num / ctx->param->fps_den);
        for(int i = 0; i < ctx->num_tiles; i++) {
            ctx->tile[i].rc.target_bits_left = bits_pic * ctx->tile[i].rc.cost / cost_sum;
            ctx->tile[i].rc.target_bits = ctx->tile[i].rc.target_bits_left;
        }

        ctx->rc_param.lambda = oapve_rc_estimate_pic_lambda(ctx, cost_sum);
        ctx->rc_param.qp = oapve_rc_estimate_pic_qp(ctx->rc_param.lambda);
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->qp[c] = ctx->rc_param.qp;
            if(c == 1) {
                ctx->qp[c] += ctx->param->qp_cb_offset;
            }
            else if(c == 2) {
                ctx->qp[c] += ctx->param->qp_cr_offset;
            }
        }
    }

    oapv_tpool_t *tpool = ctx->tpool;
    int           res, tidx = 0, thread_num1 = 0;
    int           parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

    /* encode tiles ************************************/
    for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
        tpool->run(ctx->thread_id[tidx], enc_thread_tile,
                   (void *)ctx->core[tidx]);
    }
    // the calling thread takes part with the remaining core
    ret = enc_thread_tile((void *)ctx->core[tidx]);

    // join ALL started workers before looking at errors: leaving early would
    // let the remaining workers run on while the caller unwinds
    for(thread_num1 = 0; thread_num1 < parallel_task - 1; thread_num1++) {
        int ret_worker = OAPV_OK;
        res = tpool->join(ctx->thread_id[thread_num1], &ret_worker);
        if(OAPV_SUCCEEDED(ret)) {
            // only the first error is kept
            if(res != TPOOL_SUCCESS) {
                ret = OAPV_ERR_FAILED_SYSCALL;
            }
            else if(!OAPV_SUCCEEDED(ret_worker)) {
                ret = ret_worker;
            }
        }
    }
    oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
    /****************************************************/

    // append the tile bitstreams in tile order
    for(int i = 0; i < ctx->num_tiles; i++) {
        oapv_mcpy(ctx->bs.cur, ctx->tile[i].bs_buf, ctx->tile[i].bs_size);
        ctx->bs.cur = ctx->bs.cur + ctx->tile[i].bs_size;
        ctx->fh.tile_size[i] = ctx->tile[i].bs_size - OAPV_TILE_SIZE_LEN;
    }

    /* rewrite frame header */
    if(ctx->fh.tile_size_present_in_fh_flag) {
        oapve_vlc_frame_header(&bs_fh, ctx, &ctx->fh);
    }
    if(ctx->param->rc_type != 0) {
        oapve_rc_update_after_pic(ctx, cost_sum);
    }
    return ret;

ERR:
    return ret;
}
1174 
/*
 * Installs the function tables used by the encoder.
 *
 * The generic tables are installed first. When built with X86_SSE the CPU is
 * queried at run time and the AVX2 or SSE tables replace the generic ones
 * where available; when built with ARM_NEON the NEON tables are installed
 * unconditionally.
 *
 * return: OAPV_OK
 */
static int enc_platform_init(oapve_ctx_t *ctx)
{
    /* generic implementations */
    ctx->fn_sad = oapv_tbl_fn_sad_16b;
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b;
    ctx->fn_diff = oapv_tbl_fn_diff_16b;
    ctx->fn_txb = oapv_tbl_fn_tx;
    ctx->fn_itx = oapv_tbl_fn_itx;
    ctx->fn_itx_part = oapv_tbl_fn_itx_part;
    ctx->fn_itx_adj = oapv_tbl_fn_itx_adj;
    ctx->fn_quant = oapv_tbl_fn_quant;
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;
#if X86_SSE
    // bit 0 of the CPU info is read as the SSE flag and bit 2 as the AVX2 flag
    const int cpu_info = oapv_check_cpu_info_x86();
    const int has_sse = cpu_info & 1;
    const int has_avx2 = (cpu_info >> 2) & 1;

    if(has_avx2) {
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_avx;
        ctx->fn_txb = oapv_tbl_fn_txb_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
        ctx->fn_itx_part = oapv_tbl_fn_itx_part_avx;
        ctx->fn_itx_adj = oapv_tbl_fn_itx_adj_avx;
        ctx->fn_quant = oapv_tbl_fn_quant_avx;
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
    }
    else if(has_sse) {
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_sse;
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
    }
#elif ARM_NEON
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b_neon;
    ctx->fn_txb = oapv_tbl_fn_txb_neon;
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
    ctx->fn_quant = oapv_tbl_fn_quant_neon;
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;
#endif
    return OAPV_OK;
}
1218 
oapve_create(oapve_cdesc_t * cdesc,int * err)1219 oapve_t oapve_create(oapve_cdesc_t *cdesc, int *err)
1220 {
1221     oapve_ctx_t *ctx;
1222     int          ret;
1223 
1224     DUMP_CREATE(1);
1225     /* memory allocation for ctx and core structure */
1226     ctx = (oapve_ctx_t *)enc_ctx_alloc();
1227     if(ctx != NULL) {
1228         oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapve_cdesc_t));
1229         ret = enc_platform_init(ctx);
1230         oapv_assert_g(ret == OAPV_OK, ERR);
1231 
1232         ret = enc_ready(ctx);
1233         oapv_assert_g(ret == OAPV_OK, ERR);
1234 
1235         /* set default value for ctx */
1236         ctx->magic = OAPVE_MAGIC_CODE;
1237         ctx->id = (oapve_t)ctx;
1238         if(err) {
1239             *err = OAPV_OK;
1240         }
1241         return (ctx->id);
1242     }
1243     else {
1244         ret = OAPV_ERR;
1245     }
1246 ERR:
1247     if(ctx) {
1248         enc_ctx_free(ctx);
1249     }
1250     if(err) {
1251         *err = ret;
1252     }
1253     return NULL;
1254 }
1255 
/*
 * Destroys an encoder instance: flushes it with enc_flush() and frees its
 * context. Nothing is done when eid does not resolve to an encoder context.
 */
void oapve_delete(oapve_t eid)
{
    oapve_ctx_t *ctx = enc_id_to_ctx(eid);

    oapv_assert_r(ctx);

    DUMP_DELETE();
    enc_flush(ctx);
    enc_ctx_free(ctx);
}
1267 
/*
 * Encodes one access unit.
 *
 * The output starts with a 32-bit access unit size, followed by one PBU per
 * input frame and one metadata PBU per entry of the metadata list. Every PBU
 * starts with its own size field, which is written as 0 first and re-written
 * once the PBU is complete.
 *
 * eid   : encoder id
 * ifrms : input frames
 * mid   : metadata id; may be NULL unless frame hashing is enabled
 * bitb  : output buffer (addr, bsize)
 * stat  : receives the encoding result (sizes, frame info, bytes written)
 * rfrms : reconstruction frames, or NULL
 *
 * return: OAPV_OK on success, otherwise an error code. When a frame fails
 *         after enc_frm_prepare(), the picture references taken for that
 *         frame are released before returning.
 */
int oapve_encode(oapve_t eid, oapv_frms_t *ifrms, oapvm_t mid, oapv_bitb_t *bitb, oapve_stat_t *stat, oapv_frms_t *rfrms)
{
    oapve_ctx_t *ctx;
    oapv_frm_t  *frm;
    oapv_bs_t   *bs;
    oapvm_ctx_t *md_list;
    u32          au_size;
    int          i, ret;

    ctx = enc_id_to_ctx(eid);
    // every pointer that is dereferenced below has to be valid
    oapv_assert_rv(ctx != NULL && ifrms != NULL && bitb != NULL && stat != NULL, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(bitb->addr && bitb->bsize > 0, OAPV_ERR_INVALID_ARGUMENT);

    bs = &ctx->bs;

    oapv_bsw_init(bs, bitb->addr, bitb->bsize, NULL);
    oapv_mset(stat, 0, sizeof(oapve_stat_t));

    u8       *bs_pos_au_beg = oapv_bsw_sink(bs); // address syntax of au size
    u8       *bs_pos_pbu_beg;
    oapv_bs_t bs_pbu_beg;
    oapv_bsw_write(bs, 0, 32); // placeholder for the au size, fixed up at the end

    for(i = 0; i < ifrms->num_frms; i++) {
        frm = &ifrms->frm[i];

        /* set default value for encoding parameter */
        ctx->param = &ctx->cdesc.param[i];
        ret = enc_read_param(ctx, ctx->param);
        oapv_assert_rv(ret == OAPV_OK, OAPV_ERR);

        oapv_assert_rv(ctx->param->profile_idc == OAPV_PROFILE_422_10, OAPV_ERR_UNSUPPORTED);

        // prepare for encoding a frame
        ret = enc_frm_prepare(ctx, frm->imgb, (rfrms != NULL) ? rfrms->frm[i].imgb : NULL);
        oapv_assert_rv(ret == OAPV_OK, ret);

        // from here on the context holds picture references: failures have to
        // go through ERR_FRM so that they are released

        bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
        oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */

        DUMP_SAVE(0);
        oapve_vlc_pbu_size(bs, 0);
        oapve_vlc_pbu_header(bs, frm->pbu_type, frm->group_id);
        // encode a frame
        ret = enc_frame(ctx);
        oapv_assert_g(ret == OAPV_OK, ERR_FRM);

        // rewrite pbu_size
        int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
        DUMP_SAVE(1);
        DUMP_LOAD(0);
        oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
        DUMP_LOAD(1);

        stat->frm_size[i] = pbu_size + 4 /* PBU size length*/;
        copy_fi_to_finfo(&ctx->fh.fi, frm->pbu_type, frm->group_id, &stat->aui.frm_info[i]);

        // add frame hash value of reconstructed frame into metadata list
        if(ctx->use_frm_hash) {
            if(frm->pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
               frm->pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME) {
                oapv_assert_gv(mid != NULL, ret, OAPV_ERR_INVALID_ARGUMENT, ERR_FRM);
                ret = oapv_set_md5_pld(mid, frm->group_id, ctx->rec);
                oapv_assert_g(OAPV_SUCCEEDED(ret), ERR_FRM);
            }
        }

        // finishing of encoding a frame
        ret = enc_frm_finish(ctx, stat);
        oapv_assert_rv(ret == OAPV_OK, ret);
    }
    stat->aui.num_frms = ifrms->num_frms;

    md_list = mid;
    if(md_list != NULL) {
        int num_md = md_list->num;
        for(i = 0; i < num_md; i++) {
            int group_id = md_list->group_ids[i];
            bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
            oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */
            DUMP_SAVE(0);

            oapve_vlc_pbu_size(bs, 0);
            oapve_vlc_pbu_header(bs, OAPV_PBU_TYPE_METADATA, group_id);
            oapve_vlc_metadata(&md_list->md_arr[i], bs);

            // rewrite pbu_size
            int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
            DUMP_SAVE(1);
            DUMP_LOAD(0);
            oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
            DUMP_LOAD(1);
        }
    }

    // the au size does not count its own 4-byte field
    au_size = (u32)((u8 *)oapv_bsw_sink(bs) - bs_pos_au_beg) - 4;
    oapv_bsw_write_direct(bs_pos_au_beg, au_size, 32); /* u(32) */

    oapv_bsw_deinit(&ctx->bs); /* de-init BSW */
    stat->write = bsw_get_write_byte(&ctx->bs);

    return OAPV_OK;

ERR_FRM:
    /* drop the picture references taken by enc_frm_prepare() for the failed frame */
    enc_frm_finish(ctx, stat);
    return ret;
}
1368 
/*
 * Reads or changes a configuration value of an encoder instance.
 *
 * eid  : encoder id
 * cfg  : one of the OAPV_CFG_SET_xxx / OAPV_CFG_GET_xxx selectors
 * buf  : value to set, or destination of the value to get; accessed as int
 * size : size of *buf; must hold sizeof(int) for every supported selector
 *
 * return: OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for a bad id, size or
 *         value, OAPV_ERR_UNSUPPORTED for an unknown selector
 */
int oapve_config(oapve_t eid, int cfg, void *buf, int *size)
{
    oapve_ctx_t *ctx = enc_id_to_ctx(eid);
    int          t0;

    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    switch(cfg) {
    /* set config **********************************************************/
    case OAPV_CFG_SET_QP:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        t0 = *((int *)buf);
        oapv_assert_rv(t0 >= MIN_QUANT && t0 <= MAX_QUANT,
                       OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->qp = t0;
        break;

    // these three setters share the same validation: a strictly positive int
    case OAPV_CFG_SET_FPS_NUM:
    case OAPV_CFG_SET_FPS_DEN:
    case OAPV_CFG_SET_BPS:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        t0 = *((int *)buf);
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        if(cfg == OAPV_CFG_SET_FPS_NUM) {
            ctx->param->fps_num = t0;
        }
        else if(cfg == OAPV_CFG_SET_FPS_DEN) {
            ctx->param->fps_den = t0;
        }
        else {
            ctx->param->bitrate = t0;
        }
        break;

    case OAPV_CFG_SET_USE_FRM_HASH:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        // any non-zero value enables the frame hash
        ctx->use_frm_hash = (*((int *)buf) != 0);
        break;

    /* get config *******************************************************/
    case OAPV_CFG_GET_QP:
    case OAPV_CFG_GET_WIDTH:
    case OAPV_CFG_GET_HEIGHT:
    case OAPV_CFG_GET_FPS_NUM:
    case OAPV_CFG_GET_FPS_DEN:
    case OAPV_CFG_GET_BPS:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        if(cfg == OAPV_CFG_GET_QP) {
            t0 = ctx->param->qp;
        }
        else if(cfg == OAPV_CFG_GET_WIDTH) {
            t0 = ctx->param->w;
        }
        else if(cfg == OAPV_CFG_GET_HEIGHT) {
            t0 = ctx->param->h;
        }
        else if(cfg == OAPV_CFG_GET_FPS_NUM) {
            t0 = ctx->param->fps_num;
        }
        else if(cfg == OAPV_CFG_GET_FPS_DEN) {
            t0 = ctx->param->fps_den;
        }
        else {
            t0 = ctx->param->bitrate;
        }
        *((int *)buf) = t0;
        break;

    default:
        oapv_trace("unknown config value (%d)\n", cfg);
        oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    }

    return OAPV_OK;
}
1440 
/*
 * Fills param with the default encoding parameters.
 *
 * All fields are cleared first; preset, chroma QP offsets, tile size (in
 * macroblocks), profile, level and band are then set explicitly.
 *
 * return: OAPV_OK
 */
int oapve_param_default(oapve_param_t *param)
{
    oapv_mset(param, 0, sizeof(oapve_param_t));
    param->preset = OAPV_PRESET_DEFAULT;

    param->qp_cb_offset = 0;
    param->qp_cr_offset = 0;

    param->tile_w_mb = 16;
    param->tile_h_mb = 16;

    param->profile_idc = OAPV_PROFILE_422_10;
    // 4.1 is not exactly representable in binary floating point, so 4.1 * 30
    // evaluates slightly below 123 and a plain cast would truncate it to 122;
    // adding 0.5 before the cast rounds to the intended value
    param->level_idc = (int)((4.1 * 30.0) + 0.5);
    param->band_idc = 2;

    return OAPV_OK;
}
1458 
1459 ///////////////////////////////////////////////////////////////////////////////
1460 // end of encoder code
1461 #endif // ENABLE_ENCODER
1462 ///////////////////////////////////////////////////////////////////////////////
1463 
1464 ///////////////////////////////////////////////////////////////////////////////
1465 // start of decoder code
1466 #if ENABLE_DECODER
1467 ///////////////////////////////////////////////////////////////////////////////
dec_id_to_ctx(oapvd_t id)1468 static oapvd_ctx_t *dec_id_to_ctx(oapvd_t id)
1469 {
1470     oapvd_ctx_t *ctx;
1471     oapv_assert_rv(id, NULL);
1472     ctx = (oapvd_ctx_t *)id;
1473     oapv_assert_rv(ctx->magic == OAPVD_MAGIC_CODE, NULL);
1474     return ctx;
1475 }
1476 
dec_ctx_alloc(void)1477 static oapvd_ctx_t *dec_ctx_alloc(void)
1478 {
1479     oapvd_ctx_t *ctx;
1480 
1481     ctx = (oapvd_ctx_t *)oapv_malloc_fast(sizeof(oapvd_ctx_t));
1482 
1483     oapv_assert_rv(ctx != NULL, NULL);
1484     oapv_mset_x64a(ctx, 0, sizeof(oapvd_ctx_t));
1485 
1486     return ctx;
1487 }
1488 
/*
 * Frees a decoder context with oapv_mfree_fast(), the counterpart of the
 * allocator used by dec_ctx_alloc().
 */
static void dec_ctx_free(oapvd_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
1493 
dec_core_alloc(void)1494 static oapvd_core_t *dec_core_alloc(void)
1495 {
1496     oapvd_core_t *core;
1497 
1498     core = (oapvd_core_t *)oapv_malloc_fast(sizeof(oapvd_core_t));
1499 
1500     oapv_assert_rv(core, NULL);
1501     oapv_mset_x64a(core, 0, sizeof(oapvd_core_t));
1502 
1503     return core;
1504 }
1505 
/*
 * Frees a decoder core with oapv_mfree_fast(), the counterpart of the
 * allocator used by dec_core_alloc().
 */
static void dec_core_free(oapvd_core_t *core)
{
    oapv_mfree_fast(core);
}
1510 
/*
 * Reconstructs the block whose parsed coefficients are in core->coef.
 *
 * The DC coefficient is coded relative to the previous block of the same
 * component, so the predictor core->prev_dc[c] is added back (and updated)
 * first. The coefficients are then dequantized and inverse transformed in
 * place.
 *
 * log2_w, log2_h : log2 of the block width and height
 * c              : component index
 *
 * return: OAPV_OK
 */
static int dec_block(oapvd_ctx_t *ctx, oapvd_core_t *core, int log2_w, int log2_h, int c)
{
    const int line = 1 << log2_w;

    /* DC prediction */
    core->coef[0] += core->prev_dc[c];
    core->prev_dc[c] = core->coef[0];

    /* inverse quantization */
    ctx->fn_dquant[0](core->coef, core->q_mat[c], log2_w, log2_h, core->dq_shift[c]);

    /* inverse transform */
    ctx->fn_itx[0](core->coef, ITX_SHIFT1, ITX_SHIFT2(ctx->bit_depth), line);

    return OAPV_OK;
}
1524 
/*
 * Computes the rectangle of every tile of a picture.
 *
 * Tiles are numbered row by row. Each tile is tile_w x tile_h, except that a
 * tile reaching beyond the picture is cut at the picture border.
 *
 * tile            : array of num_tiles entries to fill (x, y, w, h)
 * w_pel, h_pel    : picture width and height
 * tile_w, tile_h  : nominal tile width and height
 * num_tile_cols   : number of tiles per row
 * num_tiles       : total number of tiles
 *
 * return: OAPV_OK
 */
static int dec_set_tile_info(oapvd_tile_t* tile, int w_pel, int h_pel, int tile_w, int tile_h, int num_tile_cols, int num_tiles)
{
    for(int idx = 0; idx < num_tiles; idx++) {
        oapvd_tile_t *cur = &tile[idx];
        const int     left = (idx % (num_tile_cols)) * tile_w;
        const int     top = (idx / (num_tile_cols)) * tile_h;
        int           width = tile_w;
        int           height = tile_h;

        // cut the tile where it would run past the right or bottom border
        if(left + tile_w > w_pel) {
            width = w_pel - left;
        }
        if(top + tile_h > h_pel) {
            height = h_pel - top;
        }

        cur->x = left;
        cur->y = top;
        cur->w = width;
        cur->h = height;
    }
    return OAPV_OK;
}
1539 
/*
 * Sets up the decoder context for one frame from the parsed frame header
 * (ctx->fh) and the output picture.
 *
 * It takes a reference on imgb, derives bit depth, chroma format, component
 * count and per-component shifts, aligns the coded size to the macroblock
 * size, installs the block store callbacks that match the pixel layout of
 * imgb, computes the tile grid and resets the per-tile state.
 *
 * return: OAPV_OK on success, OAPV_ERR_MALFORMED_BITSTREAM when the header
 *         describes an empty tile size or more tile columns/rows than
 *         supported
 */
static int dec_frm_prepare(oapvd_ctx_t *ctx, oapv_imgb_t *imgb)
{
    ctx->imgb = imgb;
    imgb_addref(ctx->imgb); // increase reference count

    ctx->bit_depth = ctx->fh.fi.bit_depth;
    ctx->cfi = ctx->fh.fi.chroma_format_idc;
    ctx->num_comp = get_num_comp(ctx->cfi);
    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;

    // the shifts follow the layout of the output picture
    for(int c = 1; c < ctx->num_comp; c++) {
        ctx->comp_sft[c][0] = get_chroma_sft_w(color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs)));
        ctx->comp_sft[c][1] = get_chroma_sft_h(color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs)));
    }

    ctx->w = oapv_align_value(ctx->fh.fi.frame_width, OAPV_MB_W);
    ctx->h = oapv_align_value(ctx->fh.fi.frame_height, OAPV_MB_H);

    if(OAPV_CS_GET_FORMAT(imgb->cs) == OAPV_CF_PLANAR2) {
        ctx->fn_block_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_block_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_block_to_imgb[V_C] = block_to_imgb_p210_uv;
    }
    else {
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->fn_block_to_imgb[c] = block_to_imgb_10bit;
        }
    }

    int tile_w = ctx->fh.tile_width_in_mbs * OAPV_MB_W;
    int tile_h = ctx->fh.tile_height_in_mbs * OAPV_MB_H;

    // both values are divisors below; a header with an empty tile size must
    // be rejected instead of dividing by zero
    oapv_assert_rv(tile_w > 0 && tile_h > 0, OAPV_ERR_MALFORMED_BITSTREAM);

    ctx->num_tile_cols = (ctx->w + (tile_w - 1)) / tile_w;
    ctx->num_tile_rows = (ctx->h + (tile_h - 1)) / tile_h;
    ctx->num_tiles = ctx->num_tile_cols * ctx->num_tile_rows;

    oapv_assert_rv((ctx->num_tile_cols <= OAPV_MAX_TILE_COLS) && (ctx->num_tile_rows <= OAPV_MAX_TILE_ROWS), OAPV_ERR_MALFORMED_BITSTREAM);
    dec_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, ctx->num_tile_cols, ctx->num_tiles);

    // only the start of the first tile is known at this point
    for(int i = 0; i < ctx->num_tiles; i++) {
        ctx->tile[i].bs_beg = NULL;
    }
    ctx->tile[0].bs_beg = oapv_bsr_sink(&ctx->bs);

    for(int i = 0; i < ctx->num_tiles; i++) {
        ctx->tile[i].stat = DEC_TILE_STAT_NOT_DECODED;
    }

    return OAPV_OK;
}
1591 
/*
 * Ends the decoding of a frame: clears the bitstream reader of the context
 * and gives up the context's hold on the output picture.
 *
 * return: OAPV_OK
 */
static int dec_frm_finish(oapvd_ctx_t *ctx)
{
    /* wipe the reader state */
    oapv_mset(&ctx->bs, 0, sizeof(oapv_bs_t));

    /* decrease the reference count and forget the picture */
    imgb_release(ctx->imgb);
    ctx->imgb = NULL;

    return OAPV_OK;
}
1599 
/*
 * Decodes one color component of a tile.
 *
 * The tile is walked macroblock by macroblock and, inside each macroblock,
 * block by block (OAPV_BLK_W x OAPV_BLK_H). For every block the DC and AC
 * coefficients are parsed from bs, the block is reconstructed by dec_block()
 * and the result is stored into the destination plane. The reader is byte
 * aligned once the component is complete.
 *
 * tile  : tile rectangle (x, y, w, h); shifted here by ctx->comp_sft[c]
 * bs    : bitstream reader positioned at the component data
 * c     : component index
 * s_dst : stride of dst, in bytes
 * dst   : destination plane of the component
 *
 * return: OAPV_OK on success, otherwise the failing result of the parsing or
 *         reconstruction step
 */
static int dec_tile_comp(oapvd_tile_t *tile, oapvd_ctx_t *ctx, oapvd_core_t *core, oapv_bs_t *bs, int c, int s_dst, void *dst)
{
    int ret;

    // macroblock size and tile rectangle expressed in units of this component
    const int mb_w = OAPV_MB_W >> ctx->comp_sft[c][0];
    const int mb_h = OAPV_MB_H >> ctx->comp_sft[c][1];
    const int x_beg = tile->x >> ctx->comp_sft[c][0];
    const int x_end = (tile->w >> ctx->comp_sft[c][0]) + x_beg;
    const int y_beg = tile->y >> ctx->comp_sft[c][1];
    const int y_end = (tile->h >> ctx->comp_sft[c][1]) + y_beg;

    for(int mb_y = y_beg; mb_y < y_end; mb_y += mb_h) {
        for(int mb_x = x_beg; mb_x < x_end; mb_x += mb_w) {
            for(int blk_y = mb_y; blk_y < (mb_y + mb_h); blk_y += OAPV_BLK_H) {
                for(int blk_x = mb_x; blk_x < (mb_x + mb_w); blk_x += OAPV_BLK_W) {
                    /* DC coefficient */
                    ret = oapvd_vlc_dc_coeff(ctx, core, bs, &core->coef[0], c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    /* AC coefficients */
                    ret = oapvd_vlc_ac_coeff(ctx, core, bs, core->coef, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
                    DUMP_COEF(core->coef, OAPV_BLK_D, blk_x, blk_y, c);

                    /* reconstruction */
                    ret = dec_block(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    /* store the block into the destination plane */
                    s16 *out = (s16 *)((u8 *)dst + blk_y * s_dst) + blk_x;
                    ctx->fn_block_to_imgb[c](core->coef, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), blk_x, s_dst, out);
                }
            }
        }
    }

    /* byte align */
    oapv_bsr_align8(bs);
    return OAPV_OK;
}
1644 
/*
 * Decode one complete tile with the given worker core.
 *
 * The tile payload starts OAPV_TILE_SIZE_LEN bytes after tile->bs_beg and is
 * tile->data_size bytes long; both must have been filled in by the caller.
 *
 * Steps:
 *   1. parse the tile header into tile->th,
 *   2. set up the per-component dequantization state in the core,
 *   3. decode each component into the output image held by the context,
 *   4. consume the trailing dummy data of the tile.
 *
 * Returns OAPV_OK, or the first failing status.
 */
static int dec_tile(oapvd_core_t *core, oapvd_tile_t *tile)
{
    oapvd_ctx_t *ctx = core->ctx;
    oapv_bs_t    bs;
    int          ret;

    oapv_bsr_init(&bs, tile->bs_beg + OAPV_TILE_SIZE_LEN, tile->data_size, NULL);
    ret = oapvd_vlc_tile_header(&bs, ctx, &tile->th);
    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

    /* per-component quantization setup */
    for(int c = 0; c < ctx->num_comp; c++) {
        // entropy-decoding predictors start from fixed values in every tile
        core->prev_dc[c] = 0;
        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;

        core->qp[c] = tile->th.tile_qp[c];
        core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

        // flatten the 2-D frame-header matrix, pre-multiplied by the QP scale
        const int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
        for(int y = 0; y < OAPV_BLK_H; y++) {
            for(int x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat[c][y * OAPV_BLK_W + x] = dq_scale * ctx->fh.q_matrix[c][y][x]; // 7bit + 8bit
            }
        }
    }

    /* per-component block decoding */
    for(int c = 0; c < ctx->num_comp; c++) {
        int plane = c;    // image plane that holds component c
        int sample_ofs = 0; // offset, in s16 samples, inside that plane

        if(OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2) {
            // two-plane layout: component 0 in plane 0, all others share
            // plane 1, with components above 1 starting one sample later
            plane = (c > 0) ? 1 : 0;
            sample_ofs = (c > 1) ? 1 : 0;
        }

        s16 *dst = ctx->imgb->a[plane];
        dst += sample_ofs;
        const int s_dst = ctx->imgb->s[plane];

        ret = dec_tile_comp(tile, ctx, core, &bs, c, s_dst, dst);
        oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
    }

    oapvd_vlc_tile_dummy_data(&bs);
    return OAPV_OK;
}
1693 
dec_thread_tile(void * arg)1694 static int dec_thread_tile(void *arg)
1695 {
1696     oapv_bs_t     bs;
1697     int           i, ret, run, tile_idx = 0, thread_ret = OAPV_OK;
1698 
1699     oapvd_core_t *core = (oapvd_core_t *)arg;
1700     oapvd_ctx_t  *ctx = core->ctx;
1701     oapvd_tile_t *tile = ctx->tile;
1702 
1703     while(1) {
1704         // find not decoded tile
1705         oapv_tpool_enter_cs(ctx->sync_obj);
1706         for(i = 0; i < ctx->num_tiles; i++) {
1707             if(tile[i].stat == DEC_TILE_STAT_NOT_DECODED) {
1708                 tile[i].stat = DEC_TILE_STAT_ON_DECODING;
1709                 tile_idx = i;
1710                 break;
1711             }
1712         }
1713         oapv_tpool_leave_cs(ctx->sync_obj);
1714         if(i == ctx->num_tiles) {
1715             break;
1716         }
1717 
1718         // wait until to know bistream start position
1719         run = 1;
1720         while(run) {
1721             oapv_tpool_enter_cs(ctx->sync_obj);
1722             if(tile[tile_idx].bs_beg != NULL) {
1723                 run = 0;
1724             }
1725             oapv_tpool_leave_cs(ctx->sync_obj);
1726         }
1727         /* read tile size */
1728         oapv_bsr_init(&bs, tile[tile_idx].bs_beg, OAPV_TILE_SIZE_LEN, NULL);
1729         ret = oapvd_vlc_tile_size(&bs, &tile[tile_idx].data_size);
1730         oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
1731         oapv_assert_g(tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + (tile[tile_idx].data_size - 1) <= ctx->bs.end, ERR);
1732 
1733         oapv_tpool_enter_cs(ctx->sync_obj);
1734         if(tile_idx + 1 < ctx->num_tiles) {
1735             tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1736         }
1737         else {
1738             ctx->tile_end = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1739         }
1740         oapv_tpool_leave_cs(ctx->sync_obj);
1741 
1742         ret = dec_tile(core, &tile[tile_idx]);
1743 
1744         oapv_tpool_enter_cs(ctx->sync_obj);
1745         if (OAPV_SUCCEEDED(ret)) {
1746             tile[tile_idx].stat = DEC_TILE_STAT_DECODED;
1747         }
1748         else {
1749             tile[tile_idx].stat = ret;
1750             thread_ret = ret;
1751         }
1752         tile[tile_idx].stat = OAPV_SUCCEEDED(ret) ? DEC_TILE_STAT_DECODED : ret;
1753         oapv_tpool_leave_cs(ctx->sync_obj);
1754     }
1755     return thread_ret;
1756 
1757 ERR:
1758     oapv_tpool_enter_cs(ctx->sync_obj);
1759     tile[tile_idx].stat = DEC_TILE_STAT_SIZE_ERROR;
1760     if (tile_idx + 1 < ctx->num_tiles)
1761     {
1762         tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg;
1763     }
1764     oapv_tpool_leave_cs(ctx->sync_obj);
1765     return OAPV_ERR_MALFORMED_BITSTREAM;
1766 }
1767 
/*
 * Release everything dec_ready() set up: worker threads and the thread pool
 * (only present when more than one thread is configured), the context
 * synchronization object, and the per-thread cores.
 *
 * Also used as the error-path cleanup of dec_ready(), so it must cope with a
 * partially initialized context.
 */
static void dec_flush(oapvd_ctx_t *ctx)
{
    if(ctx->cdesc.threads >= 2) {
        if(ctx->tpool) {
            // thread controller instance is present
            // terminate the created threads
            for(int i = 0; i < ctx->cdesc.threads - 1; i++) {
                if(ctx->thread_id[i]) {
                    // valid thread instance
                    ctx->tpool->release(&ctx->thread_id[i]);
                }
            }
            // deinitialize the tpool
            oapv_tpool_deinit(ctx->tpool);
            oapv_mfree(ctx->tpool);
            ctx->tpool = NULL;
        }
    }

    // dec_ready() jumps here when creating the sync object failed, so only
    // delete one that actually exists
    if(ctx->sync_obj != NULL) {
        oapv_tpool_sync_obj_delete(&(ctx->sync_obj));
    }

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        if(ctx->core[i] != NULL) {
            dec_core_free(ctx->core[i]);
            // dec_ready() tests core[0] against NULL to decide whether cores
            // must be allocated, so never leave a dangling pointer behind
            ctx->core[i] = NULL;
        }
    }
}
1793 
/*
 * Allocate the runtime resources of a decoder context:
 *   - one core per configured thread (skipped when core[0] already exists),
 *   - the context synchronization object,
 *   - when two or more threads are configured, a thread pool with
 *     (threads - 1) worker threads; the calling thread acts as the last worker.
 *
 * Returns OAPV_OK on success. On failure everything created so far is
 * released through dec_flush() and OAPV_ERR_OUT_OF_MEMORY or OAPV_ERR_UNKNOWN
 * is returned.
 */
static int dec_ready(oapvd_ctx_t *ctx)
{
    int i, ret = OAPV_OK;

    if(ctx->core[0] == NULL) {
        // create cores
        for(i = 0; i < ctx->cdesc.threads; i++) {
            ctx->core[i] = dec_core_alloc();
            oapv_assert_gv(ctx->core[i], ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
            ctx->core[i]->ctx = ctx;
        }
    }

    // initialize the threads to NULL
    for(i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 2) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // the pool is dereferenced right below; fail cleanly on out-of-memory
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads - 1);
        for(i = 0; i < ctx->cdesc.threads - 1; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }
    return OAPV_OK;

ERR:
    dec_flush(ctx);

    return ret;
}
1831 
/*
 * Select the inverse-transform and dequantization function tables for the
 * platform the library was built for.
 *
 * The default tables are installed first. X86_SSE builds switch to the AVX
 * tables when the CPU reports AVX2; ARM_NEON builds use the NEON
 * inverse-transform table together with the default dequantization table.
 *
 * Always returns OAPV_OK.
 */
static int dec_platform_init(oapvd_ctx_t *ctx)
{
    // default settings
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_itx = oapv_tbl_fn_itx;

#if X86_SSE
    const int cpu_info = oapv_check_cpu_info_x86();
    const int has_sse = cpu_info & 1;         // bit 0 of the CPU info word
    const int has_avx2 = (cpu_info >> 2) & 1; // bit 2 of the CPU info word

    if(has_avx2) {
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
    }
    else if(has_sse) {
        // SSE-only CPUs are given the same tables as the default
        ctx->fn_dquant = oapv_tbl_fn_dquant;
        ctx->fn_itx = oapv_tbl_fn_itx;
    }
#elif ARM_NEON
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
#endif

    return OAPV_OK;
}
1859 
oapvd_create(oapvd_cdesc_t * cdesc,int * err)1860 oapvd_t oapvd_create(oapvd_cdesc_t *cdesc, int *err)
1861 {
1862     oapvd_ctx_t *ctx;
1863     int          ret;
1864 
1865     DUMP_CREATE(0);
1866     ctx = NULL;
1867 
1868     /* check if any decoder argument is correctly set */
1869     oapv_assert_gv(cdesc->threads > 0 && cdesc->threads <= OAPV_MAX_THREADS, ret, OAPV_ERR_INVALID_ARGUMENT, ERR);
1870 
1871     /* memory allocation for ctx and core structure */
1872     ctx = (oapvd_ctx_t *)dec_ctx_alloc();
1873     oapv_assert_gv(ctx != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
1874     oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapvd_cdesc_t));
1875 
1876     /* initialize platform-specific variables */
1877     ret = dec_platform_init(ctx);
1878     oapv_assert_g(ret == OAPV_OK, ERR);
1879 
1880     /* ready for decoding */
1881     ret = dec_ready(ctx);
1882     oapv_assert_g(ret == OAPV_OK, ERR);
1883 
1884     ctx->magic = OAPVD_MAGIC_CODE;
1885     ctx->id = (oapvd_t)ctx;
1886     if(err) {
1887         *err = OAPV_OK;
1888     }
1889     return (ctx->id);
1890 
1891 ERR:
1892     if(ctx) {
1893         dec_ctx_free(ctx);
1894     }
1895     if(err) {
1896         *err = ret;
1897     }
1898     return NULL;
1899 }
1900 
/*
 * Destroy a decoder instance created by oapvd_create().
 *
 * The handle is first translated to its context; when that translation
 * yields no context the oapv_assert_r() check ends the call. Otherwise the
 * runtime resources are released and the context is freed.
 */
void oapvd_delete(oapvd_t did)
{
    oapvd_ctx_t *ctx = dec_id_to_ctx(did);

    oapv_assert_r(ctx);

    DUMP_DELETE();
    dec_flush(ctx);    // threads, sync object, cores
    dec_ctx_free(ctx); // the context itself
}
1911 
/*
 * Decode one access unit.
 *
 * did   : decoder handle from oapvd_create()
 * bitb  : input; bitb->addr / bitb->ssize describe the access unit bytes
 * ofrms : output frames; ofrms->frm[n].imgb is the image buffer for the n-th
 *         frame PBU and ofrms->num_frms the number of frames the caller expects
 * mid   : metadata container handed to the metadata parser
 * stat  : receives bytes read, per-frame sizes and per-frame information
 *
 * The access unit is walked PBU by PBU (4-byte size, then the PBU). Frame
 * PBUs are decoded tile by tile, using up to cdesc.threads workers; metadata
 * and filler PBUs are parsed; other PBU types are skipped.
 *
 * Returns OAPV_OK on success. Returns a negative status when parsing or tile
 * decoding fails, OAPV_ERR_REACHED_MAX when the access unit holds more than
 * OAPV_MAX_NUM_FRAMES frames, and OAPV_ERR_MALFORMED_BITSTREAM when a PBU
 * does not fit in the input or the frame count differs from ofrms->num_frms.
 */
int oapvd_decode(oapvd_t did, oapv_bitb_t *bitb, oapv_frms_t *ofrms, oapvm_t mid, oapvd_stat_t *stat)
{
    oapvd_ctx_t *ctx;
    oapv_bs_t   *bs;
    oapv_pbuh_t  pbuh;
    int          ret = OAPV_OK;
    u32          pbu_size;
    u32          remain;
    u8          *curpos;
    int          frame_cnt = 0;

    ctx = dec_id_to_ctx(did);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    curpos = (u8 *)bitb->addr;
    remain = bitb->ssize;

    while(remain > 8) {
        oapv_bsr_init(&ctx->bs, curpos, remain, NULL);
        bs = &ctx->bs;

        ret = oapvd_vlc_pbu_size(bs, &pbu_size); // 4byte
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        // The PBU plus its 4-byte size field must fit in what is left.
        // remain > 8 here, so "remain - 4" cannot wrap, and testing it first
        // keeps "pbu_size + 4" from wrapping for sizes close to 2^32.
        // A violation is reported explicitly; ret still holds OAPV_OK from
        // the successful size read above.
        oapv_assert_gv(pbu_size <= remain - 4 && (pbu_size + 4) <= bs->size,
                       ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);

        curpos += 4; // pbu_size syntax
        remain -= 4;

        ret = oapvd_vlc_pbu_header(bs, &pbuh);
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {
            // frame_cnt indexes ofrms->frm[], stat->frm_size[] and
            // stat->aui.frm_info[]; refuse more frames than oapvd_info()
            // accepts for the same per-frame tables
            oapv_assert_gv(frame_cnt < OAPV_MAX_NUM_FRAMES, ret, OAPV_ERR_REACHED_MAX, ERR);

            ret = oapvd_vlc_frame_header(bs, &ctx->fh);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ret = dec_frm_prepare(ctx, ofrms->frm[frame_cnt].imgb);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            int           res;
            oapv_tpool_t *tpool = ctx->tpool;
            int           parallel_task = 1;
            int           tidx = 0;

            // never start more workers than there are tiles
            parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

            /* decode tiles ************************************/
            for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
                tpool->run(ctx->thread_id[tidx], dec_thread_tile,
                           (void *)ctx->core[tidx]);
            }
            // the calling thread works as the last worker
            ret = dec_thread_tile((void *)ctx->core[tidx]);
            for(tidx = 0; tidx < parallel_task - 1; tidx++) {
                tpool->join(ctx->thread_id[tidx], &res);
                if(OAPV_FAILED(res)) {
                    ret = res;
                }
            }
            /****************************************************/

            if(OAPV_FAILED(ret)) {
                // A tile failed. Run the frame cleanup as before, but keep
                // the tile error instead of letting the cleanup status
                // replace it, and do not use ctx->tile_end, which may not
                // have been established for this frame.
                dec_frm_finish(ctx);
                goto ERR;
            }

            /* READ FILLER HERE !!! */

            oapv_bsr_move(&ctx->bs, ctx->tile_end);
            stat->read += bsr_get_read_byte(&ctx->bs);

            copy_fi_to_finfo(&ctx->fh.fi, pbuh.pbu_type, pbuh.group_id, &stat->aui.frm_info[frame_cnt]);
            if(ctx->use_frm_hash) {
                oapv_imgb_set_md5(ctx->imgb);
            }
            ret = dec_frm_finish(ctx);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ofrms->frm[frame_cnt].pbu_type = pbuh.pbu_type;
            ofrms->frm[frame_cnt].group_id = pbuh.group_id;
            stat->frm_size[frame_cnt] = pbu_size + 4 /* PBU size length */;
            frame_cnt++;
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_METADATA) {
            ret = oapvd_vlc_metadata(bs, pbu_size, mid, pbuh.group_id);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            stat->read += bsr_get_read_byte(&ctx->bs);
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_FILLER) {
            ret = oapvd_vlc_filler(bs, (pbu_size - 4));
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        }
        // advance to the next PBU; remain never goes below zero
        curpos += pbu_size;
        remain = (remain < pbu_size) ? 0 : (remain - pbu_size);
    }
    stat->aui.num_frms = frame_cnt;
    oapv_assert_rv(ofrms->num_frms == frame_cnt, OAPV_ERR_MALFORMED_BITSTREAM);
    return ret;

ERR:
    return ret;
}
2012 
/*
 * Set a decoder configuration item.
 *
 * did  : decoder handle
 * cfg  : configuration selector
 * buf  : value for the selected item
 * size : not used by any currently supported item
 *
 * Supported items:
 *   OAPV_CFG_SET_USE_FRM_HASH - buf points to an int; any non-zero value
 *                               turns the frame hash on, zero turns it off.
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for a bad handle and
 * OAPV_ERR_UNSUPPORTED for any other selector.
 */
int oapvd_config(oapvd_t did, int cfg, void *buf, int *size)
{
    oapvd_ctx_t *ctx = dec_id_to_ctx(did);

    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    /* set config ************************************************************/
    if(cfg == OAPV_CFG_SET_USE_FRM_HASH) {
        const int *flag = (const int *)buf;

        ctx->use_frm_hash = (*flag != 0);
        return OAPV_OK;
    }

    // unknown selector
    oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    return OAPV_OK;
}
2031 
/*
 * Collect frame information from an access unit without decoding it.
 *
 * au      : start of the access unit
 * au_size : size of the access unit in bytes
 * aui     : receives the number of frames and the per-frame information
 *
 * If the access unit carries an access-unit-info PBU, its content is copied
 * to aui and the scan stops there. Otherwise the frame information of every
 * frame PBU met while walking the access unit is collected.
 *
 * aui->num_frms is always written on success, including for inputs too
 * short to hold a PBU (it is then 0).
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for invalid
 * arguments, OAPV_ERR_REACHED_MAX when more than OAPV_MAX_NUM_FRAMES frames
 * are present, OAPV_ERR_MALFORMED_BITSTREAM for a bad PBU header, or the
 * status of the failing parser.
 */
int oapvd_info(void *au, int au_size, oapv_au_info_t *aui)
{
    int ret, frm_count = 0;
    u8 *curpos;
    u32 remain;

    // a negative size would turn into a huge unsigned byte count below
    oapv_assert_rv(aui != NULL && au_size >= 0 && (au != NULL || au_size == 0), OAPV_ERR_INVALID_ARGUMENT);

    curpos = (u8 *)au;
    remain = au_size;
    aui->num_frms = 0; // defined result even when the loop never runs

    DUMP_SET(0);
    while(remain > 8) // FIX-ME (8byte?)
    {
        oapv_bs_t bs;
        u32       pbu_size = 0;

        oapv_bsr_init(&bs, curpos, remain, NULL);

        ret = oapvd_vlc_pbu_size(&bs, &pbu_size); // 4 byte
        oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
        curpos += 4; // pbu_size syntax
        remain -= 4;

        /* pbu header */
        oapv_pbuh_t pbuh;
        ret = oapvd_vlc_pbu_header(&bs, &pbuh); // 4 byte
        oapv_assert_rv(OAPV_SUCCEEDED(ret), OAPV_ERR_MALFORMED_BITSTREAM);
        if(pbuh.pbu_type == OAPV_PBU_TYPE_AU_INFO) {
            // parse access_unit_info in PBU
            oapv_aui_t ai;

            ret = oapvd_vlc_au_info(&bs, &ai);
            oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
            // aui->frm_info[] holds at most OAPV_MAX_NUM_FRAMES entries
            oapv_assert_rv(ai.num_frames <= OAPV_MAX_NUM_FRAMES, OAPV_ERR_REACHED_MAX);

            aui->num_frms = ai.num_frames;
            for(int i = 0; i < ai.num_frames; i++) {
                copy_fi_to_finfo(&ai.frame_info[i], ai.pbu_type[i], ai.group_id[i], &aui->frm_info[i]);
            }
            return OAPV_OK; // found access_unit_info, no need to read more PBUs
        }
        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {
            // parse frame_info in PBU
            oapv_fi_t fi;

            oapv_assert_rv(frm_count < OAPV_MAX_NUM_FRAMES, OAPV_ERR_REACHED_MAX);
            ret = oapvd_vlc_frame_info(&bs, &fi);
            oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

            copy_fi_to_finfo(&fi, pbuh.pbu_type, pbuh.group_id, &aui->frm_info[frm_count]);
            frm_count++;
        }
        aui->num_frms = frm_count;

        // advance to the next PBU; a PBU larger than the remaining input
        // simply ends the scan
        curpos += pbu_size;
        remain = (remain < pbu_size) ? 0 : (remain - pbu_size);
    }
    DUMP_SET(1);
    return OAPV_OK;
}
2096 
2097 ///////////////////////////////////////////////////////////////////////////////
2098 // end of decoder code
2099 #endif // ENABLE_DECODER
2100 ///////////////////////////////////////////////////////////////////////////////