1 /*
2 * Copyright (c) 2022 Samsung Electronics Co., Ltd.
3 * All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * - Neither the name of the copyright owner, nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "oapv_def.h"
33
/*
 * Copy a w_l x h_l block of component 'c' out of an image buffer into 'block'.
 *
 * For c != 0 the y_l / x_l position is shifted right by the chroma shifts
 * derived from the image color format; for c == 0 no shift is applied.
 * Rows are copied with oapv_mcpy, w_l * byte-depth bytes at a time, and are
 * stored back to back in 'block'.
 */
static void imgb_to_block(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    const int byte_depth = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int       shift_x = 0;
    int       shift_y = 0;

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        shift_x = get_chroma_sft_w(cfi);
        shift_y = get_chroma_sft_h(cfi);
    }

    u8       *in = ((u8 *)imgb->a[c]) + ((y_l >> shift_y) * imgb->s[c]) + ((x_l * byte_depth) >> shift_x);
    u8       *out = (u8 *)block;
    const int row_bytes = w_l * byte_depth;

    for(int row = 0; row < h_l; row++) {
        oapv_mcpy(out, in, row_bytes);
        in += imgb->s[c]; // next source row (stride is in bytes)
        out += row_bytes; // destination rows are packed
    }
}
59
imgb_to_block_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)60 static void imgb_to_block_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
61 {
62 const int mid_val = (1 << (10 - 1));
63 s16 *s = (s16 *)src;
64 s16 *d = (s16 *)dst;
65
66 for(int h = 0; h < blk_h; h++) {
67 for(int w = 0; w < blk_w; w++) {
68 d[w] = s[w] - mid_val;
69 }
70 s = (s16 *)(((u8 *)s) + s_src);
71 d = (s16 *)(((u8 *)d) + s_dst);
72 }
73 }
74
imgb_to_block_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)75 static void imgb_to_block_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
76 {
77 const int mid_val = (1 << (10 - 1));
78 u16 *s = (s16 *)src;
79 s16 *d = (s16 *)dst;
80
81 for(int h = 0; h < blk_h; h++) {
82 for(int w = 0; w < blk_w; w++) {
83 d[w] = (s16)(s[w] >> 6) - mid_val;
84 }
85 s = (u16 *)(((u8 *)s) + s_src);
86 d = (s16 *)(((u8 *)d) + s_dst);
87 }
88 }
89
imgb_to_block_p210_uv(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)90 static void imgb_to_block_p210_uv(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
91 {
92 const int mid_val = (1 << (10 - 1));
93 u16 *s = (u16 *)src + offset_src;
94 s16 *d = (s16 *)dst;
95
96 for(int h = 0; h < blk_h; h++) {
97 for(int w = 0; w < blk_w; w++) {
98 d[w] = (s16)(s[w * 2] >> 6) - mid_val;
99 }
100 s = (u16 *)(((u8 *)s) + s_src);
101 d = (s16 *)(((u8 *)d) + s_dst);
102 }
103 }
104
/*
 * Copy a w_l x h_l block of component 'c' out of a P210 image buffer.
 *
 * Component 0 is read from plane 0 with a sample step of 1. Any other
 * component is read from plane 1 with a sample step of 2, starting at word
 * offset (c >> 1) within each pair, and with the position shifted by the
 * chroma shifts of the color format. Every word is shifted right by 6
 * before it is stored; no mid-value offset is applied here.
 */
static void imgb_to_block_p210(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    const int byte_depth = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int       shift_x = 0;
    int       shift_y = 0;
    int       step = 1;  // distance (in words) between samples of one component
    int       plane = c; // plane index inside imgb

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        shift_x = get_chroma_sft_w(cfi);
        shift_y = get_chroma_sft_h(cfi);
        step = 2;
        plane = 1;
    }

    // stride in 16-bit words when the byte depth is larger than one
    const int stride = imgb->s[plane] >> (byte_depth > 1 ? 1 : 0);
    const int comp_off = c >> 1;

    u16 *in = ((u16 *)imgb->a[plane]) + ((y_l >> shift_y) * stride) + ((x_l * step) >> shift_x);
    u16 *out = (u16 *)block;

    for(int row = 0; row < h_l; row++) {
        for(int col = 0; col < w_l; col++) {
            out[col] = (in[col * step + comp_off] >> 6);
        }
        in += stride;
        out += w_l;
    }
}
136
block_to_imgb_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)137 static void block_to_imgb_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
138 {
139 const int max_val = (1 << 10) - 1;
140 const int mid_val = (1 << (10 - 1));
141 s16 *s = (s16 *)src;
142 u16 *d = (u16 *)dst;
143
144 for(int h = 0; h < blk_h; h++) {
145 for(int w = 0; w < blk_w; w++) {
146 d[w] = oapv_clip3(0, max_val, s[w] + mid_val);
147 }
148 s = (s16 *)(((u8 *)s) + s_src);
149 d = (u16 *)(((u8 *)d) + s_dst);
150 }
151 }
152
block_to_imgb_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)153 static void block_to_imgb_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
154 {
155 const int max_val = (1 << 10) - 1;
156 const int mid_val = (1 << (10 - 1));
157 s16 *s = (s16 *)src;
158 u16 *d = (u16 *)dst;
159
160 for(int h = 0; h < blk_h; h++) {
161 for(int w = 0; w < blk_w; w++) {
162 d[w] = oapv_clip3(0, max_val, s[w] + mid_val) << 6;
163 }
164 s = (s16 *)(((u8 *)s) + s_src);
165 d = (u16 *)(((u8 *)d) + s_dst);
166 }
167 }
168
block_to_imgb_p210_uv(void * src,int blk_w,int blk_h,int s_src,int x_pel,int s_dst,void * dst)169 static void block_to_imgb_p210_uv(void *src, int blk_w, int blk_h, int s_src, int x_pel, int s_dst, void *dst)
170 {
171 const int max_val = (1 << 10) - 1;
172 const int mid_val = (1 << (10 - 1));
173 s16 *s = (s16 *)src;
174
175 // x_pel is x-offset value from left boundary of picture in unit of pixel.
176 // the 'dst' address has calculated by
177 // dst = (s16*)((u8*)origin + y_pel*s_dst) + x_pel;
178 // in case of P210 color format,
179 // since 's_dst' is byte size of stride including all U and V pixel values,
180 // y-offset calculation is correct.
181 // however, the adding only x_pel is not enough to address the correct pixel
182 // position of U or V because U & V use the same buffer plane
183 // in interleaved way,
184 // so, the 'dst' address should be increased by 'x_pel' to address pixel
185 // position correctly.
186 u16 *d = (u16 *)dst + x_pel; // p210 pixel value needs 0~65535 range
187
188 for(int h = 0; h < blk_h; h++) {
189 for(int w = 0; w < blk_w; w++) {
190 d[w * 2] = ((u16)oapv_clip3(0, max_val, s[w] + mid_val)) << 6;
191 }
192 s = (s16 *)(((u8 *)s) + s_src);
193 d = (u16 *)(((u8 *)d) + s_dst);
194 }
195 }
196
/*
 * Add the mid value (1 << (bit_depth - 1)) to each of the b_w * b_h entries
 * of 'coef' in place, clipping every result to [0, (1 << bit_depth) - 1].
 */
static void plus_mid_val(s16 *coef, int b_w, int b_h, int bit_depth)
{
    const int offset = 1 << (bit_depth - 1);
    const int total = b_h * b_w;

    for(int n = 0; n < total; n++) {
        coef[n] = oapv_clip3(0, (1 << bit_depth) - 1, coef[n] + offset);
    }
}
204
/*
 * Fill a frame-info record from a parsed frame-info header.
 *
 * 'pbu_type' and 'group_id' are stored as given; every other field is
 * taken from 'fi'. The color space word is built from the chroma format
 * and bit depth of 'fi'.
 */
static void copy_fi_to_finfo(oapv_fi_t *fi, int pbu_type, int group_id, oapv_frm_info_t *finfo)
{
    /* values supplied by the caller */
    finfo->pbu_type = pbu_type;
    finfo->group_id = group_id;

    /* picture geometry; casting to 'int' would be fine here */
    finfo->w = (int)fi->frame_width;
    finfo->h = (int)fi->frame_height;

    /* sample format */
    finfo->cs = OAPV_CS_SET(chroma_format_idc_to_color_format(fi->chroma_format_idc), fi->bit_depth, 0);
    finfo->chroma_format_idc = fi->chroma_format_idc;
    finfo->bit_depth = fi->bit_depth;

    /* profile / level / band */
    finfo->profile_idc = fi->profile_idc;
    finfo->level_idc = fi->level_idc;
    finfo->band_idc = fi->band_idc;

    finfo->capture_time_distance = fi->capture_time_distance;
}
219
220 ///////////////////////////////////////////////////////////////////////////////
221 // start of encoder code
222 #if ENABLE_ENCODER
223 ///////////////////////////////////////////////////////////////////////////////
224
enc_id_to_ctx(oapve_t id)225 static oapve_ctx_t *enc_id_to_ctx(oapve_t id)
226 {
227 oapve_ctx_t *ctx;
228 oapv_assert_rv(id, NULL);
229 ctx = (oapve_ctx_t *)id;
230 oapv_assert_rv((ctx)->magic == OAPVE_MAGIC_CODE, NULL);
231 return ctx;
232 }
233
enc_ctx_alloc(void)234 static oapve_ctx_t *enc_ctx_alloc(void)
235 {
236 oapve_ctx_t *ctx;
237 ctx = (oapve_ctx_t *)oapv_malloc_fast(sizeof(oapve_ctx_t));
238 oapv_assert_rv(ctx, NULL);
239 oapv_mset_x64a(ctx, 0, sizeof(oapve_ctx_t));
240 return ctx;
241 }
242
/*
 * Release an encoder context with oapv_mfree_fast.
 * Resources referenced from inside the context are not touched here.
 */
static void enc_ctx_free(oapve_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
247
enc_core_alloc()248 static oapve_core_t *enc_core_alloc()
249 {
250 oapve_core_t *core;
251 core = (oapve_core_t *)oapv_malloc_fast(sizeof(oapve_core_t));
252
253 oapv_assert_rv(core, NULL);
254 oapv_mset_x64a(core, 0, sizeof(oapve_core_t));
255
256 return core;
257 }
258
/*
 * Release an encoder core object with oapv_mfree_fast.
 */
static void enc_core_free(oapve_core_t *core)
{
    oapv_mfree_fast(core);
}
263
/*
 * Bind a core object to its owning context and to a tile index.
 * 'thread_idx' is accepted but not used. Always returns OAPV_OK.
 */
static int enc_core_init(oapve_core_t *core, oapve_ctx_t *ctx, int tile_idx, int thread_idx)
{
    core->ctx = ctx;
    core->tile_idx = tile_idx;

    return OAPV_OK;
}
270
/*
 * Subtract the mid value (1 << (bit_depth - 1)) from each of the
 * w_blk * h_blk entries of 'coef', in place. No clipping is performed.
 */
static void enc_minus_mid_val(s16 *coef, int w_blk, int h_blk, int bit_depth)
{
    const int offset = 1 << (bit_depth - 1);
    const int total = h_blk * w_blk;

    for(int n = 0; n < total; n++) {
        coef[n] -= offset;
    }
}
278
/*
 * Split a w_pel x h_pel picture into a grid of tiles of at most
 * tile_w x tile_h pixels and fill ti[] in raster order.
 *
 * The number of columns / rows is the picture size divided by the tile size,
 * rounded up; tiles in the last column / row are shrunk so that they end at
 * the picture boundary. The three counters are written through the output
 * pointers. Always returns OAPV_OK.
 */
static int enc_set_tile_info(oapve_tile_t *ti, int w_pel, int h_pel, int tile_w,
                             int tile_h, int *num_tile_cols, int *num_tile_rows, int *num_tiles)
{
    const int cols = (w_pel + (tile_w - 1)) / tile_w;
    const int rows = (h_pel + (tile_h - 1)) / tile_h;
    const int total = cols * rows;

    *num_tile_cols = cols;
    *num_tile_rows = rows;
    *num_tiles = total;

    for(int idx = 0; idx < total; idx++) {
        oapve_tile_t *t = &ti[idx];
        const int     left = (idx % cols) * tile_w;
        const int     top = (idx / cols) * tile_h;

        t->x = left;
        t->y = top;
        t->w = (left + tile_w > w_pel) ? (w_pel - left) : tile_w;
        t->h = (top + tile_h > h_pel) ? (h_pel - top) : tile_h;
    }

    return OAPV_OK;
}
296
/*
 * Plain block encoder: transform and quantize core->coef in place, then
 * replace the DC coefficient by its difference against core->prev_dc[c]
 * (which is updated to the new quantized DC).
 *
 * When ctx->rec is set, core->coef_rec additionally receives the
 * reconstruction obtained by restoring the DC, dequantizing and applying
 * the inverse transform. Always returns 0.
 */
static double enc_block(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    const int bit_depth = ctx->bit_depth;

    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    ctx->fn_quant[0](core->coef, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 128 : 212);

    /* DC is coded as a difference against the previous block's DC */
    const int dc_pred = core->prev_dc[c];
    core->prev_dc[c] = core->coef[0];
    core->coef[0] = core->coef[0] - dc_pred;

    if(!ctx->rec) {
        return 0;
    }

    /* reconstruction path: undo the DC prediction before dequantizing */
    oapv_mcpy(core->coef_rec, core->coef, sizeof(s16) * OAPV_BLK_D);
    core->coef_rec[0] = core->coef_rec[0] + dc_pred;
    ctx->fn_dquant[0](core->coef_rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](core->coef_rec, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);

    return 0;
}
317
/*
 * Slow-preset block encoder.
 *
 * Transforms and quantizes core->coef, then walks the quantized coefficients
 * in scan order and tries small +/- adjustments on each one, keeping every
 * adjustment that lowers the SSD between the untransformed input block and
 * its reconstruction (dequantize + inverse transform).
 *
 * On return core->coef holds the selected coefficients with the DC replaced
 * by its difference against core->prev_dc[c] (which is updated), and, when
 * ctx->rec is set, core->coef_rec holds the matching reconstruction.
 * Returns the lowest SSD found.
 */
static double enc_block_rdo_slow(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 recon[OAPV_BLK_D]) = { 0 };
    ALIGNED_16(s16 coeff[OAPV_BLK_D]) = { 0 };
    s16        org[OAPV_BLK_D] = { 0 };
    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scanp = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  bit_depth = ctx->bit_depth;
    const int  qp = core->qp[c];
    s16       *best_coeff = core->coef;
    s16       *best_recon = core->coef_rec;

    /* keep the untransformed input as the distortion reference */
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    /* baseline: distortion of the unmodified quantization result */
    oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    int best_cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width, bit_depth);
    int exact = (best_cost == 0); /* nothing left to improve once SSD is 0 */

    oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
    }

    const int num_pass = (c == 0) ? 2 : 1;
    for(int pass = 0; pass < num_pass && !exact; pass++) {
        for(int pos = 0; pos < OAPV_BLK_D && !exact; pos++) {
            const int idx = scanp[pos];
            const s16 base = coeff[idx];
            int       range = (c == 0) ? 13 : 5;
            int       winner = 0; /* index into map_idx_diff of the kept adjustment */

            if(base == 0) {
                /* zero coefficients are only revisited for the first three
                   positions of component 0 */
                if(c != 0 || idx >= 3) {
                    continue;
                }
                range = 3;
            }

            for(int k = 1; k < range && !exact; k++) {
                /* beyond +/-1, continue only in the direction (parity of the
                   table index) that already produced an improvement */
                if(k > 2 && (winner == 0 || (winner % 2) != (k % 2))) {
                    continue;
                }

                const s16 trial = base + map_idx_diff[k];
                coeff[idx] = trial;

                oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
                const int cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width, bit_depth);

                if(cost >= best_cost) {
                    /* no gain: fall back to the best value seen so far */
                    coeff[idx] = base + map_idx_diff[winner];
                    continue;
                }

                best_cost = cost;
                best_coeff[idx] = trial;
                if(ctx->rec) {
                    oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
                }
                winner = k;
                if(cost == 0) {
                    exact = 1;
                }
            }
        }
    }

    /* DC is coded as a difference against the previous block's DC */
    const int curr_dc = best_coeff[0];
    best_coeff[0] -= core->prev_dc[c];
    core->prev_dc[c] = curr_dc;

    return best_cost;
}
413
/*
 * Medium-preset block encoder.
 *
 * Same greedy coefficient search as the slow preset, but a trial is not
 * reconstructed from scratch: the baseline reconstruction is kept in
 * 'rec_ups' (filled by oapv_itx_get_wo_sft) and each trial is derived from
 * it through ctx->fn_itx_adj with the coefficient's dequantized step times
 * the tried adjustment, followed by the (x + 512) >> 10 rounding below.
 *
 * On return core->coef holds the selected coefficients with the DC replaced
 * by its difference against core->prev_dc[c] (which is updated). When
 * ctx->rec is set, core->coef_rec is rebuilt from the final coefficients by
 * a full dequantize + inverse transform. Returns the lowest SSD found.
 */
static double enc_block_rdo_medium(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 org[OAPV_BLK_D]);
    ALIGNED_16(s16 recon[OAPV_BLK_D]);
    ALIGNED_16(s16 coeff[OAPV_BLK_D]);
    ALIGNED_16(s16 tmp_buf[OAPV_BLK_D]);

    ALIGNED_32(int rec_ups[OAPV_BLK_D]); /* accepted reconstruction, before final rounding */
    ALIGNED_32(int rec_tmp[OAPV_BLK_D]); /* trial reconstruction, before final rounding */

    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scanp = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  bit_depth = ctx->bit_depth;
    const int  qp = core->qp[c];
    s16       *best_coeff = core->coef;
    s16       *best_recon = core->coef_rec;

    /* keep the untransformed input as the distortion reference */
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);

    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    /* baseline: distortion of the unmodified quantization result */
    oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx_part[0](recon, tmp_buf, ITX_SHIFT1, 1 << log2_w);
    oapv_itx_get_wo_sft(tmp_buf, recon, rec_ups, ITX_SHIFT2(bit_depth), 1 << log2_h);

    int best_cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width, bit_depth);
    int exact = (best_cost == 0); /* nothing left to improve once SSD is 0 */

    oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
    }

    const int num_pass = (c == 0) ? 2 : 1;
    for(int pass = 0; pass < num_pass && !exact; pass++) {
        for(int pos = 0; pos < OAPV_BLK_D && !exact; pos++) {
            const int idx = scanp[pos];
            const s16 base = coeff[idx];
            int       range = (c == 0) ? 13 : 5;
            int       winner = 0; /* index into map_idx_diff of the kept adjustment */

            if(base == 0) {
                /* zero coefficients are only revisited for the first three
                   positions of component 0 */
                if(c != 0 || idx >= 3) {
                    continue;
                }
                range = 3;
            }

            /* dequantized size of one quantization level for this coefficient */
            int       q_step = 0;
            const int dq_shift = core->dq_shift[c];
            if(dq_shift > 0) {
                q_step = (core->q_mat_dec[c][idx] + (1 << (dq_shift - 1))) >> dq_shift;
            }
            else {
                q_step = (core->q_mat_dec[c][idx]) << (-dq_shift);
            }

            for(int k = 1; k < range && !exact; k++) {
                /* beyond +/-1, continue only in the direction (parity of the
                   table index) that already produced an improvement */
                if(k > 2 && (winner == 0 || (winner % 2) != (k % 2))) {
                    continue;
                }

                const s16 trial = base + map_idx_diff[k];
                coeff[idx] = trial;

                const int step_diff = q_step * map_idx_diff[k];
                ctx->fn_itx_adj[0](rec_ups, rec_tmp, pos, step_diff, 9);
                for(int n = 0; n < 64; n++) {
                    recon[n] = (rec_tmp[n] + 512) >> 10;
                }

                const int cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width, bit_depth);
                if(cost >= best_cost) {
                    /* no gain: fall back to the best value seen so far */
                    coeff[idx] = base + map_idx_diff[winner];
                    continue;
                }

                /* accept: the trial becomes the new incremental baseline */
                oapv_mcpy(rec_ups, rec_tmp, sizeof(int) * OAPV_BLK_D);
                best_cost = cost;
                best_coeff[idx] = trial;
                winner = k;
                if(cost == 0) {
                    exact = 1;
                }
            }
        }
    }

    if(ctx->rec) {
        /* final reconstruction through the regular dequant + inverse transform */
        oapv_mcpy(best_recon, best_coeff, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](best_recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](best_recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    }

    /* DC is coded as a difference against the previous block's DC */
    const int curr_dc = best_coeff[0];
    best_coeff[0] -= core->prev_dc[c];
    core->prev_dc[c] = curr_dc;

    return best_cost;
}
531
/*
 * Placebo-preset block encoder.
 *
 * Same search as the slow preset (every trial is fully dequantized and
 * inverse transformed) with wider limits: up to 7 passes and an adjustment
 * table range of 15 for component 0, 3 passes and a range of 5 otherwise.
 *
 * On return core->coef holds the selected coefficients with the DC replaced
 * by its difference against core->prev_dc[c] (which is updated), and, when
 * ctx->rec is set, core->coef_rec holds the matching reconstruction.
 * Returns the lowest SSD found.
 */
static double enc_block_rdo_placebo(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 org[OAPV_BLK_D]);
    ALIGNED_16(s16 recon[OAPV_BLK_D]);
    ALIGNED_16(s16 coeff[OAPV_BLK_D]);
    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scanp = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  bit_depth = ctx->bit_depth;
    const int  qp = core->qp[c];
    s16       *best_coeff = core->coef;
    s16       *best_recon = core->coef_rec;

    /* keep the untransformed input as the distortion reference */
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);

    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    /* baseline: distortion of the unmodified quantization result */
    oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    int best_cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width, bit_depth);
    int exact = (best_cost == 0); /* nothing left to improve once SSD is 0 */

    oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
    }

    const int num_pass = (c == 0) ? 7 : 3;
    for(int pass = 0; pass < num_pass && !exact; pass++) {
        for(int pos = 0; pos < OAPV_BLK_D && !exact; pos++) {
            const int idx = scanp[pos];
            const s16 base = coeff[idx];
            int       range = (c == 0) ? 15 : 5;
            int       winner = 0; /* index into map_idx_diff of the kept adjustment */

            if(base == 0) {
                /* zero coefficients are only revisited for the first three
                   positions of component 0 */
                if(c != 0 || idx >= 3) {
                    continue;
                }
                range = 3;
            }

            for(int k = 1; k < range && !exact; k++) {
                /* beyond +/-1, continue only in the direction (parity of the
                   table index) that already produced an improvement */
                if(k > 2 && (winner == 0 || (winner % 2) != (k % 2))) {
                    continue;
                }

                const s16 trial = base + map_idx_diff[k];
                coeff[idx] = trial;

                oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
                const int cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width, bit_depth);

                if(cost >= best_cost) {
                    /* no gain: fall back to the best value seen so far */
                    coeff[idx] = base + map_idx_diff[winner];
                    continue;
                }

                best_cost = cost;
                best_coeff[idx] = trial;
                if(ctx->rec) {
                    oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
                }
                winner = k;
                if(cost == 0) {
                    exact = 1;
                }
            }
        }
    }

    /* DC is coded as a difference against the previous block's DC */
    const int curr_dc = best_coeff[0];
    best_coeff[0] -= core->prev_dc[c];
    core->prev_dc[c] = curr_dc;

    return best_cost;
}
628
/*
 * Validate encoder parameters and derive the per-context settings from them:
 * per-component QPs, component count, the block-encoding routine for the
 * selected preset, the picture size rounded up to whole macroblocks, and the
 * tile layout.
 *
 * Returns OAPV_ERR_INVALID_ARGUMENT (through oapv_assert_rv) for a
 * non-positive size or an out-of-range QP, otherwise OAPV_OK.
 *
 * NOTE(review): picture size and tile size are read through ctx->param, not
 * through the 'param' argument; presumably both refer to the same object —
 * confirm against the caller.
 */
static int enc_read_param(oapve_ctx_t *ctx, oapve_param_t *param)
{
    /* check input parameters */
    oapv_assert_rv(param->w > 0 && param->h > 0, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(param->qp >= MIN_QUANT && param->qp <= MAX_QUANT, OAPV_ERR_INVALID_ARGUMENT);

    /* luma and the fourth component use the base QP; chroma adds its offset */
    ctx->qp[Y_C] = param->qp;
    ctx->qp[X_C] = param->qp;
    ctx->qp[U_C] = param->qp + param->qp_cb_offset;
    ctx->qp[V_C] = param->qp + param->qp_cr_offset;

    ctx->num_comp = get_num_comp(param->csp);

    /* pick the block encoder for the preset; anything else uses enc_block */
    ctx->fn_block = (param->preset == OAPV_PRESET_SLOW)      ? enc_block_rdo_slow
                    : (param->preset == OAPV_PRESET_PLACEBO) ? enc_block_rdo_placebo
                    : (param->preset == OAPV_PRESET_MEDIUM)  ? enc_block_rdo_medium
                                                             : enc_block;

    ctx->log2_block = OAPV_LOG2_BLK;

    /* picture size rounded up to a multiple of the macroblock size */
    ctx->w = ((ctx->param->w + (OAPV_MB_W - 1)) >> OAPV_LOG2_MB_W) << OAPV_LOG2_MB_W;
    ctx->h = ((ctx->param->h + (OAPV_MB_H - 1)) >> OAPV_LOG2_MB_H) << OAPV_LOG2_MB_H;

    /* tile size is configured in macroblocks; convert to pixels */
    const int tile_w_pel = ctx->param->tile_w_mb * OAPV_MB_W;
    const int tile_h_pel = ctx->param->tile_h_mb * OAPV_MB_H;
    enc_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w_pel, tile_h_pel,
                      &ctx->num_tile_cols, &ctx->num_tile_rows, &ctx->num_tiles);

    return OAPV_OK;
}
667
/*
 * Release everything enc_ready() set up: worker threads and the thread pool
 * controller, the synchronization object, the per-thread core objects and
 * the bitstream buffer attached to tile 0.
 */
static void enc_flush(oapve_ctx_t *ctx)
{
    /* thread pool controller and the threads created through it */
    if(ctx->cdesc.threads >= 1 && ctx->tpool) {
        for(int t = 0; t < ctx->cdesc.threads; t++) {
            if(!ctx->thread_id[t]) {
                continue; /* this slot never got a thread */
            }
            ctx->tpool->release(&ctx->thread_id[t]);
        }
        /* de-initialize and free the controller itself */
        oapv_tpool_deinit(ctx->tpool);
        oapv_mfree_fast(ctx->tpool);
        ctx->tpool = NULL;
    }

    oapv_tpool_sync_obj_delete(&ctx->sync_obj);

    /* one core object per configured thread */
    for(int t = 0; t < ctx->cdesc.threads; t++) {
        enc_core_free(ctx->core[t]);
        ctx->core[t] = NULL;
    }

    oapv_mfree_fast(ctx->tile[0].bs_buf);
}
696
/*
 * Allocate the run-time resources of an encoder context: one core object per
 * configured thread, the synchronization object, the thread pool with its
 * worker threads, and the bitstream buffer attached to tile 0. Tile states
 * and rate-control constants are reset as well.
 *
 * Returns OAPV_OK on success. On any failure everything acquired so far is
 * released through enc_flush() and an error code is returned
 * (OAPV_ERR_OUT_OF_MEMORY for failed core / thread-pool allocation,
 * OAPV_ERR_UNKNOWN otherwise).
 */
static int enc_ready(oapve_ctx_t *ctx)
{
    oapve_core_t *core = NULL;
    int           ret = OAPV_OK;
    oapv_assert(ctx->core[0] == NULL);

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        core = enc_core_alloc();
        oapv_assert_gv(core != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        ctx->core[i] = core;
    }

    // initialize the threads to NULL
    for(int i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 1) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // the pool object is initialized and dereferenced right below,
        // so an allocation failure must be caught here
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads);
        for(int i = 0; i < ctx->cdesc.threads; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }

    for(int i = 0; i < OAPV_MAX_TILES; i++) {
        ctx->tile[i].stat = ENC_TILE_STAT_NOT_ENCODED;
    }
    ctx->tile[0].bs_buf = (u8 *)oapv_malloc(ctx->cdesc.max_bs_buf_size);
    oapv_assert_gv(ctx->tile[0].bs_buf, ret, OAPV_ERR_UNKNOWN, ERR);

    ctx->rc_param.alpha = OAPV_RC_ALPHA;
    ctx->rc_param.beta = OAPV_RC_BETA;

    return OAPV_OK;
ERR:

    enc_flush(ctx);

    return ret;
}
743
/*
 * Encode all blocks of component 'c' that belong to 'tile' and append the
 * coded coefficients to 'bs'.
 *
 * The tile is walked macroblock by macroblock and, inside each macroblock,
 * block by block. For every block the input samples are fetched through
 * ctx->fn_imgb_to_block[c], encoded through ctx->fn_block, and the DC / AC
 * coefficients are written to the bitstream. When 'rec' is not NULL the
 * reconstructed block is stored through ctx->fn_block_to_imgb[c].
 *
 * s_org / s_rec are row strides in bytes. The bitstream is padded with zero
 * bits up to a byte boundary and de-initialized before returning.
 *
 * Returns the distance in bytes between the writer position obtained from
 * oapv_bsw_sink() at entry and bs->cur at exit, or
 * OAPV_ERR_MALFORMED_BITSTREAM (through oapv_assert_rv) when the writer is
 * not byte aligned at entry.
 */
static int enc_tile_comp(oapv_bs_t *bs, oapve_tile_t *tile, oapve_ctx_t *ctx, oapve_core_t *core, int c, int s_org, void *org, int s_rec, void *rec)
{
    u8 *bs_start = oapv_bsw_sink(bs);
    oapv_assert_rv(bsw_is_align8(bs), OAPV_ERR_MALFORMED_BITSTREAM);

    const int sft_x = ctx->comp_sft[c][0];
    const int sft_y = ctx->comp_sft[c][1];

    /* macroblock size and tile rectangle in units of this component */
    const int mb_w = OAPV_MB_W >> sft_x;
    const int mb_h = OAPV_MB_H >> sft_y;
    const int x_beg = tile->x >> sft_x;
    const int x_end = (tile->w >> sft_x) + x_beg;
    const int y_beg = tile->y >> sft_y;
    const int y_end = (tile->h >> sft_y) + y_beg;

    for(int mb_y = y_beg; mb_y < y_end; mb_y += mb_h) {
        for(int mb_x = x_beg; mb_x < x_end; mb_x += mb_w) {
            for(int blk_y = mb_y; blk_y < (mb_y + mb_h); blk_y += OAPV_BLK_H) {
                for(int blk_x = mb_x; blk_x < (mb_x + mb_w); blk_x += OAPV_BLK_W) {
                    /* row offset in bytes, column offset in 16-bit samples */
                    s16 *o16 = (s16 *)((u8 *)org + blk_y * s_org) + blk_x;
                    ctx->fn_imgb_to_block[c](o16, OAPV_BLK_W, OAPV_BLK_H, s_org, blk_x, (OAPV_BLK_W << 1), core->coef);

                    ctx->fn_block(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapve_vlc_dc_coeff(ctx, core, bs, core->coef[0], c);
                    oapve_vlc_ac_coeff(ctx, core, bs, core->coef, 0, c);
                    DUMP_COEF(core->coef, OAPV_BLK_D, blk_x, blk_y, c);

                    if(rec != NULL) {
                        s16 *r16 = (s16 *)((u8 *)rec + blk_y * s_rec) + blk_x;
                        ctx->fn_block_to_imgb[c](core->coef_rec, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), blk_x, s_rec, r16);
                    }
                }
            }
        }
    }

    /* byte align */
    while(!bsw_is_align8(bs)) {
        oapv_bsw_write1(bs, 0);
    }

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    return (int)(bs->cur - bs_start);
}
791
/*
 * Encode one tile into tile->bs_buf.
 *
 * Steps:
 *  1. choose the tile QP (from rate control when ctx->param->rc_type != 0,
 *     otherwise ctx->qp[Y_C]);
 *  2. write a placeholder tile size and the tile header;
 *  3. build the per-component quantization tables, plus the dequantization
 *     tables when they will be needed;
 *  4. encode every component with enc_tile_comp();
 *  5. rewrite tile size and tile header at the start of the buffer now that
 *     the sizes are known.
 *
 * Returns OAPV_OK, or OAPV_ERR_OUT_OF_BS_BUF when the produced bitstream is
 * larger than tile->bs_buf_max.
 */
static int enc_tile(oapve_ctx_t *ctx, oapve_core_t *core, oapve_tile_t *tile)
{
    oapv_bs_t bs;
    oapv_bsw_init(&bs, tile->bs_buf, tile->bs_buf_max, NULL);

    int qp = 0;
    if(ctx->param->rc_type != 0) {
        oapve_rc_get_qp(ctx, tile, ctx->qp[Y_C], &qp);
        oapv_assert(qp != 0);
    }
    else {
        qp = ctx->qp[Y_C];
    }

    // first pass over the header: the tile size is not known yet
    tile->tile_size = 0;
    DUMP_SAVE(0);
    oapve_vlc_tile_size(&bs, tile->tile_size);
    oapve_set_tile_header(ctx, &tile->th, core->tile_idx, qp);
    oapve_vlc_tile_header(ctx, &bs, &tile->th);

    for(int c = 0; c < ctx->num_comp; c++) {
        int cnt = 0;
        core->qp[c] = tile->th.tile_qp[c];
        int qscale = oapv_quant_scale[core->qp[c] % 6];
        s32 scale_multiply_16 = (s32)(qscale << 4); // 15bit + 4bit
        for(int y = 0; y < OAPV_BLK_H; y++) {
            for(int x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat_enc[c][cnt++] = scale_multiply_16 / ctx->fh.q_matrix[c][y][x];
            }
        }

        // The dequantization tables are read by the reconstruction path and by
        // every RDO block encoder. enc_block_rdo_medium (selected for
        // OAPV_PRESET_MEDIUM) reads them unconditionally, so the MEDIUM preset
        // itself must be included here, not only the presets above it.
        if(ctx->rec || ctx->param->preset >= OAPV_PRESET_MEDIUM) {
            core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

            cnt = 0;
            int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
            for(int y = 0; y < OAPV_BLK_H; y++) {
                for(int x = 0; x < OAPV_BLK_W; x++) {
                    core->q_mat_dec[c][cnt++] = dq_scale * ctx->fh.q_matrix[c][y][x];
                }
            }
        }
    }

    for(int c = 0; c < ctx->num_comp; c++) {
        // reset the per-component prediction / context state for this tile
        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;
        core->prev_dc[c] = 0;

        int  tc, s_org, s_rec;
        s16 *org, *rec;

        if(OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2) {
            // components 1 and 2 share plane 1; component 2 starts one
            // sample after component 1
            tc = c > 0 ? 1 : 0;
            org = ctx->imgb->a[tc];
            org += (c > 1) ? 1 : 0;
            s_org = ctx->imgb->s[tc];

            if(ctx->rec) {
                rec = ctx->rec->a[tc];
                rec += (c > 1) ? 1 : 0;
                // address the reconstruction with its own stride, not the
                // stride of the input image
                s_rec = ctx->rec->s[tc];
            }
            else {
                rec = NULL;
                s_rec = 0;
            }
        }
        else {
            org = ctx->imgb->a[c];
            s_org = ctx->imgb->s[c];
            if(ctx->rec) {
                rec = ctx->rec->a[c];
                // address the reconstruction with its own stride, not the
                // stride of the input image
                s_rec = ctx->rec->s[c];
            }
            else {
                rec = NULL;
                s_rec = 0;
            }
        }

        tile->th.tile_data_size[c] = enc_tile_comp(&bs, tile, ctx, core, c, s_org, org, s_rec, rec);
    }

    u32 bs_size = (int)(bs.cur - bs.beg);
    if(bs_size > tile->bs_buf_max) {
        return OAPV_ERR_OUT_OF_BS_BUF;
    }
    tile->bs_size = bs_size;

    // second pass over the header: rewrite it in place with the final sizes
    oapv_bs_t bs_th;
    bs_th.is_bin_count = 0;
    oapv_bsw_init(&bs_th, tile->bs_buf, tile->bs_size, NULL);
    tile->tile_size = bs_size - OAPV_TILE_SIZE_LEN;

    DUMP_SAVE(1);
    DUMP_LOAD(0);
    oapve_vlc_tile_size(&bs_th, tile->tile_size);
    oapve_vlc_tile_header(ctx, &bs_th, &tile->th);
    DUMP_LOAD(1);
    oapv_bsw_deinit(&bs_th);
    return OAPV_OK;
}
895
enc_thread_tile(void * arg)896 static int enc_thread_tile(void *arg)
897 {
898 oapve_core_t *core = (oapve_core_t *)arg;
899 oapve_ctx_t *ctx = core->ctx;
900 oapve_tile_t *tile = ctx->tile;
901 int ret = OAPV_OK, i;
902
903 while(1) {
904 // find not encoded tile
905 oapv_tpool_enter_cs(ctx->sync_obj);
906 for(i = 0; i < ctx->num_tiles; i++) {
907 if(tile[i].stat == ENC_TILE_STAT_NOT_ENCODED) {
908 tile[i].stat = ENC_TILE_STAT_ON_ENCODING;
909 core->tile_idx = i;
910 break;
911 }
912 }
913 oapv_tpool_leave_cs(ctx->sync_obj);
914 if(i == ctx->num_tiles) {
915 break;
916 }
917
918 ret = enc_tile(ctx, core, &tile[core->tile_idx]);
919 oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
920
921 oapv_tpool_enter_cs(ctx->sync_obj);
922 tile[core->tile_idx].stat = ENC_TILE_STAT_ENCODED;
923 oapv_tpool_leave_cs(ctx->sync_obj);
924 }
925 ERR:
926 return ret;
927 }
928
/*
 * Pad a P210 input image from the configured size (ctx->param->w/h) up to
 * the encoder's working size (ctx->w/h).
 *
 * Every plane is treated with the full picture width and height (no
 * per-plane shift is applied). Columns past the configured width are filled
 * with the last valid 'pel' of each row; rows past the configured height are
 * filled with copies of the last valid row. Row strides are converted from
 * bytes to 'pel' units with '>> 1'.
 */
static void enc_img_pad_p210(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    const int src_w = ctx->param->w;
    const int src_h = ctx->param->h;
    const int pad_w = ctx->w;
    const int pad_h = ctx->h;

    if(pad_w == src_w && pad_h == src_h) {
        return; /* already at working size */
    }

    if(pad_w != src_w) {
        /* horizontal padding: repeat the last valid pel of each row */
        for(int c = 0; c < imgb->np; c++) {
            const int stride = imgb->s[c] >> 1;
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < pad_h; y++) {
                const pel edge = row[src_w - 1];
                for(int x = src_w; x < pad_w; x++) {
                    row[x] = edge;
                }
                row += stride;
            }
        }
    }

    if(pad_h != src_h) {
        /* vertical padding: repeat the last valid row */
        for(int c = 0; c < imgb->np; c++) {
            const int stride = imgb->s[c] >> 1;
            pel      *dst = ((pel *)imgb->a[c]) + src_h * stride;
            pel      *last_row = dst - stride;

            for(int y = src_h; y < pad_h; y++) {
                oapv_mcpy(dst, last_row, sizeof(pel) * pad_w);
                dst += stride;
            }
        }
    }
}
/*
 * Pad a planar input image from the configured size (ctx->param->w/h) up to
 * the encoder's working size (ctx->w/h).
 *
 * Sizes are scaled per plane with ctx->comp_sft[c]. Columns past the
 * configured width are filled with the last valid 'pel' of each row; rows
 * past the configured height are filled with copies of the last valid row.
 * Row strides are converted from bytes to 'pel' units with '>> 1'.
 */
static void enc_img_pad(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    if(ctx->w == ctx->param->w && ctx->h == ctx->param->h) {
        return; /* already at working size */
    }

    if(ctx->w != ctx->param->w) {
        /* horizontal padding: repeat the last valid pel of each row */
        for(int c = 0; c < imgb->np; c++) {
            const int first_col = ctx->param->w >> ctx->comp_sft[c][0];
            const int end_col = ctx->w >> ctx->comp_sft[c][0];
            const int num_rows = ctx->h >> ctx->comp_sft[c][1];
            const int stride = imgb->s[c] >> 1;
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < num_rows; y++) {
                const pel edge = row[first_col - 1];
                for(int x = first_col; x < end_col; x++) {
                    row[x] = edge;
                }
                row += stride;
            }
        }
    }

    if(ctx->h != ctx->param->h) {
        /* vertical padding: repeat the last valid row */
        for(int c = 0; c < imgb->np; c++) {
            const int first_row = ctx->param->h >> ctx->comp_sft[c][1];
            const int end_row = ctx->h >> ctx->comp_sft[c][1];
            const int num_cols = ctx->w >> ctx->comp_sft[c][0];
            const int stride = imgb->s[c] >> 1;
            pel      *dst = ((pel *)imgb->a[c]) + first_row * stride;
            pel      *last_row = dst - stride;

            for(int y = first_row; y < end_row; y++) {
                oapv_mcpy(dst, last_row, sizeof(pel) * num_cols);
                dst += stride;
            }
        }
    }
}
1013
/*
 * Set up the encoder context for one frame.
 *
 * imgb_i : input picture; a reference is added and it is stored in ctx->imgb.
 * imgb_r : optional reconstructed picture (may be NULL); when given, its
 *          per-component w/h/x/y are copied from the input picture, a
 *          reference is added and it is stored in ctx->rec.
 *
 * The function derives the chroma format, component count, sub-sampling
 * shifts and bit depth from the input color space, selects the picture access
 * and padding functions for the input layout, pads the input picture, marks
 * all tiles as not encoded, splits the bitstream scratch buffer evenly between
 * the tiles and binds every core to this context.
 *
 * Returns OAPV_OK.
 */
static int enc_frm_prepare(oapve_ctx_t *ctx, oapv_imgb_t *imgb_i, oapv_imgb_t *imgb_r)
{
    int c, t;

    /* chroma format, number of components and sub-sampling shifts */
    ctx->cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb_i->cs));
    ctx->num_comp = get_num_comp(ctx->cfi);

    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;
    for(c = 1; c < ctx->num_comp; c++) {
        ctx->comp_sft[c][0] = get_chroma_sft_w(ctx->cfi);
        ctx->comp_sft[c][1] = get_chroma_sft_h(ctx->cfi);
    }

    ctx->bit_depth = OAPV_CS_GET_BIT_DEPTH(imgb_i->cs);

    /* picture access functions depend on the memory layout of the input */
    if(OAPV_CS_GET_FORMAT(imgb_i->cs) != OAPV_CF_PLANAR2) {
        ctx->fn_imgb_to_block_rc = imgb_to_block;
        for(c = 0; c < ctx->num_comp; c++) {
            ctx->fn_imgb_to_block[c] = imgb_to_block_10bit;
            ctx->fn_block_to_imgb[c] = block_to_imgb_10bit;
        }
        ctx->fn_img_pad = enc_img_pad;
    }
    else {
        ctx->fn_imgb_to_block_rc = imgb_to_block_p210;

        ctx->fn_imgb_to_block[Y_C] = imgb_to_block_p210_y;
        ctx->fn_imgb_to_block[U_C] = imgb_to_block_p210_uv;
        ctx->fn_imgb_to_block[V_C] = imgb_to_block_p210_uv;

        ctx->fn_block_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_block_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_block_to_imgb[V_C] = block_to_imgb_p210_uv;
        ctx->fn_img_pad = enc_img_pad_p210;
    }

    /* coded size: the aligned size of the luma plane when it is set,
       its plain size otherwise */
    if(imgb_i->aw[Y_C] > 0) {
        ctx->w = imgb_i->aw[Y_C];
    }
    else {
        ctx->w = imgb_i->w[Y_C];
    }
    if(imgb_i->ah[Y_C] > 0) {
        ctx->h = imgb_i->ah[Y_C];
    }
    else {
        ctx->h = imgb_i->h[Y_C];
    }

    /* fill the area between source size and coded size */
    ctx->fn_img_pad(ctx, imgb_i);

    for(t = 0; t < ctx->num_tiles; t++) {
        ctx->tile[t].stat = ENC_TILE_STAT_NOT_ENCODED;
    }

    /* keep the input picture for the duration of the frame */
    ctx->imgb = imgb_i;
    imgb_addref(ctx->imgb);

    if(imgb_r != NULL) {
        /* the reconstructed picture takes over the geometry of the input */
        for(c = 0; c < ctx->num_comp; c++) {
            imgb_r->w[c] = imgb_i->w[c];
            imgb_r->h[c] = imgb_i->h[c];
            imgb_r->x[c] = imgb_i->x[c];
            imgb_r->y[c] = imgb_i->y[c];
        }
        ctx->rec = imgb_r;
        imgb_addref(ctx->rec);
    }

    /* every tile gets an equally sized slice of the bitstream buffer;
       tile 0 keeps its bs_buf pointer, the others follow it back to back */
    int slice_size = ctx->cdesc.max_bs_buf_size / ctx->num_tiles;
    ctx->tile[0].bs_buf_max = slice_size;
    for(t = 1; t < ctx->num_tiles; t++) {
        ctx->tile[t].bs_buf = ctx->tile[t - 1].bs_buf + slice_size;
        ctx->tile[t].bs_buf_max = slice_size;
    }

    for(t = 0; t < ctx->cdesc.threads; t++) {
        ctx->core[t]->ctx = ctx;
        ctx->core[t]->thread_idx = t;
    }

    return OAPV_OK;
}
1087
/*
 * Finish the encoding of one frame: release the input picture held in
 * ctx->imgb and, when present, the reconstructed picture held in ctx->rec
 * (ctx->rec is cleared afterwards). 'stat' is not used.
 *
 * Returns OAPV_OK.
 */
static int enc_frm_finish(oapve_ctx_t *ctx, oapve_stat_t *stat)
{
    /* input picture */
    imgb_release(ctx->imgb);

    /* reconstructed picture is optional */
    if(ctx->rec != NULL) {
        imgb_release(ctx->rec);
        ctx->rec = NULL;
    }

    return OAPV_OK;
}
1097
/*
 * Encode one frame into ctx->bs.
 *
 * Steps:
 *  1. write the frame header (a copy of the writer state taken in front of it
 *     is kept so that the header can be written a second time later on),
 *  2. when rate control is enabled (param->rc_type != 0), distribute the
 *     picture bit budget over the tiles in proportion to their cost and
 *     derive the picture lambda / QP,
 *  3. encode all tiles, using up to ctx->cdesc.threads tasks (never more
 *     than the number of tiles); the calling thread takes part as well,
 *  4. append the tile bitstreams to the frame bitstream and record the tile
 *     sizes in the frame header,
 *  5. rewrite the frame header if it carries the tile sizes and update the
 *     rate control state.
 *
 * Every worker that was started is joined before the function returns, also
 * when a tile failed; otherwise a worker could still be running on this
 * context after the error has been reported to the caller.
 *
 * Returns a success code, or the first error met while encoding the tiles
 * (OAPV_ERR_FAILED_SYSCALL if joining a worker failed).
 */
static int enc_frame(oapve_ctx_t *ctx)
{
    oapv_bs_t *bs = &ctx->bs;
    int        ret = OAPV_OK;

    /* writer state in front of the frame header, used for the rewrite */
    oapv_bs_t  bs_fh;
    oapv_mcpy(&bs_fh, bs, sizeof(oapv_bs_t));

    /* write frame header */
    oapve_set_frame_header(ctx, &ctx->fh);
    oapve_vlc_frame_header(bs, ctx, &ctx->fh);

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    /* rc init */
    u64 cost_sum = 0;
    if(ctx->param->rc_type != 0) {
        oapve_rc_get_tile_cost_thread(ctx, &cost_sum);

        /* bit budget of one picture, shared between tiles by relative cost */
        double bits_pic = ((double)ctx->param->bitrate * 1000) / ((double)ctx->param->fps_num / ctx->param->fps_den);
        for(int i = 0; i < ctx->num_tiles; i++) {
            ctx->tile[i].rc.target_bits_left = bits_pic * ctx->tile[i].rc.cost / cost_sum;
            ctx->tile[i].rc.target_bits = ctx->tile[i].rc.target_bits_left;
        }

        ctx->rc_param.lambda = oapve_rc_estimate_pic_lambda(ctx, cost_sum);
        ctx->rc_param.qp = oapve_rc_estimate_pic_qp(ctx->rc_param.lambda);
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->qp[c] = ctx->rc_param.qp;
            if(c == 1) {
                ctx->qp[c] += ctx->param->qp_cb_offset;
            }
            else if(c == 2) {
                ctx->qp[c] += ctx->param->qp_cr_offset;
            }
        }
    }

    oapv_tpool_t *tpool = ctx->tpool;
    int           res, tidx = 0;
    int           first_err = OAPV_OK;
    int           parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

    /* encode tiles ************************************/
    for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
        tpool->run(ctx->thread_id[tidx], enc_thread_tile,
                   (void *)ctx->core[tidx]);
    }
    /* the calling thread works with the last core */
    ret = enc_thread_tile((void *)ctx->core[tidx]);
    if(OAPV_FAILED(ret)) {
        first_err = ret;
    }

    /* join ALL workers, remembering only the first failure */
    for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
        res = tpool->join(ctx->thread_id[tidx], &ret);
        if(res != TPOOL_SUCCESS) {
            ret = OAPV_ERR_FAILED_SYSCALL;
        }
        if(OAPV_FAILED(ret) && OAPV_SUCCEEDED(first_err)) {
            first_err = ret;
        }
    }
    if(OAPV_FAILED(first_err)) {
        ret = first_err;
    }
    oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
    /****************************************************/

    /* concatenate the tile bitstreams behind the frame header */
    for(int i = 0; i < ctx->num_tiles; i++) {
        oapv_mcpy(ctx->bs.cur, ctx->tile[i].bs_buf, ctx->tile[i].bs_size);
        ctx->bs.cur = ctx->bs.cur + ctx->tile[i].bs_size;
        ctx->fh.tile_size[i] = ctx->tile[i].bs_size - OAPV_TILE_SIZE_LEN;
    }

    /* rewrite frame header, now with the final tile sizes */
    if(ctx->fh.tile_size_present_in_fh_flag) {
        oapve_vlc_frame_header(&bs_fh, ctx, &ctx->fh);
    }
    if(ctx->param->rc_type != 0) {
        oapve_rc_update_after_pic(ctx, cost_sum);
    }

ERR:
    return ret;
}
1174
/*
 * Select the implementations of the encoder kernels (SAD, SSD, difference,
 * transforms, quantization, Hadamard).
 *
 * The generic function tables are installed first. Depending on the build
 * (X86_SSE or ARM_NEON) some of them are then replaced: on x86 the choice is
 * made at run time from the bits returned by oapv_check_cpu_info_x86()
 * (bit 0: SSE, bit 2: AVX2), on NEON builds it is made unconditionally.
 *
 * Returns OAPV_OK.
 */
static int enc_platform_init(oapve_ctx_t *ctx)
{
    /* generic implementations */
    ctx->fn_sad = oapv_tbl_fn_sad_16b;
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b;
    ctx->fn_diff = oapv_tbl_fn_diff_16b;
    ctx->fn_txb = oapv_tbl_fn_tx;
    ctx->fn_itx = oapv_tbl_fn_itx;
    ctx->fn_itx_part = oapv_tbl_fn_itx_part;
    ctx->fn_itx_adj = oapv_tbl_fn_itx_adj;
    ctx->fn_quant = oapv_tbl_fn_quant;
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;

#if X86_SSE
    const int cpu_flags = oapv_check_cpu_info_x86();
    const int has_sse = (cpu_flags >> 0) & 1;
    const int has_avx2 = (cpu_flags >> 2) & 1;

    if(has_avx2) {
        /* AVX2 kernels; the Hadamard transform uses its SSE version */
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_avx;
        ctx->fn_txb = oapv_tbl_fn_txb_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
        ctx->fn_itx_part = oapv_tbl_fn_itx_part_avx;
        ctx->fn_itx_adj = oapv_tbl_fn_itx_adj_avx;
        ctx->fn_quant = oapv_tbl_fn_quant_avx;
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
    }
    else if(has_sse) {
        /* SSE only: SSD and Hadamard are accelerated */
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_sse;
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
    }
#elif ARM_NEON
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b_neon;
    ctx->fn_txb = oapv_tbl_fn_txb_neon;
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
    ctx->fn_quant = oapv_tbl_fn_quant_neon;
    ctx->fn_had8x8 = oapv_dc_removed_had8x8; /* generic version is kept */
#endif
    return OAPV_OK;
}
1218
oapve_create(oapve_cdesc_t * cdesc,int * err)1219 oapve_t oapve_create(oapve_cdesc_t *cdesc, int *err)
1220 {
1221 oapve_ctx_t *ctx;
1222 int ret;
1223
1224 DUMP_CREATE(1);
1225 /* memory allocation for ctx and core structure */
1226 ctx = (oapve_ctx_t *)enc_ctx_alloc();
1227 if(ctx != NULL) {
1228 oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapve_cdesc_t));
1229 ret = enc_platform_init(ctx);
1230 oapv_assert_g(ret == OAPV_OK, ERR);
1231
1232 ret = enc_ready(ctx);
1233 oapv_assert_g(ret == OAPV_OK, ERR);
1234
1235 /* set default value for ctx */
1236 ctx->magic = OAPVE_MAGIC_CODE;
1237 ctx->id = (oapve_t)ctx;
1238 if(err) {
1239 *err = OAPV_OK;
1240 }
1241 return (ctx->id);
1242 }
1243 else {
1244 ret = OAPV_ERR;
1245 }
1246 ERR:
1247 if(ctx) {
1248 enc_ctx_free(ctx);
1249 }
1250 if(err) {
1251 *err = ret;
1252 }
1253 return NULL;
1254 }
1255
/*
 * Destroy an encoder instance: flush it and free its context.
 * Returns silently when 'eid' does not map to an encoder context.
 */
void oapve_delete(oapve_t eid)
{
    oapve_ctx_t *ctx = enc_id_to_ctx(eid);

    oapv_assert_r(ctx);

    DUMP_DELETE();
    enc_flush(ctx);
    enc_ctx_free(ctx);
}
1267
/*
 * Encode one access unit.
 *
 * eid   : encoder id returned by oapve_create().
 * ifrms : input frames; ifrms->num_frms frames are encoded, frame i uses
 *         ctx->cdesc.param[i].
 * mid   : metadata container (may be NULL). Each of its entries is written
 *         as a metadata PBU behind the frames. It must not be NULL when frame
 *         hashing is enabled and a primary / non-primary frame is encoded.
 * bitb  : output buffer (addr / bsize).
 * stat  : cleared, then filled with the per-frame sizes, the frame infos, the
 *         number of frames and the number of bytes written.
 * rfrms : optional reconstructed frames (may be NULL).
 *
 * Layout written: a 32-bit access unit size, then for every frame a PBU
 * (size, header, frame data), then one PBU per metadata entry. The size
 * fields are written as 0 first and patched once the real size is known.
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for invalid
 * arguments, OAPV_ERR_UNSUPPORTED for a profile other than
 * OAPV_PROFILE_422_10, or the error of the failing step. When a step fails
 * after enc_frm_prepare() succeeded, the picture references taken by it are
 * released before returning.
 */
int oapve_encode(oapve_t eid, oapv_frms_t *ifrms, oapvm_t mid, oapv_bitb_t *bitb, oapve_stat_t *stat, oapv_frms_t *rfrms)
{
    oapve_ctx_t *ctx;
    oapv_frm_t  *frm;
    oapv_bs_t   *bs;
    int          i, ret;

    ctx = enc_id_to_ctx(eid);
    /* all of these are dereferenced unconditionally below */
    oapv_assert_rv(ctx != NULL && ifrms != NULL && bitb != NULL && stat != NULL, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(bitb->addr && bitb->bsize > 0, OAPV_ERR_INVALID_ARGUMENT);

    bs = &ctx->bs;

    oapv_bsw_init(bs, bitb->addr, bitb->bsize, NULL);
    oapv_mset(stat, 0, sizeof(oapve_stat_t));

    u8       *bs_pos_au_beg = oapv_bsw_sink(bs); // address syntax of au size
    u8       *bs_pos_pbu_beg;
    oapv_bs_t bs_pbu_beg;
    oapv_bsw_write(bs, 0, 32); /* placeholder for the access unit size */

    for(i = 0; i < ifrms->num_frms; i++) {
        frm = &ifrms->frm[i];

        /* set default value for encoding parameter */
        ctx->param = &ctx->cdesc.param[i];
        ret = enc_read_param(ctx, ctx->param);
        oapv_assert_rv(ret == OAPV_OK, OAPV_ERR);

        oapv_assert_rv(ctx->param->profile_idc == OAPV_PROFILE_422_10, OAPV_ERR_UNSUPPORTED);

        // prepare for encoding a frame
        ret = enc_frm_prepare(ctx, frm->imgb, (rfrms != NULL) ? rfrms->frm[i].imgb : NULL);
        oapv_assert_rv(ret == OAPV_OK, ret);

        /* from here on every failure must go through ERR so that the
           references taken by enc_frm_prepare() are released */

        bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
        oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */

        DUMP_SAVE(0);
        oapve_vlc_pbu_size(bs, 0);
        oapve_vlc_pbu_header(bs, frm->pbu_type, frm->group_id);
        // encode a frame
        ret = enc_frame(ctx);
        oapv_assert_g(ret == OAPV_OK, ERR);

        // rewrite pbu_size (the 4 bytes of the size field itself are excluded)
        int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
        DUMP_SAVE(1);
        DUMP_LOAD(0);
        oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
        DUMP_LOAD(1);

        stat->frm_size[i] = pbu_size + 4 /* PUB size length*/;
        copy_fi_to_finfo(&ctx->fh.fi, frm->pbu_type, frm->group_id, &stat->aui.frm_info[i]);

        // add frame hash value of reconstructed frame into metadata list
        if(ctx->use_frm_hash) {
            if(frm->pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
               frm->pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME) {
                oapv_assert_gv(mid != NULL, ret, OAPV_ERR_INVALID_ARGUMENT, ERR);
                ret = oapv_set_md5_pld(mid, frm->group_id, ctx->rec);
                oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
            }
        }

        // finishing of encoding a frame
        ret = enc_frm_finish(ctx, stat);
        oapv_assert_rv(ret == OAPV_OK, ret);
    }
    stat->aui.num_frms = ifrms->num_frms;

    /* metadata PBUs follow the frames */
    oapvm_ctx_t *md_list = mid;
    if(md_list != NULL) {
        int num_md = md_list->num;
        for(i = 0; i < num_md; i++) {
            int group_id = md_list->group_ids[i];
            bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
            oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */
            DUMP_SAVE(0);

            oapve_vlc_pbu_size(bs, 0);
            oapve_vlc_pbu_header(bs, OAPV_PBU_TYPE_METADATA, group_id);
            oapve_vlc_metadata(&md_list->md_arr[i], bs);

            // rewrite pbu_size
            int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
            DUMP_SAVE(1);
            DUMP_LOAD(0);
            oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
            DUMP_LOAD(1);
        }
    }

    /* patch the access unit size written as placeholder at the beginning */
    u32 au_size = (u32)((u8 *)oapv_bsw_sink(bs) - bs_pos_au_beg) - 4;
    oapv_bsw_write_direct(bs_pos_au_beg, au_size, 32); /* u(32) */

    oapv_bsw_deinit(&ctx->bs); /* de-init BSW */
    stat->write = bsw_get_write_byte(&ctx->bs);

    return OAPV_OK;

ERR:
    /* a frame was prepared but could not be completed: give the picture
       references back, then report the original error */
    enc_frm_finish(ctx, stat);
    return ret;
}
1368
oapve_config(oapve_t eid,int cfg,void * buf,int * size)1369 int oapve_config(oapve_t eid, int cfg, void *buf, int *size)
1370 {
1371 oapve_ctx_t *ctx;
1372 int t0;
1373
1374 ctx = enc_id_to_ctx(eid);
1375 oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);
1376
1377 switch(cfg) {
1378 /* set config **********************************************************/
1379 case OAPV_CFG_SET_QP:
1380 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1381 t0 = *((int *)buf);
1382 oapv_assert_rv(t0 >= MIN_QUANT && t0 <= MAX_QUANT,
1383 OAPV_ERR_INVALID_ARGUMENT);
1384 ctx->param->qp = t0;
1385 break;
1386 case OAPV_CFG_SET_FPS_NUM:
1387 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1388 t0 = *((int *)buf);
1389 oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
1390 ctx->param->fps_num = t0;
1391 break;
1392 case OAPV_CFG_SET_FPS_DEN:
1393 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1394 t0 = *((int *)buf);
1395 oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
1396 ctx->param->fps_den = t0;
1397 break;
1398 case OAPV_CFG_SET_BPS:
1399 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1400 t0 = *((int *)buf);
1401 oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
1402 ctx->param->bitrate = t0;
1403 break;
1404 case OAPV_CFG_SET_USE_FRM_HASH:
1405 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1406 ctx->use_frm_hash = (*((int *)buf)) ? 1 : 0;
1407 break;
1408 /* get config *******************************************************/
1409 case OAPV_CFG_GET_QP:
1410 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1411 *((int *)buf) = ctx->param->qp;
1412 break;
1413 case OAPV_CFG_GET_WIDTH:
1414 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1415 *((int *)buf) = ctx->param->w;
1416 break;
1417 case OAPV_CFG_GET_HEIGHT:
1418 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1419 *((int *)buf) = ctx->param->h;
1420 break;
1421 case OAPV_CFG_GET_FPS_NUM:
1422 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1423 *((int *)buf) = ctx->param->fps_num;
1424 break;
1425 case OAPV_CFG_GET_FPS_DEN:
1426 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1427 *((int *)buf) = ctx->param->fps_den;
1428 break;
1429 case OAPV_CFG_GET_BPS:
1430 oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
1431 *((int *)buf) = ctx->param->bitrate;
1432 break;
1433 default:
1434 oapv_trace("unknown config value (%d)\n", cfg);
1435 oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
1436 }
1437
1438 return OAPV_OK;
1439 }
1440
/*
 * Fill 'param' with the default encoding parameters.
 *
 * The structure is cleared first, so every field that is not listed below is
 * zero. Defaults: default preset, no chroma QP offsets, 16x16 macroblocks per
 * tile, profile OAPV_PROFILE_422_10, level 4.1 (level_idc 123), band 2.
 *
 * Returns OAPV_OK, or OAPV_ERR_INVALID_ARGUMENT when 'param' is NULL.
 */
int oapve_param_default(oapve_param_t *param)
{
    oapv_assert_rv(param != NULL, OAPV_ERR_INVALID_ARGUMENT);

    oapv_mset(param, 0, sizeof(oapve_param_t));
    param->preset = OAPV_PRESET_DEFAULT;

    param->qp_cb_offset = 0;
    param->qp_cr_offset = 0;

    param->tile_w_mb = 16;
    param->tile_h_mb = 16;

    param->profile_idc = OAPV_PROFILE_422_10;
    /* level 4.1 scaled by 30. 4.1 has no exact binary representation, so
       4.1 * 30 evaluates to slightly less than 123; round to nearest instead
       of truncating, otherwise the result would be 122. */
    param->level_idc = (int)((4.1 * 30.0) + 0.5);
    param->band_idc = 2;

    return OAPV_OK;
}
1458
1459 ///////////////////////////////////////////////////////////////////////////////
// end of encoder code
1461 #endif // ENABLE_ENCODER
1462 ///////////////////////////////////////////////////////////////////////////////
1463
1464 ///////////////////////////////////////////////////////////////////////////////
1465 // start of decoder code
1466 #if ENABLE_DECODER
1467 ///////////////////////////////////////////////////////////////////////////////
dec_id_to_ctx(oapvd_t id)1468 static oapvd_ctx_t *dec_id_to_ctx(oapvd_t id)
1469 {
1470 oapvd_ctx_t *ctx;
1471 oapv_assert_rv(id, NULL);
1472 ctx = (oapvd_ctx_t *)id;
1473 oapv_assert_rv(ctx->magic == OAPVD_MAGIC_CODE, NULL);
1474 return ctx;
1475 }
1476
dec_ctx_alloc(void)1477 static oapvd_ctx_t *dec_ctx_alloc(void)
1478 {
1479 oapvd_ctx_t *ctx;
1480
1481 ctx = (oapvd_ctx_t *)oapv_malloc_fast(sizeof(oapvd_ctx_t));
1482
1483 oapv_assert_rv(ctx != NULL, NULL);
1484 oapv_mset_x64a(ctx, 0, sizeof(oapvd_ctx_t));
1485
1486 return ctx;
1487 }
1488
/*
 * Free a decoder context obtained from dec_ctx_alloc().
 * Only the context structure itself is freed here.
 */
static void dec_ctx_free(oapvd_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
1493
dec_core_alloc(void)1494 static oapvd_core_t *dec_core_alloc(void)
1495 {
1496 oapvd_core_t *core;
1497
1498 core = (oapvd_core_t *)oapv_malloc_fast(sizeof(oapvd_core_t));
1499
1500 oapv_assert_rv(core, NULL);
1501 oapv_mset_x64a(core, 0, sizeof(oapvd_core_t));
1502
1503 return core;
1504 }
1505
/*
 * Free a decoder core structure obtained from dec_core_alloc().
 */
static void dec_core_free(oapvd_core_t *core)
{
    oapv_mfree_fast(core);
}
1510
/*
 * Reconstruct one block in place in core->coef.
 *
 * The parsed DC value is a difference to the previous DC of component 'c':
 * it is added to core->prev_dc[c], which is then updated. Afterwards the
 * coefficients are de-quantized with the component's matrix / shift and
 * inverse transformed.
 *
 * log2_w, log2_h : log2 of the block width and height.
 * c              : component index.
 *
 * Returns OAPV_OK.
 */
static int dec_block(oapvd_ctx_t *ctx, oapvd_core_t *core, int log2_w, int log2_h, int c)
{
    const int bd = ctx->bit_depth;
    s16      *coef = core->coef;

    /* DC prediction from the previous block of the same component */
    coef[0] += core->prev_dc[c];
    core->prev_dc[c] = coef[0];

    /* inverse quantization */
    ctx->fn_dquant[0](coef, core->q_mat[c], log2_w, log2_h, core->dq_shift[c]);

    /* inverse transform */
    ctx->fn_itx[0](coef, ITX_SHIFT1, ITX_SHIFT2(bd), 1 << log2_w);

    return OAPV_OK;
}
1524
/*
 * Compute position and size of every tile of a picture.
 *
 * tile           : array of num_tiles entries to fill (x, y, w, h).
 * w_pel, h_pel   : picture width and height in samples.
 * tile_w, tile_h : nominal tile width and height in samples.
 * num_tile_cols  : number of tiles per row; tiles are numbered row by row.
 * num_tiles      : total number of tiles.
 *
 * Tiles that would reach beyond the right or bottom picture border are
 * shortened so that they end at the border.
 *
 * Returns OAPV_OK.
 */
static int dec_set_tile_info(oapvd_tile_t *tile, int w_pel, int h_pel, int tile_w, int tile_h, int num_tile_cols, int num_tiles)
{
    for(int idx = 0; idx < num_tiles; idx++) {
        const int left = (idx % num_tile_cols) * tile_w;
        const int top = (idx / num_tile_cols) * tile_h;
        int       width = tile_w;
        int       height = tile_h;

        /* clip against the picture border */
        if(left + tile_w > w_pel) {
            width = w_pel - left;
        }
        if(top + tile_h > h_pel) {
            height = h_pel - top;
        }

        tile[idx].x = left;
        tile[idx].y = top;
        tile[idx].w = width;
        tile[idx].h = height;
    }
    return OAPV_OK;
}
1539
/*
 * Set up the decoder context for the frame whose header has just been parsed
 * into ctx->fh.
 *
 * imgb : output picture; on success a reference is added and it is stored in
 *        ctx->imgb (released again by dec_frm_finish()).
 *
 * Bit depth, chroma format and component count are taken from the frame
 * header; the chroma sub-sampling shifts and the block writer functions are
 * taken from the color space of the output picture. The coded size is the
 * frame size aligned to the macroblock size. The tile grid is derived from
 * the tile size of the frame header; the first tile starts at the current
 * read position of ctx->bs, the start of the others is not known yet.
 *
 * Returns OAPV_OK, or OAPV_ERR_MALFORMED_BITSTREAM when the tile size is zero
 * or the tile grid exceeds OAPV_MAX_TILE_COLS x OAPV_MAX_TILE_ROWS. No
 * reference on 'imgb' is held when an error is returned.
 *
 * NOTE(review): the shifts come from the output picture while ctx->cfi comes
 * from the bitstream; nothing here checks that both agree.
 */
static int dec_frm_prepare(oapvd_ctx_t *ctx, oapv_imgb_t *imgb)
{
    ctx->bit_depth = ctx->fh.fi.bit_depth;
    ctx->cfi = ctx->fh.fi.chroma_format_idc;
    ctx->num_comp = get_num_comp(ctx->cfi);
    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;

    for(int c = 1; c < ctx->num_comp; c++) {
        ctx->comp_sft[c][0] = get_chroma_sft_w(color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs)));
        ctx->comp_sft[c][1] = get_chroma_sft_h(color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs)));
    }

    ctx->w = oapv_align_value(ctx->fh.fi.frame_width, OAPV_MB_W);
    ctx->h = oapv_align_value(ctx->fh.fi.frame_height, OAPV_MB_H);

    if(OAPV_CS_GET_FORMAT(imgb->cs) == OAPV_CF_PLANAR2) {
        ctx->fn_block_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_block_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_block_to_imgb[V_C] = block_to_imgb_p210_uv;
    }
    else {
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->fn_block_to_imgb[c] = block_to_imgb_10bit;
        }
    }

    int tile_w = ctx->fh.tile_width_in_mbs * OAPV_MB_W;
    int tile_h = ctx->fh.tile_height_in_mbs * OAPV_MB_H;

    /* a zero tile size would divide by zero below */
    oapv_assert_rv(tile_w > 0 && tile_h > 0, OAPV_ERR_MALFORMED_BITSTREAM);

    ctx->num_tile_cols = (ctx->w + (tile_w - 1)) / tile_w;
    ctx->num_tile_rows = (ctx->h + (tile_h - 1)) / tile_h;
    ctx->num_tiles = ctx->num_tile_cols * ctx->num_tile_rows;

    oapv_assert_rv((ctx->num_tile_cols <= OAPV_MAX_TILE_COLS) && (ctx->num_tile_rows <= OAPV_MAX_TILE_ROWS), OAPV_ERR_MALFORMED_BITSTREAM);
    dec_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, ctx->num_tile_cols, ctx->num_tiles);

    /* only the start of the first tile is known; each tile start is
       derived from the size of the tile in front of it */
    for(int i = 0; i < ctx->num_tiles; i++) {
        ctx->tile[i].bs_beg = NULL;
        ctx->tile[i].stat = DEC_TILE_STAT_NOT_DECODED;
    }
    ctx->tile[0].bs_beg = oapv_bsr_sink(&ctx->bs);

    /* take the reference last, so that the early error returns above do
       not leave a reference behind that nobody releases */
    ctx->imgb = imgb;
    imgb_addref(ctx->imgb); // increase reference count

    return OAPV_OK;
}
1591
/*
 * Finish the decoding of one frame: clear the bitstream reader state and
 * release the output picture held in ctx->imgb (ctx->imgb becomes NULL).
 *
 * Returns OAPV_OK.
 */
static int dec_frm_finish(oapvd_ctx_t *ctx)
{
    /* forget the bitstream of the finished frame */
    oapv_mset(&ctx->bs, 0, sizeof(oapv_bs_t));

    /* decrease the reference count of the output picture */
    imgb_release(ctx->imgb);
    ctx->imgb = NULL;

    return OAPV_OK;
}
1599
/*
 * Decode all blocks of one component of one tile.
 *
 * tile  : tile to decode (position and size in luma samples).
 * bs    : bitstream reader positioned at the data of component 'c'.
 * c     : component index.
 * s_dst : stride of the destination plane; it is applied to a byte pointer.
 * dst   : start of the destination plane.
 *
 * The tile area, scaled by the component's sub-sampling shifts, is walked
 * macroblock by macroblock and, inside a macroblock, block by block in raster
 * order. For every block the DC and AC coefficients are parsed, the block is
 * reconstructed and written to the destination plane. The reader is byte
 * aligned at the end.
 *
 * Returns OAPV_OK, or the error of the first failing step.
 */
static int dec_tile_comp(oapvd_tile_t *tile, oapvd_ctx_t *ctx, oapvd_core_t *core, oapv_bs_t *bs, int c, int s_dst, void *dst)
{
    const int sft_w = ctx->comp_sft[c][0];
    const int sft_h = ctx->comp_sft[c][1];

    /* macroblock size of this component */
    const int mb_w = OAPV_MB_W >> sft_w;
    const int mb_h = OAPV_MB_H >> sft_h;

    /* tile area of this component: [x_beg, x_end) x [y_beg, y_end) */
    const int x_beg = tile->x >> sft_w;
    const int x_end = (tile->w >> sft_w) + x_beg;
    const int y_beg = tile->y >> sft_h;
    const int y_end = (tile->h >> sft_h) + y_beg;

    int  mb_y, mb_x, blk_y, blk_x;
    int  ret;
    s16 *d16;

    for(mb_y = y_beg; mb_y < y_end; mb_y += mb_h) {
        for(mb_x = x_beg; mb_x < x_end; mb_x += mb_w) {
            for(blk_y = mb_y; blk_y < (mb_y + mb_h); blk_y += OAPV_BLK_H) {
                for(blk_x = mb_x; blk_x < (mb_x + mb_w); blk_x += OAPV_BLK_W) {
                    /* coefficient parsing: DC first, then AC */
                    ret = oapvd_vlc_dc_coeff(ctx, core, bs, &core->coef[0], c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    ret = oapvd_vlc_ac_coeff(ctx, core, bs, core->coef, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
                    DUMP_COEF(core->coef, OAPV_BLK_D, blk_x, blk_y, c);

                    /* reconstruction */
                    ret = dec_block(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    /* store the block: row offset via the byte pointer,
                       column offset in 16-bit samples */
                    d16 = (s16 *)((u8 *)dst + blk_y * s_dst) + blk_x;
                    ctx->fn_block_to_imgb[c](core->coef, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), blk_x, s_dst, d16);
                }
            }
        }
    }

    /* byte align */
    oapv_bsr_align8(bs);
    return OAPV_OK;
}
1644
/*
 * Decode one tile.
 *
 * core : decoding core of the calling thread (working buffers, QP state).
 * tile : tile to decode; tile->bs_beg points to its size field and
 *        tile->data_size holds the size of the data behind that field.
 *
 * The tile header is parsed first. For every component the QP of the tile
 * header is used to build the de-quantization matrix (scale of qp % 6 times
 * the matrix of the frame header) and shift, and the prediction state is
 * reset. Then the components are decoded one after the other into the output
 * picture. In the PLANAR2 layout component 0 uses plane 0, components 1 and 2
 * share plane 1, component 2 starting one sample later.
 *
 * Returns OAPV_OK, or the error of the first failing step.
 */
static int dec_tile(oapvd_core_t *core, oapvd_tile_t *tile)
{
    oapvd_ctx_t *ctx = core->ctx;
    oapv_bs_t    bs;
    int          ret, c, x, y;

    /* the tile data starts behind the tile size field */
    oapv_bsr_init(&bs, tile->bs_beg + OAPV_TILE_SIZE_LEN, tile->data_size, NULL);
    ret = oapvd_vlc_tile_header(&bs, ctx, &tile->th);
    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

    /* per component quantization setup and reset of the prediction state */
    for(c = 0; c < ctx->num_comp; c++) {
        core->qp[c] = tile->th.tile_qp[c];
        int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
        core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;
        core->prev_dc[c] = 0;

        for(y = 0; y < OAPV_BLK_H; y++) {
            for(x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat[c][y * OAPV_BLK_W + x] = dq_scale * ctx->fh.q_matrix[c][y][x]; // 7bit + 8bit
            }
        }
    }

    /* decode the components one after the other */
    for(c = 0; c < ctx->num_comp; c++) {
        int  plane = c;
        int  first_sample = 0;
        int  s_dst;
        s16 *dst;

        if(OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2) {
            /* two planes only: both chroma components live in plane 1 */
            plane = (c > 0) ? 1 : 0;
            first_sample = (c > 1) ? 1 : 0;
        }

        dst = ctx->imgb->a[plane];
        dst += first_sample;
        s_dst = ctx->imgb->s[plane];

        ret = dec_tile_comp(tile, ctx, core, &bs, c, s_dst, dst);
        oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
    }

    oapvd_vlc_tile_dummy_data(&bs);
    return OAPV_OK;
}
1693
dec_thread_tile(void * arg)1694 static int dec_thread_tile(void *arg)
1695 {
1696 oapv_bs_t bs;
1697 int i, ret, run, tile_idx = 0, thread_ret = OAPV_OK;
1698
1699 oapvd_core_t *core = (oapvd_core_t *)arg;
1700 oapvd_ctx_t *ctx = core->ctx;
1701 oapvd_tile_t *tile = ctx->tile;
1702
1703 while(1) {
1704 // find not decoded tile
1705 oapv_tpool_enter_cs(ctx->sync_obj);
1706 for(i = 0; i < ctx->num_tiles; i++) {
1707 if(tile[i].stat == DEC_TILE_STAT_NOT_DECODED) {
1708 tile[i].stat = DEC_TILE_STAT_ON_DECODING;
1709 tile_idx = i;
1710 break;
1711 }
1712 }
1713 oapv_tpool_leave_cs(ctx->sync_obj);
1714 if(i == ctx->num_tiles) {
1715 break;
1716 }
1717
1718 // wait until to know bistream start position
1719 run = 1;
1720 while(run) {
1721 oapv_tpool_enter_cs(ctx->sync_obj);
1722 if(tile[tile_idx].bs_beg != NULL) {
1723 run = 0;
1724 }
1725 oapv_tpool_leave_cs(ctx->sync_obj);
1726 }
1727 /* read tile size */
1728 oapv_bsr_init(&bs, tile[tile_idx].bs_beg, OAPV_TILE_SIZE_LEN, NULL);
1729 ret = oapvd_vlc_tile_size(&bs, &tile[tile_idx].data_size);
1730 oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
1731 oapv_assert_g(tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + (tile[tile_idx].data_size - 1) <= ctx->bs.end, ERR);
1732
1733 oapv_tpool_enter_cs(ctx->sync_obj);
1734 if(tile_idx + 1 < ctx->num_tiles) {
1735 tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1736 }
1737 else {
1738 ctx->tile_end = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1739 }
1740 oapv_tpool_leave_cs(ctx->sync_obj);
1741
1742 ret = dec_tile(core, &tile[tile_idx]);
1743
1744 oapv_tpool_enter_cs(ctx->sync_obj);
1745 if (OAPV_SUCCEEDED(ret)) {
1746 tile[tile_idx].stat = DEC_TILE_STAT_DECODED;
1747 }
1748 else {
1749 tile[tile_idx].stat = ret;
1750 thread_ret = ret;
1751 }
1752 tile[tile_idx].stat = OAPV_SUCCEEDED(ret) ? DEC_TILE_STAT_DECODED : ret;
1753 oapv_tpool_leave_cs(ctx->sync_obj);
1754 }
1755 return thread_ret;
1756
1757 ERR:
1758 oapv_tpool_enter_cs(ctx->sync_obj);
1759 tile[tile_idx].stat = DEC_TILE_STAT_SIZE_ERROR;
1760 if (tile_idx + 1 < ctx->num_tiles)
1761 {
1762 tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg;
1763 }
1764 oapv_tpool_leave_cs(ctx->sync_obj);
1765 return OAPV_ERR_MALFORMED_BITSTREAM;
1766 }
1767
/*
 * Release everything dec_ready() created for the context: the worker
 * threads and the thread pool (only present with two or more threads), the
 * synchronization object and the cores.
 *
 * The function is also reached from the error path of dec_ready(), i.e. with
 * a context that is only partly set up. Objects that were never created are
 * therefore skipped, and the core pointers are cleared after freeing so that
 * no dangling pointer stays in the context.
 */
static void dec_flush(oapvd_ctx_t *ctx)
{
    if(ctx->cdesc.threads >= 2 && ctx->tpool != NULL) {
        // thread controller instance is present
        // terminate the created threads
        for(int i = 0; i < ctx->cdesc.threads - 1; i++) {
            if(ctx->thread_id[i]) {
                // valid thread instance
                ctx->tpool->release(&ctx->thread_id[i]);
            }
        }
        // de-initialize the tpool
        oapv_tpool_deinit(ctx->tpool);
        oapv_mfree(ctx->tpool);
        ctx->tpool = NULL;
    }

    if(ctx->sync_obj != NULL) {
        oapv_tpool_sync_obj_delete(&(ctx->sync_obj));
    }

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        if(ctx->core[i] != NULL) {
            dec_core_free(ctx->core[i]);
            ctx->core[i] = NULL;
        }
    }
}
1793
/*
 * Create the run-time objects of a decoder context: one core per configured
 * thread, the synchronization object and, with two or more threads, a thread
 * pool with (threads - 1) workers (the calling thread does the remaining
 * share of the work).
 *
 * Returns OAPV_OK on success. On failure everything created so far is
 * released through dec_flush() and OAPV_ERR_OUT_OF_MEMORY (core or thread
 * pool allocation) or OAPV_ERR_UNKNOWN (synchronization object or thread
 * creation) is returned.
 */
static int dec_ready(oapvd_ctx_t *ctx)
{
    int i, ret = OAPV_OK;

    if(ctx->core[0] == NULL) {
        // create cores
        for(i = 0; i < ctx->cdesc.threads; i++) {
            ctx->core[i] = dec_core_alloc();
            oapv_assert_gv(ctx->core[i], ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
            ctx->core[i]->ctx = ctx;
        }
    }

    // initialize the threads to NULL
    for(i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 2) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        /* the pool is used right away, so the allocation has to be checked */
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads - 1);
        for(i = 0; i < ctx->cdesc.threads - 1; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }
    return OAPV_OK;

ERR:
    dec_flush(ctx);

    return ret;
}
1831
/*
 * Select the implementations of the decoder kernels (inverse transform and
 * de-quantization).
 *
 * The generic function tables are installed first. On X86_SSE builds the
 * AVX2 tables replace them when oapv_check_cpu_info_x86() reports AVX2
 * (bit 2); with SSE only (bit 0) the generic tables stay in use. On ARM_NEON
 * builds the NEON inverse transform is used.
 *
 * Returns OAPV_OK.
 */
static int dec_platform_init(oapvd_ctx_t *ctx)
{
    /* generic implementations */
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_itx = oapv_tbl_fn_itx;

#if X86_SSE
    const int cpu_flags = oapv_check_cpu_info_x86();
    const int has_sse = (cpu_flags >> 0) & 1;
    const int has_avx2 = (cpu_flags >> 2) & 1;

    if(has_avx2) {
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
    }
    else if(has_sse) {
        /* no SSE specific tables: same as the generic setting */
        ctx->fn_dquant = oapv_tbl_fn_dquant;
        ctx->fn_itx = oapv_tbl_fn_itx;
    }
#elif ARM_NEON
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
#endif
    return OAPV_OK;
}
1859
oapvd_create(oapvd_cdesc_t * cdesc,int * err)1860 oapvd_t oapvd_create(oapvd_cdesc_t *cdesc, int *err)
1861 {
1862 oapvd_ctx_t *ctx;
1863 int ret;
1864
1865 DUMP_CREATE(0);
1866 ctx = NULL;
1867
1868 /* check if any decoder argument is correctly set */
1869 oapv_assert_gv(cdesc->threads > 0 && cdesc->threads <= OAPV_MAX_THREADS, ret, OAPV_ERR_INVALID_ARGUMENT, ERR);
1870
1871 /* memory allocation for ctx and core structure */
1872 ctx = (oapvd_ctx_t *)dec_ctx_alloc();
1873 oapv_assert_gv(ctx != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
1874 oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapvd_cdesc_t));
1875
1876 /* initialize platform-specific variables */
1877 ret = dec_platform_init(ctx);
1878 oapv_assert_g(ret == OAPV_OK, ERR);
1879
1880 /* ready for decoding */
1881 ret = dec_ready(ctx);
1882 oapv_assert_g(ret == OAPV_OK, ERR);
1883
1884 ctx->magic = OAPVD_MAGIC_CODE;
1885 ctx->id = (oapvd_t)ctx;
1886 if(err) {
1887 *err = OAPV_OK;
1888 }
1889 return (ctx->id);
1890
1891 ERR:
1892 if(ctx) {
1893 dec_ctx_free(ctx);
1894 }
1895 if(err) {
1896 *err = ret;
1897 }
1898 return NULL;
1899 }
1900
/*
 * Destroy a decoder instance: release its run-time objects and free its
 * context. Returns silently when 'did' does not map to a decoder context.
 */
void oapvd_delete(oapvd_t did)
{
    oapvd_ctx_t *ctx = dec_id_to_ctx(did);

    oapv_assert_r(ctx);

    DUMP_DELETE();
    dec_flush(ctx);
    dec_ctx_free(ctx);
}
1911
/*
 * Decode the PBUs found in a bitstream buffer.
 *
 * did   : decoder id returned by oapvd_create().
 * bitb  : input buffer (addr / ssize).
 * ofrms : output frames; the decoded frame number n is written to
 *         ofrms->frm[n].imgb, and its pbu_type / group_id are filled in.
 *         ofrms->num_frms has to match the number of frame PBUs.
 * mid   : metadata container handed to the metadata parser.
 * stat  : receives the frame infos, the frame sizes and the frame count;
 *         stat->read is increased by the bytes consumed (it is not cleared
 *         here).
 *
 * The buffer is walked PBU by PBU while more than 8 bytes remain. Every PBU
 * starts with a 4-byte size followed by the PBU header. Frame PBUs are
 * decoded tile-wise with up to ctx->cdesc.threads tasks, metadata and filler
 * PBUs are parsed, all other PBU types are skipped.
 *
 * Returns a success code, OAPV_ERR_INVALID_ARGUMENT for invalid arguments,
 * OAPV_ERR_MALFORMED_BITSTREAM for inconsistent sizes or a frame count that
 * does not match ofrms->num_frms, or the error of the failing step. An
 * error raised while decoding the tiles of a frame is reported to the
 * caller after the frame has been finished.
 */
int oapvd_decode(oapvd_t did, oapv_bitb_t *bitb, oapv_frms_t *ofrms, oapvm_t mid, oapvd_stat_t *stat)
{
    oapvd_ctx_t *ctx;
    oapv_bs_t   *bs;
    oapv_pbuh_t  pbuh;
    int          ret = OAPV_OK;
    u32          pbu_size;
    u32          remain;
    u8          *curpos;
    int          frame_cnt = 0;

    ctx = dec_id_to_ctx(did);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);
    /* all of these are dereferenced unconditionally below */
    oapv_assert_rv(bitb != NULL && ofrms != NULL && stat != NULL, OAPV_ERR_INVALID_ARGUMENT);

    curpos = (u8 *)bitb->addr;
    remain = bitb->ssize;

    while(remain > 8) {
        oapv_bsr_init(&ctx->bs, curpos, remain, NULL);
        bs = &ctx->bs;

        ret = oapvd_vlc_pbu_size(bs, &pbu_size); // 4byte
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        /* the PBU has to fit into the buffer; computed in 64 bit so that a
           huge pbu_size cannot wrap around, and reported as an error (ret
           still holds a success code at this point) */
        oapv_assert_gv(((u64)pbu_size + 4) <= (u64)bs->size, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);

        curpos += 4; // pbu_size syntax
        remain -= 4;

        ret = oapvd_vlc_pbu_header(bs, &pbuh);
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {
            /* the bitstream must not carry more frames than the caller
               provided output frames for */
            oapv_assert_gv(frame_cnt < ofrms->num_frms, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);

            ret = oapvd_vlc_frame_header(bs, &ctx->fh);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ret = dec_frm_prepare(ctx, ofrms->frm[frame_cnt].imgb);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            int           res;
            int           dec_ret;
            oapv_tpool_t *tpool = ctx->tpool;
            int           parallel_task = 1;
            int           tidx = 0;

            parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

            /* decode tiles ************************************/
            for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
                tpool->run(ctx->thread_id[tidx], dec_thread_tile,
                           (void *)ctx->core[tidx]);
            }
            /* the calling thread works with the last core */
            dec_ret = dec_thread_tile((void *)ctx->core[tidx]);
            for(tidx = 0; tidx < parallel_task - 1; tidx++) {
                res = OAPV_OK; /* defined value even if join stores nothing */
                tpool->join(ctx->thread_id[tidx], &res);
                if(OAPV_FAILED(res)) {
                    dec_ret = res;
                }
            }
            /****************************************************/

            /* READ FILLER HERE !!! */

            if(OAPV_SUCCEEDED(dec_ret)) {
                /* ctx->tile_end is only meaningful when the tiles have been
                   walked through successfully */
                oapv_bsr_move(&ctx->bs, ctx->tile_end);
                stat->read += bsr_get_read_byte(&ctx->bs);

                copy_fi_to_finfo(&ctx->fh.fi, pbuh.pbu_type, pbuh.group_id, &stat->aui.frm_info[frame_cnt]);
                if(dec_ret == OAPV_OK && ctx->use_frm_hash) {
                    oapv_imgb_set_md5(ctx->imgb);
                }
            }

            /* always finish the frame (releases the output picture), but do
               not let its result hide a tile decoding error */
            ret = dec_frm_finish(ctx);
            if(OAPV_FAILED(dec_ret)) {
                ret = dec_ret;
            }
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ofrms->frm[frame_cnt].pbu_type = pbuh.pbu_type;
            ofrms->frm[frame_cnt].group_id = pbuh.group_id;
            stat->frm_size[frame_cnt] = pbu_size + 4 /* PUB size length*/;
            frame_cnt++;
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_METADATA) {
            ret = oapvd_vlc_metadata(bs, pbu_size, mid, pbuh.group_id);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            stat->read += bsr_get_read_byte(&ctx->bs);
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_FILLER) {
            /* 4 bytes are subtracted from the PBU size below; a smaller
               size would wrap around */
            oapv_assert_gv(pbu_size >= 4, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);
            ret = oapvd_vlc_filler(bs, (pbu_size - 4));
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        }
        /* advance to the next PBU */
        curpos += pbu_size;
        remain = (remain < pbu_size) ? 0 : (remain - pbu_size);
    }
    stat->aui.num_frms = frame_cnt;
    oapv_assert_rv(ofrms->num_frms == frame_cnt, OAPV_ERR_MALFORMED_BITSTREAM);

ERR:
    return ret;
}
2012
/*
 * Apply a configuration request to a decoder instance.
 *
 * did  : decoder handle, resolved with dec_id_to_ctx()
 * cfg  : configuration selector; only OAPV_CFG_SET_USE_FRM_HASH is handled
 * buf  : for OAPV_CFG_SET_USE_FRM_HASH, points to an int; any non-zero value
 *        enables ctx->use_frm_hash, zero disables it
 * size : not used by the selector handled here
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT when `did` does not
 * resolve to a context or `buf` is NULL, and OAPV_ERR_UNSUPPORTED for any
 * other selector.
 */
int oapvd_config(oapvd_t did, int cfg, void *buf, int *size)
{
    oapvd_ctx_t *ctx;

    ctx = dec_id_to_ctx(did);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    switch(cfg) {
    /* set config ************************************************************/
    case OAPV_CFG_SET_USE_FRM_HASH:
        /* the value is read through `buf`, so it must not be NULL */
        oapv_assert_rv(buf != NULL, OAPV_ERR_INVALID_ARGUMENT);
        ctx->use_frm_hash = (*((int *)buf)) ? 1 : 0;
        break;

    default:
        oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    }
    return OAPV_OK;
}
2031
/*
 * Collect frame information from an access unit without decoding it.
 *
 * au      : first byte of the access unit (a sequence of PBUs)
 * au_size : number of bytes available at `au`
 * aui     : output; `num_frms` and `frm_info[]` are filled
 *
 * PBUs are walked in order. If an OAPV_PBU_TYPE_AU_INFO PBU is met, its
 * content is copied to `aui` and the walk stops. Otherwise the frame info of
 * every frame PBU is parsed and appended to aui->frm_info[].
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for NULL pointers or
 * a negative size, OAPV_ERR_MALFORMED_BITSTREAM when a PBU header cannot be
 * parsed, OAPV_ERR_REACHED_MAX when more than OAPV_MAX_NUM_FRAMES frames are
 * described, or the failing status of a parsing helper.
 *
 * DUMP_SET(0) / DUMP_SET(1) bracket the walk on every exit path.
 * NOTE(review): presumably this suppresses debug dump output while probing -
 * confirm against the macro definition.
 */
int oapvd_info(void *au, int au_size, oapv_au_info_t *aui)
{
    int ret = OAPV_OK;
    int frm_count = 0;
    u8 *curpos;
    u32 remain;

    /* a negative size would turn into a huge unsigned `remain` below */
    oapv_assert_rv(au != NULL && aui != NULL && au_size >= 0, OAPV_ERR_INVALID_ARGUMENT);

    curpos = (u8 *)au;
    remain = (u32)au_size;

    DUMP_SET(0);
    while(remain > 8) // FIX-ME (8byte?)
    {
        oapv_bs_t   bs;
        oapv_pbuh_t pbuh;
        u32         pbu_size = 0;

        oapv_bsr_init(&bs, curpos, remain, NULL);

        ret = oapvd_vlc_pbu_size(&bs, &pbu_size); // 4 byte
        oapv_assert_g(OAPV_SUCCEEDED(ret), END);
        curpos += 4; // pbu_size syntax
        remain -= 4;

        /* pbu header */
        ret = oapvd_vlc_pbu_header(&bs, &pbuh); // 4 byte
        if(OAPV_FAILED(ret)) {
            /* a header failure is always reported as a malformed bitstream */
            ret = OAPV_ERR_MALFORMED_BITSTREAM;
        }
        oapv_assert_g(OAPV_SUCCEEDED(ret), END);

        if(pbuh.pbu_type == OAPV_PBU_TYPE_AU_INFO) {
            // parse access_unit_info in PBU
            oapv_aui_t ai;

            ret = oapvd_vlc_au_info(&bs, &ai);
            oapv_assert_g(OAPV_SUCCEEDED(ret), END);

            /* aui->frm_info[] is limited to OAPV_MAX_NUM_FRAMES entries (the
             * bound used for frame PBUs below), so the parsed count must
             * respect it before the copy loop runs */
            if(ai.num_frames > OAPV_MAX_NUM_FRAMES) {
                ret = OAPV_ERR_REACHED_MAX;
            }
            oapv_assert_g(OAPV_SUCCEEDED(ret), END);

            aui->num_frms = ai.num_frames;
            for(int i = 0; i < ai.num_frames; i++) {
                copy_fi_to_finfo(&ai.frame_info[i], ai.pbu_type[i], ai.group_id[i], &aui->frm_info[i]);
            }
            ret = OAPV_OK; // found access_unit_info, no need to read more PBUs
            goto END;
        }
        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {
            // parse frame_info in PBU
            oapv_fi_t fi;

            if(frm_count >= OAPV_MAX_NUM_FRAMES) {
                ret = OAPV_ERR_REACHED_MAX;
            }
            oapv_assert_g(OAPV_SUCCEEDED(ret), END);

            ret = oapvd_vlc_frame_info(&bs, &fi);
            oapv_assert_g(OAPV_SUCCEEDED(ret), END);

            copy_fi_to_finfo(&fi, pbuh.pbu_type, pbuh.group_id, &aui->frm_info[frm_count]);
            frm_count++;
        }
        aui->num_frms = frm_count;

        if(remain < pbu_size) {
            /* the PBU claims more bytes than are left: end the walk here
             * instead of moving `curpos` beyond the buffer */
            break;
        }
        curpos += pbu_size;
        remain -= pbu_size;
    }
    ret = OAPV_OK;

END:
    DUMP_SET(1);
    return ret;
}
2096
2097 ///////////////////////////////////////////////////////////////////////////////
2098 // end of decoder code
2099 #endif // ENABLE_DECODER
2100 ///////////////////////////////////////////////////////////////////////////////