xref: /aosp_15_r20/external/libavc/encoder/ih264e_fmt_conv.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_fmt_conv.c
25 *
26 * @brief
27 *  Contains functions for format conversion or frame copy of output buffer
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_fmt_conv_420sp_to_rgb565
34 *  - ih264e_fmt_conv_420sp_to_rgba8888
35 *  - ih264e_fmt_conv_420sp_to_420sp
36 *  - ih264e_fmt_conv_420sp_to_420sp_swap_uv
37 *  - ih264e_fmt_conv_420sp_to_420p
38 *  - ih264e_fmt_conv_420p_to_420sp
39 *  - ih264e_fmt_conv_422i_to_420sp
40 *  - ih264e_fmt_conv
41 *
42 * @remarks
43 *  none
44 *
45 *******************************************************************************
46 */
47 
48 /*****************************************************************************/
49 /* File Includes                                                             */
50 /*****************************************************************************/
51 
52 /* System Include Files */
53 #include <stdio.h>
54 #include <stddef.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <assert.h>
58 
59 /* User Include Files */
60 #include "ih264_typedefs.h"
61 #include "iv2.h"
62 #include "ive2.h"
63 #include "ithread.h"
64 
65 #include "ih264_debug.h"
66 #include "ih264_macros.h"
67 #include "ih264_error.h"
68 #include "ih264_defs.h"
69 #include "ih264_mem_fns.h"
70 #include "ih264_padding.h"
71 #include "ih264_structs.h"
72 #include "ih264_trans_quant_itrans_iquant.h"
73 #include "ih264_inter_pred_filters.h"
74 #include "ih264_intra_pred_filters.h"
75 #include "ih264_deblk_edge_filters.h"
76 #include "ih264_cabac_tables.h"
77 #include "ih264_platform_macros.h"
78 
79 #include "ime_defs.h"
80 #include "ime_distortion_metrics.h"
81 #include "ime_structs.h"
82 
83 #include "irc_cntrl_param.h"
84 #include "irc_frame_info_collector.h"
85 
86 #include "ih264e.h"
87 #include "ih264e_error.h"
88 #include "ih264e_defs.h"
89 #include "ih264e_rate_control.h"
90 #include "ih264e_bitstream.h"
91 #include "ih264e_cabac_structs.h"
92 #include "ih264e_structs.h"
93 #include "ih264e_fmt_conv.h"
94 
95 
96 /*****************************************************************************/
97 /* Function Definitions                                                      */
98 /*****************************************************************************/
99 
100 /**
101 *******************************************************************************
102 *
103 * @brief Function used to perform color space conversion from 420SP to RGB565
104 *
105 * @par   Description
106 * Function used to perform color space conversion from 420SP to RGB565
107 *
108 * @param[in] pu1_y_src
109 *  Input Y pointer
110 *
111 * @param[in] pu1_uv_src
112 *  Input UV pointer
113 *
114 * @param[in] pu2_rgb_dst
115 *  Output RGB pointer
116 *
117 * @param[in] wd
118 *  Width
119 *
120 * @param[in] ht
121 *  Height
122 *
123 * @param[in] src_y_strd
124 *  Input Y Stride
125 *
126 * @param[in] src_uv_strd
127 *  Input UV stride
128 *
129 * @param[in] dst_strd
130 *  Output stride
131 *
132 * @param[in] is_u_first
133 *  Flag to indicate chroma ordering
134 *
135 * @returns none
136 *
137 *******************************************************************************
138 */
ih264e_fmt_conv_420sp_to_rgb565(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD16 * pu2_rgb_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)139 void ih264e_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
140                                      UWORD8 *pu1_uv_src,
141                                      UWORD16 *pu2_rgb_dst,
142                                      WORD32 wd,
143                                      WORD32 ht,
144                                      WORD32 src_y_strd,
145                                      WORD32 src_uv_strd,
146                                      WORD32 dst_strd,
147                                      WORD32 is_u_first)
148 {
149     WORD16 i2_r, i2_g, i2_b;
150     UWORD32 u4_r, u4_g, u4_b;
151     WORD16 i2_i, i2_j;
152     UWORD8 *pu1_y_src_nxt;
153     UWORD16 *pu2_rgb_dst_NextRow;
154 
155     UWORD8 *pu1_u_src, *pu1_v_src;
156 
157     if (is_u_first)
158     {
159         pu1_u_src = (UWORD8 *) pu1_uv_src;
160         pu1_v_src = (UWORD8 *) pu1_uv_src + 1;
161     }
162     else
163     {
164         pu1_u_src = (UWORD8 *) pu1_uv_src + 1;
165         pu1_v_src = (UWORD8 *) pu1_uv_src;
166     }
167 
168     pu1_y_src_nxt = pu1_y_src + src_y_strd;
169     pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
170 
171     for (i2_i = 0; i2_i < (ht >> 1); i2_i++)
172     {
173         for (i2_j = (wd >> 1); i2_j > 0; i2_j--)
174         {
175             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
176             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3)
177                             >> 13;
178             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
179 
180             pu1_u_src += 2;
181             pu1_v_src += 2;
182             /* pixel 0 */
183             /* B */
184             u4_b = CLIP_U8(*pu1_y_src + i2_b);
185             u4_b >>= 3;
186             /* G */
187             u4_g = CLIP_U8(*pu1_y_src + i2_g);
188             u4_g >>= 2;
189             /* R */
190             u4_r = CLIP_U8(*pu1_y_src + i2_r);
191             u4_r >>= 3;
192 
193             pu1_y_src++;
194             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
195 
196             /* pixel 1 */
197             /* B */
198             u4_b = CLIP_U8(*pu1_y_src + i2_b);
199             u4_b >>= 3;
200             /* G */
201             u4_g = CLIP_U8(*pu1_y_src + i2_g);
202             u4_g >>= 2;
203             /* R */
204             u4_r = CLIP_U8(*pu1_y_src + i2_r);
205             u4_r >>= 3;
206 
207             pu1_y_src++;
208             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
209 
210             /* pixel 2 */
211             /* B */
212             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
213             u4_b >>= 3;
214             /* G */
215             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
216             u4_g >>= 2;
217             /* R */
218             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
219             u4_r >>= 3;
220 
221             pu1_y_src_nxt++;
222             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
223 
224             /* pixel 3 */
225             /* B */
226             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
227             u4_b >>= 3;
228             /* G */
229             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
230             u4_g >>= 2;
231             /* R */
232             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
233             u4_r >>= 3;
234 
235             pu1_y_src_nxt++;
236             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
237 
238         }
239 
240         pu1_u_src = pu1_u_src + src_uv_strd - wd;
241         pu1_v_src = pu1_v_src + src_uv_strd - wd;
242 
243         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
244         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
245 
246         pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
247         pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
248     }
249 
250 }
251 
252 /**
253 *******************************************************************************
254 *
255 * @brief Function used to perform color space conversion from 420SP to RGBA888
256 *
257 * @par   Description
258 * Function used to perform color space conversion from 420SP to RGBA888
259 *
260 * @param[in] pu1_y_src
261 *  Input Y pointer
262 *
263 * @param[in] pu1_uv_src
264 *  Input UV pointer
265 *
266 * @param[in] pu4_rgba_dst
267 *  Output RGB pointer
268 *
269 * @param[in] wd
270 *  Width
271 *
272 * @param[in] ht
273 *  Height
274 *
275 * @param[in] src_y_strd
276 *  Input Y Stride
277 *
278 * @param[in] src_uv_strd
279 *  Input UV stride
280 *
281 * @param[in] dst_strd
282 *  Output stride
283 *
284 * @param[in] is_u_first
285 *  Flag to indicate chroma ordering
286 *
287 * @returns none
288 *
289 *******************************************************************************
290 */
ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD32 * pu4_rgba_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_strd,WORD32 is_u_first)291 void ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
292                                        UWORD8 *pu1_uv_src,
293                                        UWORD32 *pu4_rgba_dst,
294                                        WORD32 wd,
295                                        WORD32 ht,
296                                        WORD32 src_y_strd,
297                                        WORD32 src_uv_strd,
298                                        WORD32 dst_strd,
299                                        WORD32 is_u_first)
300 {
301     WORD16 i2_r, i2_g, i2_b;
302     UWORD32 u4_r, u4_g, u4_b;
303     WORD16 i2_i, i2_j;
304     UWORD8 *pu1_y_src_nxt;
305     UWORD32 *pu4_rgba_dst_NextRow;
306     UWORD8 *pu1_u_src, *pu1_v_src;
307 
308     if (is_u_first)
309     {
310         pu1_u_src = (UWORD8 *) pu1_uv_src;
311         pu1_v_src = (UWORD8 *) pu1_uv_src + 1;
312     }
313     else
314     {
315         pu1_u_src = (UWORD8 *) pu1_uv_src + 1;
316         pu1_v_src = (UWORD8 *) pu1_uv_src;
317     }
318 
319     pu1_y_src_nxt = pu1_y_src + src_y_strd;
320 
321     pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd;
322 
323     for (i2_i = 0; i2_i < (ht >> 1); i2_i++)
324     {
325         for (i2_j = (wd >> 1); i2_j > 0; i2_j--)
326         {
327             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
328             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3)
329                             >> 13;
330             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
331 
332             pu1_u_src += 2;
333             pu1_v_src += 2;
334             /* pixel 0 */
335             /* B */
336             u4_b = CLIP_U8(*pu1_y_src + i2_b);
337             /* G */
338             u4_g = CLIP_U8(*pu1_y_src + i2_g);
339             /* R */
340             u4_r = CLIP_U8(*pu1_y_src + i2_r);
341 
342             pu1_y_src++;
343             *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
344 
345             /* pixel 1 */
346             /* B */
347             u4_b = CLIP_U8(*pu1_y_src + i2_b);
348             /* G */
349             u4_g = CLIP_U8(*pu1_y_src + i2_g);
350             /* R */
351             u4_r = CLIP_U8(*pu1_y_src + i2_r);
352 
353             pu1_y_src++;
354             *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
355 
356             /* pixel 2 */
357             /* B */
358             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
359             /* G */
360             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
361             /* R */
362             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
363 
364             pu1_y_src_nxt++;
365             *pu4_rgba_dst_NextRow++ =
366                             ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
367 
368             /* pixel 3 */
369             /* B */
370             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
371             /* G */
372             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
373             /* R */
374             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
375 
376             pu1_y_src_nxt++;
377             *pu4_rgba_dst_NextRow++ =
378                             ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
379 
380         }
381 
382         pu1_u_src = pu1_u_src + src_uv_strd - wd;
383         pu1_v_src = pu1_v_src + src_uv_strd - wd;
384 
385         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
386         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
387 
388         pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd;
389         pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd;
390     }
391 
392 }
393 
394 /**
395 *******************************************************************************
396 *
397 * @brief Function used for copying a 420SP buffer
398 *
399 * @par   Description
400 *  Function used for copying a 420SP buffer
401 *
402 * @param[in] pu1_y_src
403 *  Input Y pointer
404 *
405 * @param[in] pu1_uv_src
406 *  Input UV pointer (UV is interleaved either in UV or VU format)
407 *
408 * @param[in] pu1_y_dst
409 *  Output Y pointer
410 *
411 * @param[in] pu1_uv_dst
412 *  Output UV pointer (UV is interleaved in the same format as that of input)
413 *
414 * @param[in] wd
415 *  Width
416 *
417 * @param[in] ht
418 *  Height
419 *
420 * @param[in] src_y_strd
421 *  Input Y Stride
422 *
423 * @param[in] src_uv_strd
424 *  Input UV stride
425 *
426 * @param[in] dst_y_strd
427 *  Output Y stride
428 *
429 * @param[in] dst_uv_strd
430 *  Output UV stride
431 *
432 * @returns None
433 *
434 *******************************************************************************
435 */
ih264e_fmt_conv_420sp_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)436 void ih264e_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
437                                     UWORD8 *pu1_uv_src,
438                                     UWORD8 *pu1_y_dst,
439                                     UWORD8 *pu1_uv_dst,
440                                     WORD32 wd,
441                                     WORD32 ht,
442                                     WORD32 src_y_strd,
443                                     WORD32 src_uv_strd,
444                                     WORD32 dst_y_strd,
445                                     WORD32 dst_uv_strd)
446 {
447     UWORD8 *pu1_src, *pu1_dst;
448     WORD32 num_rows, num_cols, src_strd, dst_strd;
449     WORD32 i;
450 
451     /* copy luma */
452     pu1_src = (UWORD8 *) pu1_y_src;
453     pu1_dst = (UWORD8 *) pu1_y_dst;
454 
455     num_rows = ht;
456     num_cols = wd;
457 
458     src_strd = src_y_strd;
459     dst_strd = dst_y_strd;
460 
461     for (i = 0; i < num_rows; i++)
462     {
463         memcpy(pu1_dst, pu1_src, num_cols);
464         pu1_dst += dst_strd;
465         pu1_src += src_strd;
466     }
467 
468     /* copy U and V */
469     pu1_src = (UWORD8 *) pu1_uv_src;
470     pu1_dst = (UWORD8 *) pu1_uv_dst;
471 
472     num_rows = ht >> 1;
473     num_cols = wd;
474 
475     src_strd = src_uv_strd;
476     dst_strd = dst_uv_strd;
477 
478     for (i = 0; i < num_rows; i++)
479     {
480         memcpy(pu1_dst, pu1_src, num_cols);
481         pu1_dst += dst_strd;
482         pu1_src += src_strd;
483     }
484     return;
485 }
486 
487 /**
488 *******************************************************************************
489 *
490 * @brief Function used for copying a 420SP buffer and interchange chroma planes
491 *
492 * @par   Description
493 *  Function used for copying a 420SP buffer and interchange chroma planes
494 *
495 * @param[in] pu1_y_src
496 *  Input Y pointer
497 *
498 * @param[in] pu1_uv_src
499 *  Input UV pointer (UV is interleaved either in UV or VU format)
500 *
501 * @param[in] pu1_y_dst
502 *  Output Y pointer
503 *
504 * @param[in] pu1_uv_dst
505 *  Output UV pointer (UV is interleaved in the opp. format as that of input)
506 *
507 * @param[in] wd
508 *  Width
509 *
510 * @param[in] ht
511 *  Height
512 *
513 * @param[in] src_y_strd
514 *  Input Y Stride
515 *
516 * @param[in] src_uv_strd
517 *  Input UV stride
518 *
519 * @param[in] dst_y_strd
520 *  Output Y stride
521 *
522 * @param[in] dst_uv_strd
523 *  Output UV stride
524 *
525 * @returns None
526 *
527 *******************************************************************************
528 */
ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd)529 void ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
530                                             UWORD8 *pu1_uv_src,
531                                             UWORD8 *pu1_y_dst,
532                                             UWORD8 *pu1_uv_dst,
533                                             WORD32 wd,
534                                             WORD32 ht,
535                                             WORD32 src_y_strd,
536                                             WORD32 src_uv_strd,
537                                             WORD32 dst_y_strd,
538                                             WORD32 dst_uv_strd)
539 {
540     UWORD8 *pu1_src, *pu1_dst;
541     WORD32 num_rows, num_cols, src_strd, dst_strd;
542     WORD32 i;
543 
544     /* copy luma */
545     pu1_src = (UWORD8 *) pu1_y_src;
546     pu1_dst = (UWORD8 *) pu1_y_dst;
547 
548     num_rows = ht;
549     num_cols = wd;
550 
551     src_strd = src_y_strd;
552     dst_strd = dst_y_strd;
553 
554     for (i = 0; i < num_rows; i++)
555     {
556         memcpy(pu1_dst, pu1_src, num_cols);
557         pu1_dst += dst_strd;
558         pu1_src += src_strd;
559     }
560 
561     /* copy U and V */
562     pu1_src = (UWORD8 *) pu1_uv_src;
563     pu1_dst = (UWORD8 *) pu1_uv_dst;
564 
565     num_rows = ht >> 1;
566     num_cols = wd;
567 
568     src_strd = src_uv_strd;
569     dst_strd = dst_uv_strd;
570 
571     for (i = 0; i < num_rows; i++)
572     {
573         WORD32 j;
574         for (j = 0; j < num_cols; j += 2)
575         {
576             pu1_dst[j + 0] = pu1_src[j + 1];
577             pu1_dst[j + 1] = pu1_src[j + 0];
578         }
579         pu1_dst += dst_strd;
580         pu1_src += src_strd;
581     }
582     return;
583 }
584 
585 /**
586 *******************************************************************************
587 *
588 * @brief Function used to perform color space conversion from 420SP to 420P
589 *
590 * @par   Description
591 * Function used to perform color space conversion from 420SP to 420P
592 *
593 * @param[in] pu1_y_src
594 *  Input Y pointer
595 *
596 * @param[in] pu1_uv_src
597 *  Input UV pointer (UV is interleaved either in UV or VU format)
598 *
599 * @param[in] pu1_y_dst
600 *  Output Y pointer
601 *
602 * @param[in] pu1_u_dst
603 *  Output U pointer
604 *
605 * @param[in] pu1_v_dst
606 *  Output V pointer
607 *
608 * @param[in] wd
609 *  Width
610 *
611 * @param[in] ht
612 *  Height
613 *
614 * @param[in] src_y_strd
615 *  Input Y Stride
616 *
617 * @param[in] src_uv_strd
618 *  Input UV stride
619 *
620 * @param[in] dst_y_strd
621 *  Output Y stride
622 *
623 * @param[in] dst_uv_strd
624 *  Output UV stride
625 *
626 * @param[in] is_u_first
627 *  Flag to indicate chroma ordering
628 *
629 * @param[in] disable_luma_copy
630 *  Flag to indicate if only UV copy needs to be done
631 *
632 * @returns none
633 *
634 * @remarks none
635 *
636 *******************************************************************************
637 */
ih264e_fmt_conv_420sp_to_420p(UWORD8 * pu1_y_src,UWORD8 * pu1_uv_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,WORD32 wd,WORD32 ht,WORD32 src_y_strd,WORD32 src_uv_strd,WORD32 dst_y_strd,WORD32 dst_uv_strd,WORD32 is_u_first,WORD32 disable_luma_copy)638 void ih264e_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
639                                    UWORD8 *pu1_uv_src,
640                                    UWORD8 *pu1_y_dst,
641                                    UWORD8 *pu1_u_dst,
642                                    UWORD8 *pu1_v_dst,
643                                    WORD32 wd,
644                                    WORD32 ht,
645                                    WORD32 src_y_strd,
646                                    WORD32 src_uv_strd,
647                                    WORD32 dst_y_strd,
648                                    WORD32 dst_uv_strd,
649                                    WORD32 is_u_first,
650                                    WORD32 disable_luma_copy)
651 {
652     UWORD8 *pu1_src, *pu1_dst;
653     UWORD8 *pu1_u_src, *pu1_v_src;
654     WORD32 num_rows, num_cols, src_strd, dst_strd;
655     WORD32 i, j;
656 
657     if (0 == disable_luma_copy)
658     {
659         /* copy luma */
660         pu1_src = (UWORD8 *) pu1_y_src;
661         pu1_dst = (UWORD8 *) pu1_y_dst;
662 
663         num_rows = ht;
664         num_cols = wd;
665 
666         src_strd = src_y_strd;
667         dst_strd = dst_y_strd;
668 
669         for (i = 0; i < num_rows; i++)
670         {
671             memcpy(pu1_dst, pu1_src, num_cols);
672             pu1_dst += dst_strd;
673             pu1_src += src_strd;
674         }
675     }
676     /* de-interleave U and V and copy to destination */
677     if (is_u_first)
678     {
679         pu1_u_src = (UWORD8 *) pu1_uv_src;
680         pu1_v_src = (UWORD8 *) pu1_uv_src + 1;
681     }
682     else
683     {
684         pu1_u_src = (UWORD8 *) pu1_uv_src + 1;
685         pu1_v_src = (UWORD8 *) pu1_uv_src;
686     }
687 
688     num_rows = ht >> 1;
689     num_cols = wd >> 1;
690 
691     src_strd = src_uv_strd;
692     dst_strd = dst_uv_strd;
693 
694     for (i = 0; i < num_rows; i++)
695     {
696         for (j = 0; j < num_cols; j++)
697         {
698             pu1_u_dst[j] = pu1_u_src[j * 2];
699             pu1_v_dst[j] = pu1_v_src[j * 2];
700         }
701 
702         pu1_u_dst += dst_strd;
703         pu1_v_dst += dst_strd;
704         pu1_u_src += src_strd;
705         pu1_v_src += src_strd;
706     }
707     return;
708 }
709 
710 /**
711 *******************************************************************************
712 *
713 * @brief Function used to perform color space conversion from 420P to 420SP
714 *
715 * @par   Description
716 * Function used to perform color space conversion from 420P to 420SP
717 *
718 * @param[in] pu1_y_src
719 *  Input Y pointer
720 *
721 * @param[in] pu1_u_src
722 *  Input U pointer
723 *
724 * @param[in] pu1_v_dst
725 *  Input V pointer
726 *
727 * @param[in] pu1_y_dst
728 *  Output Y pointer
729 *
730 * @param[in] pu1_uv_dst
731 *  Output UV pointer
732 *
733 * @param[in] u4_width
734 *  Width
735 *
736 * @param[in] u4_height
737 *  Height
738 *
739 * @param[in] src_y_strd
740 *  Input Y Stride
741 *
742 * @param[in] src_u_strd
743 *  Input U stride
744 *
745 * @param[in] src_v_strd
746 *  Input V stride
747 *
748 * @param[in] dst_y_strd
749 *  Output Y stride
750 *
751 * @param[in] dst_uv_strd
752 *  Output UV stride
753 *
754 * @param[in] convert_uv_only
755 *  Flag to indicate if only UV copy needs to be done
756 *
757 * @returns none
758 *
759 * @remarks none
760 *
761 *******************************************************************************
762 */
ih264e_fmt_conv_420p_to_420sp(UWORD8 * pu1_y_src,UWORD8 * pu1_u_src,UWORD8 * pu1_v_src,UWORD8 * pu1_y_dst,UWORD8 * pu1_uv_dst,UWORD16 u2_height,UWORD16 u2_width,UWORD16 src_y_strd,UWORD16 src_u_strd,UWORD16 src_v_strd,UWORD16 dst_y_strd,UWORD16 dst_uv_strd,UWORD32 convert_uv_only)763 void ih264e_fmt_conv_420p_to_420sp(UWORD8 *pu1_y_src,
764                                    UWORD8 *pu1_u_src,
765                                    UWORD8 *pu1_v_src,
766                                    UWORD8 *pu1_y_dst,
767                                    UWORD8 *pu1_uv_dst,
768                                    UWORD16 u2_height,
769                                    UWORD16 u2_width,
770                                    UWORD16 src_y_strd,
771                                    UWORD16 src_u_strd,
772                                    UWORD16 src_v_strd,
773                                    UWORD16 dst_y_strd,
774                                    UWORD16 dst_uv_strd,
775                                    UWORD32 convert_uv_only)
776 {
777     UWORD8 *pu1_src, *pu1_dst;
778     UWORD8 *pu1_src_u, *pu1_src_v;
779     UWORD16 i;
780     UWORD32 u2_width_uv;
781     UWORD32 dest_inc_Y = 0, dest_inc_UV = 0;
782 
783     dest_inc_UV = dst_uv_strd;
784 
785     if (0 == convert_uv_only)
786     {
787         /* Copy Y buffer */
788         pu1_dst = (UWORD8 *) pu1_y_dst;
789         pu1_src = (UWORD8 *) pu1_y_src;
790 
791         dest_inc_Y = dst_y_strd;
792 
793         for (i = 0; i < u2_height; i++)
794         {
795             memcpy((void *) pu1_dst, (void *) pu1_src, u2_width);
796             pu1_dst += dest_inc_Y;
797             pu1_src += src_y_strd;
798         }
799     }
800 
801     /* Interleave Cb and Cr buffers */
802     pu1_src_u = pu1_u_src;
803     pu1_src_v = pu1_v_src;
804     pu1_dst = pu1_uv_dst;
805 
806     u2_height = (u2_height + 1) >> 1;
807     u2_width_uv = (u2_width + 1) >> 1;
808     for (i = 0; i < u2_height; i++)
809     {
810         UWORD32 j;
811         for (j = 0; j < u2_width_uv; j++)
812         {
813             *pu1_dst++ = *pu1_src_u++;
814             *pu1_dst++ = *pu1_src_v++;
815         }
816 
817         pu1_dst += dest_inc_UV - u2_width;
818         pu1_src_u += src_u_strd - u2_width_uv;
819         pu1_src_v += src_v_strd - u2_width_uv;
820     }
821 }
822 
823 /**
824 *******************************************************************************
825 *
826 * @brief Function used to convert 422 interleaved to 420sp
827 *
828 * @par   Description
829 *  Function used to convert 422 interleaved to 420sp
830 *
831 * @param[in] pu1_y_buf
832 *  Output Y pointer
833 *
834 * @param[in] pu1_u_buf
835 *  Output u pointer
836 *
837 * @param[in[ pu1_v_buf
838 *  Output V pointer
839 *
840 * @param[in] pu1_422i_buf
841 *  Input 422i pointer
842 *
843 * @param[in] u4_y_width
844 *  Width of Y component
845 *
846 * @param[in] u4_y_height
847 *  Height of Y component
848 *
849 * @param[in] u4_y_stride
850 *  Stride of pu1_y_buf
851 *
852 * @param[in] u4_u_stride
853 *  Stride of pu1_u_buf
854 *
855 * @param[in] u4_v_stride
856 *  Stride of pu1_v_buf
857 *
858 * @param[in] u4_422i_stride
859 *  Stride of pu1_422i_buf
860 *
861 * @returns None
862 *
863 * @remarks For conversion
864 *  pu1_v_buf = pu1_u_buf+1
865 *  u4_u_stride = u4_v_stride
866 *
867 * The extra parameters are for maintaining API with assembly function
868 *
869 *******************************************************************************
870 */
ih264e_fmt_conv_422i_to_420sp(UWORD8 * pu1_y_buf,UWORD8 * pu1_u_buf,UWORD8 * pu1_v_buf,UWORD8 * pu1_422i_buf,WORD32 u4_y_width,WORD32 u4_y_height,WORD32 u4_y_stride,WORD32 u4_u_stride,WORD32 u4_v_stride,WORD32 u4_422i_stride)871 void ih264e_fmt_conv_422i_to_420sp(UWORD8 *pu1_y_buf,
872                                    UWORD8 *pu1_u_buf,
873                                    UWORD8 *pu1_v_buf,
874                                    UWORD8 *pu1_422i_buf,
875                                    WORD32 u4_y_width,
876                                    WORD32 u4_y_height,
877                                    WORD32 u4_y_stride,
878                                    WORD32 u4_u_stride,
879                                    WORD32 u4_v_stride,
880                                    WORD32 u4_422i_stride)
881 {
882     WORD32 row, col;
883     UWORD8 *row_even_422 = pu1_422i_buf;
884     UWORD8 *row_odd_422 = row_even_422 + (u4_422i_stride << 1);
885     UWORD8 *row_even_luma = pu1_y_buf;
886     /* Since at the end of loop, we have row_even_luma += (luma_width << 1),
887      * it should be same here right? */
888     UWORD8 *row_odd_luma = row_even_luma + u4_y_stride;
889     UWORD8 *row_cb = pu1_u_buf;
890     UWORD8 *row_cr = pu1_v_buf;
891 
892     for (row = 0; row < u4_y_height; row = row + 2)
893     {
894         for (col = 0; col < (u4_y_width << 1); col = col + 4)
895         {
896             UWORD8 cb_even = row_even_422[col];
897             UWORD8 cr_even = row_even_422[col + 2];
898 
899             row_cb[col >> 1] = cb_even;
900             row_cr[col >> 1] = cr_even;
901 
902             row_even_luma[col >> 1] = row_even_422[col + 1];
903             row_even_luma[(col >> 1) + 1] = row_even_422[col + 3];
904 
905             row_odd_luma[col >> 1] = row_odd_422[col + 1];
906             row_odd_luma[(col >> 1) + 1] = row_odd_422[col + 3];
907         }
908 
909         row_even_422 += (u4_422i_stride << 2);
910         row_odd_422 += (u4_422i_stride << 2);
911 
912         row_even_luma += (u4_y_stride << 1);
913         row_odd_luma += (u4_y_stride << 1);
914 
915         row_cb += u4_u_stride;
916         row_cr += u4_v_stride;
917     }
918 }
919 
920 /**
921 *******************************************************************************
922 *
923 * @brief Function used for format conversion or frame copy
924 *
925 * @par   Description
926 * Function used from copying or converting a reference frame to display buffer
927 * in non shared mode
928 *
929 * @param[in] ps_codec
930 *  Codec ctxt
931 *
932 * @param[in] ps_pic
933 *  Reference pic ctxt
934 *
935 * @param[in] pu1_y_dst
936 *  Output Y pointer
937 *
938 * @param[in] pu1_u_dst
939 *  Output U/UV pointer ( UV is interleaved in the same format as that of input)
940 *
941 * @param[in] pu1_v_dst
942 *  Output V pointer ( used in 420P output case)
943 *
944 * @param[in] u4_dst_y_strd
945 *  Stride of destination Y buffer
946 *
947 * @param[in] u4_dst_u_strd
948 *  Stride of destination  U/V buffer
949 *
950 * @param[in] cur_row
951 *  Start row of fmt conversion
952 *
953 * @param[in] num_rows
954 *  number of rows to process
955 *
956 * @returns error status
957 *
958 * @remarks Assumes that the stride of U and V buffers are same.
959 *
960 *******************************************************************************
961 */
ih264e_fmt_conv(codec_t * ps_codec,pic_buf_t * ps_pic,UWORD8 * pu1_y_dst,UWORD8 * pu1_u_dst,UWORD8 * pu1_v_dst,UWORD32 u4_dst_y_strd,UWORD32 u4_dst_uv_strd,WORD32 cur_row,WORD32 num_rows)962 IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
963                                pic_buf_t *ps_pic,
964                                UWORD8 *pu1_y_dst,
965                                UWORD8 *pu1_u_dst,
966                                UWORD8 *pu1_v_dst,
967                                UWORD32 u4_dst_y_strd,
968                                UWORD32 u4_dst_uv_strd,
969                                WORD32 cur_row,
970                                WORD32 num_rows)
971 {
972     IH264E_ERROR_T ret = IH264E_SUCCESS;
973     UWORD8 *pu1_y_src, *pu1_uv_src;
974     UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
975     UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
976     UWORD16 *pu2_rgb_dst_tmp;
977     UWORD32 *pu4_rgb_dst_tmp;
978     WORD32 is_u_first;
979     UWORD8 *pu1_luma;
980     UWORD8 *pu1_chroma;
981     WORD32 dst_stride, wd;
982 
983     if (0 == num_rows)
984         return ret;
985 
986     pu1_luma = ps_pic->pu1_luma;
987     pu1_chroma = ps_pic->pu1_chroma;
988 
989     dst_stride = ps_codec->s_cfg.u4_wd;
990     wd = ps_codec->s_cfg.u4_disp_wd;
991     is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;
992 
993     /* In case of 420P output luma copy is disabled for shared mode */
994     {
995         pu1_y_src = pu1_luma + cur_row * ps_codec->i4_rec_strd;
996         pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_rec_strd;
997 
998         pu2_rgb_dst_tmp = (UWORD16 *) pu1_y_dst;
999         pu2_rgb_dst_tmp += cur_row * dst_stride;
1000         pu4_rgb_dst_tmp = (UWORD32 *) pu1_y_dst;
1001         pu4_rgb_dst_tmp += cur_row * dst_stride;
1002 
1003         pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
1004         pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
1005         pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
1006         pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;
1007 
1008         /* If the call is non-blocking and there are no rows to be copied then return */
1009         /* In non-shared mode, reference buffers are in 420SP UV format,
1010          * if output also is in 420SP_UV, then just copy
1011          * if output is in 420SP_VU then swap UV values
1012          */
1013         if ((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
1014                         (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
1015         {
1016             ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
1017                                            pu1_uv_dst_tmp, wd, num_rows,
1018                                            ps_codec->i4_rec_strd,
1019                                            ps_codec->i4_rec_strd, u4_dst_y_strd,
1020                                            u4_dst_uv_strd);
1021         }
1022         else if (IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
1023         {
1024             ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
1025                                           pu1_u_dst_tmp, pu1_v_dst_tmp, wd,
1026                                           num_rows, ps_codec->i4_rec_strd,
1027                                           ps_codec->i4_rec_strd, u4_dst_y_strd,
1028                                           u4_dst_uv_strd, is_u_first, 0);
1029         }
1030     }
1031     return(ret);
1032 }
1033 
1034