/* xref: /aosp_15_r20/external/libhevc/common/arm/ihevc_intra_ref_substitution_a9q.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7) */
/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevc_intra_ref_substitution_a9q.c
*
* @brief
*  Contains reference substitution functions for intra prediction
*
* @author
*  Naveen
*
* @par List of Functions:
*  - ihevc_intra_pred_chroma_ref_substitution_a9q()
*  - ihevc_intra_pred_luma_ref_substitution_a9q()
*
* @remarks
*  None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "ihevc_typedefs.h"
#include "ihevc_platform_macros.h"
#include "ihevc_intra_pred.h"
#include "ihevc_mem_fns.h"
#include "ihevc_chroma_intra_pred.h"
#include "ihevc_common_tables.h"
#include "ihevc_defs.h"
#include "ihevc_mem_fns.h"
#include "ihevc_macros.h"

/* Local constants used by the reference substitution routines.            */
/* The Tnn_4NT values are 4 * nt for one block size; the routines loop     */
/* over (4 * nt) + 1 reference positions.                                  */
#define MAX_CU_SIZE 64  /* not referenced by the routines in this file     */
#define BIT_DEPTH 8     /* sample bit depth; the DC fill uses 1 << (8 - 1) */
#define T32_4NT 128     /* 4 * nt for luma,   nt == 32                     */
#define T16_4NT 64      /* 4 * nt for luma,   nt == 16                     */
#define T16C_4NT 64     /* 4 * nt for chroma, nt == 16                     */
#define T8C_4NT 32      /* 4 * nt for chroma, nt == 8                      */

60 /**
61 *******************************************************************************
62 *
63 * @brief
64 *  Reference substitution process for samples unavailable  for prediction
65 * Refer to section 8.4.4.2.2
66 *
67 * @par Description:
68 *
69 *
70 * @param[in] pu1_top_left
71 *  UWORD8 pointer to the top-left
72 *
73 * @param[in] pu1_top
74 *  UWORD8 pointer to the top
75 *
76 * @param[in] pu1_left
77 *  UWORD8 pointer to the left
78 *
79 * @param[in] src_strd
80 *  WORD32 Source stride
81 *
82 * @param[in] nbr_flags
83 *  WORD32 neighbor availability flags
84 *
85 * @param[in] nt
86 *  WORD32 transform Block size
87 *
88 * @param[in] dst_strd
89 *  WORD32 Destination stride
90 *
91 * @returns
92 *
93 * @remarks
94 *  None
95 *
96 *******************************************************************************
97 */
98 
ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 * pu1_top_left,UWORD8 * pu1_top,UWORD8 * pu1_left,WORD32 src_strd,WORD32 nt,WORD32 nbr_flags,UWORD8 * pu1_dst,WORD32 dst_strd)99 void ihevc_intra_pred_chroma_ref_substitution_a9q(UWORD8 *pu1_top_left,
100                                                   UWORD8 *pu1_top,
101                                                   UWORD8 *pu1_left,
102                                                   WORD32 src_strd,
103                                                   WORD32 nt,
104                                                   WORD32 nbr_flags,
105                                                   UWORD8 *pu1_dst,
106                                                   WORD32 dst_strd)
107 {
108     UWORD8 pu1_ref_u, pu1_ref_v;
109     WORD32 dc_val, i, j;
110     WORD32 total_samples = (4 * nt) + 1;
111     WORD32 get_bits;
112     WORD32 next;
113     WORD32 bot_left, left, top, tp_right, tp_left;
114     WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
115     WORD32 a_nbr_flag[5];
116     UNUSED(dst_strd);
117     /* Neighbor Flag Structure*/
118     /* WORD32 nbr_flags MSB-->LSB   TOP LEFT | TOP-RIGHT |  TOP   | LEFT    | BOTTOM LEFT*/
119     /*                              (1 bit)     (4 bits)  (4 bits) (4 bits)  (4 bits)  */
120 
121     if(nbr_flags == 0)
122     {
123 /* If no neighbor flags are present, fill the neighbor samples with DC value */
124         /*dc_val = 1 << (BIT_DEPTH - 1);*/
125         dc_val = 1 << (8 - 1);
126         for(i = 0; i < (2 * total_samples); i++)
127         {
128             pu1_dst[i] = dc_val;
129         }
130     }
131     else
132     {
133         /* Else fill the corresponding samples */
134 
135         /* Check for the neighbors availibility */
136         tp_left     = (nbr_flags & 0x10000);
137         tp_right    = (nbr_flags & 0x0f000);
138         top         = (nbr_flags & 0x00f00);
139         left        = (nbr_flags & 0x000f0);
140         bot_left    = (nbr_flags & 0x0000f);
141 
142         /* Fill nbrs depending on avalibility */
143         /* Top -Left nbrs  */
144         if(0 != tp_left)
145         {
146             pu1_dst[(4 * nt)] = *pu1_top_left; // U top-left sample
147             pu1_dst[(4 * nt) + 1] = *(pu1_top_left + 1); // V top-left sample
148         }
149         /* Left nbrs  */
150         if(0 != left)
151         {
152             for(i = 0, j = 0; i < (2 * nt); i += 2)
153             {
154                 pu1_dst[(4 * nt) - 2 - i] = pu1_left[j * src_strd]; // U left samples
155                 pu1_dst[(4 * nt) - 1 - i] = pu1_left[(j * src_strd) + 1]; // V left samples
156                 j++;
157             }
158         }
159         /* Bottom - Left nbrs  */
160         if(0 != bot_left)
161         {
162             for(i = (2 * nt), j = nt; i < (4 * nt); i += 2)
163             {
164                 pu1_dst[(4 * nt) - 2 - i] = pu1_left[j * src_strd]; // U left samples
165                 pu1_dst[(4 * nt) - 1 - i] = pu1_left[(j * src_strd) + 1]; // V left samples
166                 j++;
167             }
168         }
169         /* Top nbrs  */
170         if(0 != top)
171         {
172             ihevc_memcpy_mul_8_a9q(&pu1_dst[(4 * nt) + 2], pu1_top, 2 * nt);
173             // U-V interleaved Top-top right samples
174         }
175 
176         /* Top - Right nbrs  */
177         if(0 != tp_right)
178         {
179             ihevc_memcpy_mul_8_a9q(&pu1_dst[(4 * nt) + 2 + 2 * nt], pu1_top + 2 * nt, 2 * nt);
180             // U-V interleaved Top-top right samples
181         }
182 
183         if(nt == 4)
184         {
185             /* 1 bit extraction for all the neighboring blocks */
186             tp_left = (nbr_flags & 0x10000) >> 16;
187             bot_left = (nbr_flags & 0x8) >> 3;
188             left = (nbr_flags & 0x80) >> 7;
189             top = (nbr_flags & 0x100) >> 8;
190             tp_right = (nbr_flags & 0x1000) >> 12;
191 
192             next = 1;
193             a_nbr_flag[0] = bot_left;
194             a_nbr_flag[1] = left;
195             a_nbr_flag[2] = tp_left;
196             a_nbr_flag[3] = top;
197             a_nbr_flag[4] = tp_right;
198 
199             /* If bottom -left is not available, reverse substitution process*/
200             if(bot_left == 0)
201             {
202                 /* Check for the 1st available sample from bottom-left*/
203                 while(!a_nbr_flag[next])
204                     next++;
205 
206                 /* If Left, top-left are available*/
207                 if(next <= 2)
208                 {
209                     UWORD16 *pu2_dst;
210                     idx = (nt * next);
211                     pu2_dst = (UWORD16 *)&pu1_dst[2 * idx];
212                     ihevc_memset_16bit_a9q((UWORD16 *)pu1_dst, pu2_dst[0], idx);
213                 }
214                 else /* If top, top-right are available */
215                 {
216                     UWORD16 *pu2_dst;
217                     /* Idx is changed to copy 1 pixel value for top-left ,if top-left is not available*/
218                     idx = (nt * (next - 1)) + 1;
219                     pu2_dst = (UWORD16 *)&pu1_dst[2 * idx];
220                     ihevc_memset_16bit_a9q((UWORD16 *)pu1_dst, pu2_dst[0], idx);
221                 }
222             }
223 
224             if(left == 0)
225             {
226                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(2 * nt) - 2];
227                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(2 * nt)], pu2_dst[0], nt);
228 
229 
230             }
231             if(tp_left == 0)
232             {
233                 pu1_dst[4 * nt] = pu1_dst[(4 * nt) - 2];
234                 pu1_dst[(4 * nt) + 1] = pu1_dst[(4 * nt) - 1];
235             }
236             if(top == 0)
237             {
238                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(4 * nt)];
239                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(4 * nt) + 2], pu2_dst[0], nt);
240 
241 
242             }
243             if(tp_right == 0)
244             {
245                 UWORD16 *pu2_dst = (UWORD16 *)&pu1_dst[(6 * nt)];
246                 ihevc_memset_16bit_a9q((UWORD16 *)&pu1_dst[(6 * nt) + 2], pu2_dst[0], nt);
247 
248 
249             }
250         }
251         else if(nt == 8)
252         {
253             WORD32 nbr_flags_temp = 0;
254             nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
255                             + ((nbr_flags & 0x300) >> 4)
256                             + ((nbr_flags & 0x3000) >> 6)
257                             + ((nbr_flags & 0x10000) >> 8);
258 
259             /* compute trailing zeors based on nbr_flag for substitution process of below left see section .*/
260             /* as each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
261             {
262                 nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 4; /* for bottom left and left */
263                 if(nbr_id_from_bl == 32)
264                     nbr_id_from_bl = 16;
265                 if(nbr_id_from_bl == 16)
266                 {
267                     /* for top left : 1 pel per nbr bit */
268                     if(!((nbr_flags_temp >> 8) & 0x1))
269                     {
270                         nbr_id_from_bl++;
271                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 4; /* top and top right;  8 pels per nbr bit */
272 
273                     }
274                 }
275                 /* Reverse Substitution Process*/
276                 if(nbr_id_from_bl)
277                 {
278                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
279                     pu1_ref_u = pu1_dst[2 * nbr_id_from_bl];
280                     pu1_ref_v = pu1_dst[(2 * nbr_id_from_bl) + 1];
281                     for(i = 2 * (nbr_id_from_bl - 1); i >= 0; i -= 2)
282                     {
283                         pu1_dst[i] = pu1_ref_u;
284                         pu1_dst[i + 1] = pu1_ref_v;
285                     }
286                 }
287             }
288 
289             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
290             while(nbr_id_from_bl < ((T8C_4NT)+1))
291             {
292                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
293                 /* Divide by 8 to obtain the original index */
294                 frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
295 
296                 /* The Top-left flag is at the last bit location of nbr_flags*/
297                 if(nbr_id_from_bl == (T8C_4NT / 2))
298                 {
299                     get_bits = GET_BIT(nbr_flags_temp, 8);
300 
301                     /* only pel substitution for TL */
302                     if(!get_bits)
303                     {
304                         pu1_dst[2 * nbr_id_from_bl] = pu1_dst[(2 * nbr_id_from_bl) - 2];
305                         pu1_dst[(2 * nbr_id_from_bl) + 1] = pu1_dst[(2 * nbr_id_from_bl) - 1];
306                     }
307                 }
308                 else
309                 {
310                     get_bits = GET_BIT(nbr_flags_temp, frwd_nbr_flag);
311                     if(!get_bits)
312                     {
313                         UWORD16 *pu2_dst;
314                         /* 8 pel substitution (other than TL) */
315                         pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
316                         ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
317                     }
318 
319                 }
320                 nbr_id_from_bl += (nbr_id_from_bl == (T8C_4NT / 2)) ? 1 : 4;
321             }
322 
323         }
324         else if(nt == 16)
325         {
326             /* compute trailing ones based on mbr_flag for substitution process of below left see section .*/
327             /* as each bit in nbr flags corresponds to 4 pels for bot_left, left, top and topright but 1 pel for topleft */
328             {
329                 nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 4; /* for bottom left and left */
330 
331                 if(nbr_id_from_bl == 32)
332                 {
333                     /* for top left : 1 pel per nbr bit */
334                     if(!((nbr_flags >> 16) & 0x1))
335                     {
336                         /* top left not available */
337                         nbr_id_from_bl++;
338                         /* top and top right;  4 pels per nbr bit */
339                         nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 4;
340                     }
341                 }
342                 /* Reverse Substitution Process*/
343                 if(nbr_id_from_bl)
344                 {
345                     /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
346                     pu1_ref_u = pu1_dst[2 * nbr_id_from_bl];
347                     pu1_ref_v = pu1_dst[2 * nbr_id_from_bl + 1];
348                     for(i = (2 * (nbr_id_from_bl - 1)); i >= 0; i -= 2)
349                     {
350                         pu1_dst[i] = pu1_ref_u;
351                         pu1_dst[i + 1] = pu1_ref_v;
352                     }
353                 }
354             }
355 
356             /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
357             while(nbr_id_from_bl < ((T16C_4NT)+1))
358             {
359                 /* To Obtain the next unavailable idx flag after reverse neighbor substitution  */
360                 /* Devide by 4 to obtain the original index */
361                 frwd_nbr_flag = (nbr_id_from_bl >> 2); /*+ (nbr_id_from_bl & 0x1);*/
362 
363                 /* The Top-left flag is at the last bit location of nbr_flags*/
364                 if(nbr_id_from_bl == (T16C_4NT / 2))
365                 {
366                     get_bits = GET_BIT(nbr_flags, 16);
367                     /* only pel substitution for TL */
368                     if(!get_bits)
369                     {
370                         pu1_dst[2 * nbr_id_from_bl] = pu1_dst[(2 * nbr_id_from_bl) - 2];
371                         pu1_dst[(2 * nbr_id_from_bl) + 1] = pu1_dst[(2 * nbr_id_from_bl) - 1];
372                     }
373                 }
374                 else
375                 {
376                     get_bits = GET_BIT(nbr_flags, frwd_nbr_flag);
377                     if(!get_bits)
378                     {
379                         UWORD16 *pu2_dst;
380                         /* 4 pel substitution (other than TL) */
381                         pu2_dst = (UWORD16 *)&pu1_dst[(2 * nbr_id_from_bl) - 2];
382                         ihevc_memset_16bit_a9q((UWORD16 *)(pu1_dst + (2 * nbr_id_from_bl)), pu2_dst[0], 4);
383                     }
384 
385                 }
386                 nbr_id_from_bl += (nbr_id_from_bl == (T16C_4NT / 2)) ? 1 : 4;
387             }
388         }
389     }
390 }
391 
392 
/**
*******************************************************************************
*
* @brief
*  Reference substitution process for luma samples that are unavailable
*  for intra prediction. Refer to section 8.4.4.2.2
*
* @par Description:
*  Builds the reference array of (4 * nt) + 1 samples in pu1_dst:
*
*    [0          , nt)          bottom-left (bottom-most sample first)
*    [nt         , 2 * nt)      left        (bottom-most sample first)
*    [2 * nt]                   top-left
*    [2 * nt + 1 , 3 * nt + 1)  top
*    [3 * nt + 1 , 4 * nt + 1)  top-right
*
*  Available neighbours are copied from the source and unavailable ones are
*  first zeroed, then replaced by the nearest available sample (a reverse
*  fill from the first available sample towards index 0, followed by a
*  forward fill). When no usable neighbour is flagged the whole array is set
*  to the DC value 128.
*
*  Handled block sizes are nt <= 8, nt == 16 and nt == 32; for any other nt
*  nothing is written unless the DC fill applies.
*
* @param[in] pu1_top_left
*  UWORD8 pointer to the top-left sample
*
* @param[in] pu1_top
*  UWORD8 pointer to the top row; the top-right samples follow at
*  pu1_top + nt
*
* @param[in] pu1_left
*  UWORD8 pointer to the left column; one sample is read every src_strd
*  bytes, and the bottom-left samples continue at row nt
*
* @param[in] src_strd
*  WORD32 Source stride
*
* @param[in] nt
*  WORD32 transform Block size
*
* @param[in] nbr_flags
*  WORD32 neighbor availability flags,
*  MSB-->LSB   Top-Left | Top-Right | Top | Left | Bottom-Left
*                  1          4        4      4         4
*
* @param[out] pu1_dst
*  UWORD8 pointer to the destination reference array
*
* @param[in] dst_strd
*  WORD32 Destination stride (unused)
*
* @returns
*  None
*
* @remarks
*  None
*
*******************************************************************************
*/
void ihevc_intra_pred_luma_ref_substitution_a9q(UWORD8 *pu1_top_left,
                                                UWORD8 *pu1_top,
                                                UWORD8 *pu1_left,
                                                WORD32 src_strd,
                                                WORD32 nt,
                                                WORD32 nbr_flags,
                                                UWORD8 *pu1_dst,
                                                WORD32 dst_strd)
{
    UWORD8 pu1_ref;
    WORD32 dc_val, i;
    WORD32 total_samples = (4 * nt) + 1;
    WORD32 two_nt = 2 * nt;

    WORD32 three_nt = 3 * nt;
    WORD32 get_bits;
    WORD32 next;
    WORD32 bot_left, left, top, tp_right, tp_left;

    WORD32 idx, nbr_id_from_bl, frwd_nbr_flag;
    WORD32 used_flags;
    UNUSED(dst_strd);

    /* i.e. 1 << (BIT_DEPTH - 1) for 8-bit samples */
    dc_val = 1 << (8 - 1);

    /* Neighbor Flag Structure*/
    /* MSB ---> LSB */
    /*    Top-Left | Top-Right | Top | Left | Bottom-Left
              1         4         4     4         4
     */

    /* Keep only the flag bits the code for this block size looks at.        */
    /* If nbr_flags is non-zero but none of these bits is set, the search    */
    /* for the first available neighbour below would run past the flags and  */
    /* past the end of pu1_dst, so such input is treated as "nothing         */
    /* available".                                                           */
    if(nt <= 8)
        used_flags = nbr_flags & 0x11188; /* bits 3, 7, 8, 12, 16 */
    else if(nt == 16)
        used_flags = nbr_flags & 0x133CC; /* bits folded into nbr_flags_temp */
    else if(nt == 32)
        used_flags = nbr_flags & 0x1FFFF; /* all 17 flag bits */
    else
        used_flags = nbr_flags;

    /* If no neighbor flags are present, fill the neighbor samples with DC value */
    if(used_flags == 0)
    {
        for(i = 0; i < total_samples; i++)
        {
            pu1_dst[i] = dc_val;
        }
    }
    else
    {
        if(nt <= 8)
        {
            /* 1 bit extraction for all the neighboring blocks */
            tp_left = (nbr_flags & 0x10000) >> 16;
            bot_left = (nbr_flags & 0x8) >> 3;
            left = (nbr_flags & 0x80) >> 7;
            top = (nbr_flags & 0x100) >> 8;
            tp_right = (nbr_flags & 0x1000) >> 12;

            /* Copy the available neighbours, zero the unavailable ones */
            if(tp_left)
                pu1_dst[two_nt] = *pu1_top_left;
            else
                pu1_dst[two_nt] = 0;

            if(left)
            {
                /* source row 0 lands next to the top-left sample */
                for(i = 0; i < nt; i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_a9q(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
            }

            if(bot_left)
            {
                for(i = nt; i < two_nt; i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_a9q(&pu1_dst[two_nt - 1 - (two_nt - 1)], 0, nt);
            }

            if(top)
            {
                ihevc_memcpy_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
            }
            else
            {
                ihevc_memset_a9q(&pu1_dst[two_nt + 1], 0, nt);
            }

            if(tp_right)
            {
                ihevc_memcpy_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
            }
            else
            {
                ihevc_memset_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
            }
            next = 1;

            /* If bottom-left is not available, reverse substitution process */
            if(bot_left == 0)
            {
                WORD32 a_nbr_flag[5];
                a_nbr_flag[0] = bot_left;
                a_nbr_flag[1] = left;
                a_nbr_flag[2] = tp_left;
                a_nbr_flag[3] = top;
                a_nbr_flag[4] = tp_right;

                /* Check for the 1st available sample from bottom-left.      */
                /* used_flags != 0 guarantees one of entries 1..4 is set;    */
                /* the bound keeps the search inside a_nbr_flag regardless.  */
                while((next < 4) && (0 == a_nbr_flag[next]))
                    next++;

                if(next <= 2)
                {
                    /* Left or top-left is the first available neighbour */
                    idx = nt * next;
                }
                else
                {
                    /* Top or top-right is the first available neighbour:   */
                    /* one extra sample so that top-left is filled as well  */
                    idx = (nt * (next - 1)) + 1;
                }

                /* Replicate sample idx over samples [0, idx) */
                pu1_ref = pu1_dst[idx];
                for(i = 0; i < idx; i++)
                    pu1_dst[i] = pu1_ref;
            }

            /* Forward Substitution Process */
            /* If left is Unavailable, copy the last bottom-left value */
            if(left == 0)
            {
                ihevc_memset_a9q(&pu1_dst[nt], pu1_dst[nt - 1], nt);
            }
            /* If top-left is Unavailable, copy the last left value */
            if(tp_left == 0)
                pu1_dst[two_nt] = pu1_dst[two_nt - 1];
            /* If top is Unavailable, copy the last top-left value */
            if(top == 0)
            {
                ihevc_memset_a9q(&pu1_dst[two_nt + 1], pu1_dst[two_nt], nt);
            }
            /* If top right is Unavailable, copy the last top value */
            if(tp_right == 0)
            {
                ihevc_memset_a9q(&pu1_dst[three_nt + 1], pu1_dst[three_nt], nt);
            }
        }

        if(nt == 16)
        {
            /* Fold the flags into 2 bits per side:                          */
            /* bits 0-1 bottom-left, 2-3 left, 4-5 top, 6-7 top-right, 8 TL  */
            WORD32 nbr_flags_temp = 0;
            nbr_flags_temp = ((nbr_flags & 0xC) >> 2) + ((nbr_flags & 0xC0) >> 4)
                            + ((nbr_flags & 0x300) >> 4)
                            + ((nbr_flags & 0x3000) >> 6)
                            + ((nbr_flags & 0x10000) >> 8);

            /* Copy the available neighbours, zero the unavailable ones */
            if(nbr_flags & 0x10000)
                pu1_dst[two_nt] = *pu1_top_left;
            else
                pu1_dst[two_nt] = 0;

            if(nbr_flags & 0xC0)
            {
                for(i = 0; i < nt; i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
            }

            /* Bottom - left availability is checked for every 8x8 TU position and set accordingly */
            {
                if(nbr_flags & 0x8)
                {
                    for(i = nt; i < (nt + 8); i++)
                        pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
                }
                else
                {
                    ihevc_memset_mul_8_a9q(&pu1_dst[nt - 8], 0, 8);
                }

                if(nbr_flags & 0x4)
                {
                    for(i = (nt + 8); i < two_nt; i++)
                        pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
                }
                else
                {
                    ihevc_memset_mul_8_a9q(&pu1_dst[0], 0, 8);
                }
            }

            if(nbr_flags & 0x300)
            {
                ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1], 0, nt);
            }

            if(nbr_flags & 0x3000)
            {
                ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
            }

            /* compute trailing zeros based on nbr_flag for substitution process of below left */
            /* each bit in nbr_flags_temp corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
            {
                nbr_id_from_bl = look_up_trailing_zeros(nbr_flags_temp & 0XF) * 8; /* for below left and left */

                /* 64 is what comes back when neither bottom-left nor left  */
                /* is flagged (presumably the lookup returns 8 for zero);   */
                /* the next candidate is then top-left at index 32          */
                if(nbr_id_from_bl == 64)
                    nbr_id_from_bl = 32;

                if(nbr_id_from_bl == 32)
                {
                    /* for top left : 1 pel per nbr bit */
                    if(!((nbr_flags_temp >> 8) & 0x1))
                    {
                        nbr_id_from_bl++;
                        nbr_id_from_bl += look_up_trailing_zeros((nbr_flags_temp >> 4) & 0xF) * 8; /* top and top right;  8 pels per nbr bit */
                    }
                }
                /* Reverse Substitution Process*/
                if(nbr_id_from_bl)
                {
                    /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
                    pu1_ref = pu1_dst[nbr_id_from_bl];
                    for(i = (nbr_id_from_bl - 1); i >= 0; i--)
                    {
                        pu1_dst[i] = pu1_ref;
                    }
                }
            }

            /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
            while(nbr_id_from_bl < ((T16_4NT) + 1))
            {
                /* To obtain the next unavailable idx flag after reverse neighbor substitution */
                /* Divide by 8 to obtain the flag bit index */
                frwd_nbr_flag = (nbr_id_from_bl >> 3);

                /* The Top-left flag is at the last bit location of nbr_flags_temp */
                if(nbr_id_from_bl == (T16_4NT / 2))
                {
                    get_bits = GET_BIT(nbr_flags_temp, 8);

                    /* only pel substitution for TL */
                    if(!get_bits)
                        pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
                }
                else
                {
                    get_bits = GET_BIT(nbr_flags_temp, frwd_nbr_flag);
                    if(!get_bits)
                    {
                        /* 8 pel substitution (other than TL) from the preceding sample */
                        pu1_ref = pu1_dst[nbr_id_from_bl - 1];
                        ihevc_memset_mul_8_a9q(pu1_dst + nbr_id_from_bl, pu1_ref, 8);
                    }
                }
                /* top-left is a single sample; every other flag covers 8 */
                nbr_id_from_bl += (nbr_id_from_bl == (T16_4NT / 2)) ? 1 : 8;
            }
        }

        if(nt == 32)
        {
            /* Copy the available neighbours, zero the unavailable ones */
            if(nbr_flags & 0x10000)
                pu1_dst[two_nt] = *pu1_top_left;
            else
                pu1_dst[two_nt] = 0;

            if(nbr_flags & 0xF0)
            {
                for(i = 0; i < nt; i++)
                    pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[two_nt - 1 - (nt - 1)], 0, nt);
            }

            /* Bottom - left availability is checked for every 8x8 TU position and set accordingly */
            {
                if(nbr_flags & 0x8)
                {
                    for(i = nt; i < (nt + 8); i++)
                        pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
                }
                else
                {
                    ihevc_memset_mul_8_a9q(&pu1_dst[24], 0, 8);
                }

                if(nbr_flags & 0x4)
                {
                    for(i = (nt + 8); i < (nt + 16); i++)
                        pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
                }
                else
                {
                    ihevc_memset_mul_8_a9q(&pu1_dst[16], 0, 8);
                }

                if(nbr_flags & 0x2)
                {
                    for(i = (nt + 16); i < (nt + 24); i++)
                        pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
                }
                else
                {
                    ihevc_memset_mul_8_a9q(&pu1_dst[8], 0, 8);
                }

                if(nbr_flags & 0x1)
                {
                    for(i = (nt + 24); i < (two_nt); i++)
                        pu1_dst[two_nt - 1 - i] = pu1_left[i * src_strd];
                }
                else
                {
                    ihevc_memset_mul_8_a9q(&pu1_dst[0], 0, 8);
                }
            }

            if(nbr_flags & 0xF00)
            {
                ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1], pu1_top, nt);
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1], 0, nt);
            }

            if(nbr_flags & 0xF000)
            {
                ihevc_memcpy_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], pu1_top + nt, nt);
            }
            else
            {
                ihevc_memset_mul_8_a9q(&pu1_dst[two_nt + 1 + nt], 0, nt);
            }

            /* compute trailing zeros based on nbr_flag for substitution process of below left */
            /* each bit in nbr flags corresponds to 8 pels for bot_left, left, top and topright but 1 pel for topleft */
            {
                nbr_id_from_bl = look_up_trailing_zeros((nbr_flags & 0XFF)) * 8; /* for below left and left */

                /* 64: neither bottom-left nor left flagged; top-left is next */
                if(nbr_id_from_bl == 64)
                {
                    /* for top left : 1 pel per nbr bit */
                    if(!((nbr_flags >> 16) & 0x1))
                    {
                        /* top left not available */
                        nbr_id_from_bl++;
                        /* top and top right;  8 pels per nbr bit */
                        nbr_id_from_bl += look_up_trailing_zeros((nbr_flags >> 8) & 0xFF) * 8;
                    }
                }
                /* Reverse Substitution Process*/
                if(nbr_id_from_bl)
                {
                    /* Replicate the bottom-left and subsequent unavailable pixels with the 1st available pixel above */
                    pu1_ref = pu1_dst[nbr_id_from_bl];
                    for(i = (nbr_id_from_bl - 1); i >= 0; i--)
                        pu1_dst[i] = pu1_ref;
                }
            }

            /* for the loop of 4*Nt+1 pixels (excluding pixels computed from reverse substitution) */
            while(nbr_id_from_bl < ((T32_4NT) + 1))
            {
                /* To obtain the next unavailable idx flag after reverse neighbor substitution */
                /* Divide by 8 to obtain the flag bit index */
                frwd_nbr_flag = (nbr_id_from_bl >> 3);

                /* The Top-left flag is at the last bit location of nbr_flags*/
                if(nbr_id_from_bl == (T32_4NT / 2))
                {
                    get_bits = GET_BIT(nbr_flags, 16);
                    /* only pel substitution for TL */
                    if(!get_bits)
                        pu1_dst[nbr_id_from_bl] = pu1_dst[nbr_id_from_bl - 1];
                }
                else
                {
                    get_bits = GET_BIT(nbr_flags, frwd_nbr_flag);
                    if(!get_bits)
                    {
                        /* 8 pel substitution (other than TL) from the preceding sample */
                        pu1_ref = pu1_dst[nbr_id_from_bl - 1];
                        ihevc_memset_mul_8_a9q(&pu1_dst[nbr_id_from_bl], pu1_ref, 8);
                    }
                }
                /* top-left is a single sample; every other flag covers 8 */
                nbr_id_from_bl += (nbr_id_from_bl == (T32_4NT / 2)) ? 1 : 8;
            }
        }
    }
}