1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar /**
21*c83a76b0SSuyog Pawar *******************************************************************************
22*c83a76b0SSuyog Pawar * @file
23*c83a76b0SSuyog Pawar * ihevce_sao.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * @brief
26*c83a76b0SSuyog Pawar * Contains definition for the ctb level sao function
27*c83a76b0SSuyog Pawar *
28*c83a76b0SSuyog Pawar * @author
29*c83a76b0SSuyog Pawar * Ittiam
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar * @par List of Functions:
32*c83a76b0SSuyog Pawar * ihevce_sao_set_avilability()
33*c83a76b0SSuyog Pawar * ihevce_sao_ctb()
34*c83a76b0SSuyog Pawar * ihevce_sao_analyse()
35*c83a76b0SSuyog Pawar *
36*c83a76b0SSuyog Pawar * @remarks
37*c83a76b0SSuyog Pawar * None
38*c83a76b0SSuyog Pawar *
39*c83a76b0SSuyog Pawar *******************************************************************************
40*c83a76b0SSuyog Pawar */
41*c83a76b0SSuyog Pawar
42*c83a76b0SSuyog Pawar /*****************************************************************************/
43*c83a76b0SSuyog Pawar /* File Includes */
44*c83a76b0SSuyog Pawar /*****************************************************************************/
45*c83a76b0SSuyog Pawar /* System include files */
46*c83a76b0SSuyog Pawar #include <stdio.h>
47*c83a76b0SSuyog Pawar #include <string.h>
48*c83a76b0SSuyog Pawar #include <stdlib.h>
49*c83a76b0SSuyog Pawar #include <assert.h>
50*c83a76b0SSuyog Pawar #include <stdarg.h>
51*c83a76b0SSuyog Pawar #include <math.h>
52*c83a76b0SSuyog Pawar
53*c83a76b0SSuyog Pawar /* User include files */
54*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
55*c83a76b0SSuyog Pawar #include "itt_video_api.h"
56*c83a76b0SSuyog Pawar #include "ihevce_api.h"
57*c83a76b0SSuyog Pawar
58*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
59*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
60*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
61*c83a76b0SSuyog Pawar
62*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
64*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
65*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
66*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
67*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
68*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
69*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
70*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
71*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
72*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
73*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
74*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
75*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
76*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
77*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
78*c83a76b0SSuyog Pawar
79*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
80*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
82*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
83*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
84*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
85*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
86*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
87*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
88*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
89*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
90*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
91*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
92*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
93*c83a76b0SSuyog Pawar #include "ihevce_cabac_rdo.h"
94*c83a76b0SSuyog Pawar #include "ihevce_sao.h"
95*c83a76b0SSuyog Pawar
96*c83a76b0SSuyog Pawar /*****************************************************************************/
97*c83a76b0SSuyog Pawar /* Function Definitions */
98*c83a76b0SSuyog Pawar /*****************************************************************************/
99*c83a76b0SSuyog Pawar
100*c83a76b0SSuyog Pawar /**
101*c83a76b0SSuyog Pawar *******************************************************************************
102*c83a76b0SSuyog Pawar *
103*c83a76b0SSuyog Pawar * @brief
104*c83a76b0SSuyog Pawar * ihevce_sao_set_avilability
105*c83a76b0SSuyog Pawar *
106*c83a76b0SSuyog Pawar * @par Description:
107*c83a76b0SSuyog Pawar * Sets the availability flag for SAO.
108*c83a76b0SSuyog Pawar *
109*c83a76b0SSuyog Pawar * @param[in]
110*c83a76b0SSuyog Pawar * ps_sao_ctxt: Pointer to SAO context
111*c83a76b0SSuyog Pawar * @returns
112*c83a76b0SSuyog Pawar *
113*c83a76b0SSuyog Pawar * @remarks
114*c83a76b0SSuyog Pawar * None
115*c83a76b0SSuyog Pawar *
116*c83a76b0SSuyog Pawar *******************************************************************************
117*c83a76b0SSuyog Pawar */
ihevce_sao_set_avilability(UWORD8 * pu1_avail,sao_ctxt_t * ps_sao_ctxt,ihevce_tile_params_t * ps_tile_params)118*c83a76b0SSuyog Pawar void ihevce_sao_set_avilability(
119*c83a76b0SSuyog Pawar UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
120*c83a76b0SSuyog Pawar {
121*c83a76b0SSuyog Pawar WORD32 i;
122*c83a76b0SSuyog Pawar
123*c83a76b0SSuyog Pawar WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x;
124*c83a76b0SSuyog Pawar WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y;
125*c83a76b0SSuyog Pawar
126*c83a76b0SSuyog Pawar for(i = 0; i < 8; i++)
127*c83a76b0SSuyog Pawar {
128*c83a76b0SSuyog Pawar pu1_avail[i] = 255;
129*c83a76b0SSuyog Pawar }
130*c83a76b0SSuyog Pawar
131*c83a76b0SSuyog Pawar /* SAO_note_01: If the CTB lies on a tile or a slice boundary and
132*c83a76b0SSuyog Pawar in-loop filtering is enabled at tile and slice boundary, then SAO must
133*c83a76b0SSuyog Pawar be performed at tile/slice boundaries also.
134*c83a76b0SSuyog Pawar Hence the boundary checks should be based on frame position of CTB
135*c83a76b0SSuyog Pawar rather than s_ctb_nbr_avail_flags.u1_left_avail flags.
136*c83a76b0SSuyog Pawar Search for <SAO_note_01> in workspace to know more */
137*c83a76b0SSuyog Pawar /* Availaibility flags for first col*/
138*c83a76b0SSuyog Pawar if(ctb_x_pos == ps_tile_params->i4_first_ctb_x)
139*c83a76b0SSuyog Pawar {
140*c83a76b0SSuyog Pawar pu1_avail[0] = 0;
141*c83a76b0SSuyog Pawar pu1_avail[4] = 0;
142*c83a76b0SSuyog Pawar pu1_avail[6] = 0;
143*c83a76b0SSuyog Pawar }
144*c83a76b0SSuyog Pawar
145*c83a76b0SSuyog Pawar /* Availaibility flags for last col*/
146*c83a76b0SSuyog Pawar if((ctb_x_pos + 1) ==
147*c83a76b0SSuyog Pawar (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit))
148*c83a76b0SSuyog Pawar {
149*c83a76b0SSuyog Pawar pu1_avail[1] = 0;
150*c83a76b0SSuyog Pawar pu1_avail[5] = 0;
151*c83a76b0SSuyog Pawar pu1_avail[7] = 0;
152*c83a76b0SSuyog Pawar }
153*c83a76b0SSuyog Pawar
154*c83a76b0SSuyog Pawar /* Availaibility flags for first row*/
155*c83a76b0SSuyog Pawar if(ctb_y_pos == ps_tile_params->i4_first_ctb_y)
156*c83a76b0SSuyog Pawar {
157*c83a76b0SSuyog Pawar pu1_avail[2] = 0;
158*c83a76b0SSuyog Pawar pu1_avail[4] = 0;
159*c83a76b0SSuyog Pawar pu1_avail[5] = 0;
160*c83a76b0SSuyog Pawar }
161*c83a76b0SSuyog Pawar
162*c83a76b0SSuyog Pawar /* Availaibility flags for last row*/
163*c83a76b0SSuyog Pawar if((ctb_y_pos + 1) ==
164*c83a76b0SSuyog Pawar (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit))
165*c83a76b0SSuyog Pawar {
166*c83a76b0SSuyog Pawar pu1_avail[3] = 0;
167*c83a76b0SSuyog Pawar pu1_avail[6] = 0;
168*c83a76b0SSuyog Pawar pu1_avail[7] = 0;
169*c83a76b0SSuyog Pawar }
170*c83a76b0SSuyog Pawar }
171*c83a76b0SSuyog Pawar
172*c83a76b0SSuyog Pawar /**
173*c83a76b0SSuyog Pawar *******************************************************************************
174*c83a76b0SSuyog Pawar *
175*c83a76b0SSuyog Pawar * @brief
176*c83a76b0SSuyog Pawar * Sao CTB level function.
177*c83a76b0SSuyog Pawar *
178*c83a76b0SSuyog Pawar * @par Description:
179*c83a76b0SSuyog Pawar * For a given CTB, sao is done. Both the luma and chroma
180*c83a76b0SSuyog Pawar * blocks are processed
181*c83a76b0SSuyog Pawar *
182*c83a76b0SSuyog Pawar * @param[in]
183*c83a76b0SSuyog Pawar * ps_sao_ctxt: Pointer to SAO context
184*c83a76b0SSuyog Pawar *
185*c83a76b0SSuyog Pawar * @returns
186*c83a76b0SSuyog Pawar *
187*c83a76b0SSuyog Pawar * @remarks
188*c83a76b0SSuyog Pawar * None
189*c83a76b0SSuyog Pawar *
190*c83a76b0SSuyog Pawar *******************************************************************************
191*c83a76b0SSuyog Pawar */
ihevce_sao_ctb(sao_ctxt_t * ps_sao_ctxt,ihevce_tile_params_t * ps_tile_params)192*c83a76b0SSuyog Pawar void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
193*c83a76b0SSuyog Pawar {
194*c83a76b0SSuyog Pawar sao_enc_t *ps_sao;
195*c83a76b0SSuyog Pawar UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2];
196*c83a76b0SSuyog Pawar UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf;
197*c83a76b0SSuyog Pawar UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf;
198*c83a76b0SSuyog Pawar UWORD8 *pu1_src_luma, *pu1_src_chroma;
199*c83a76b0SSuyog Pawar WORD32 luma_src_stride, ctb_size;
200*c83a76b0SSuyog Pawar WORD32 chroma_src_stride;
201*c83a76b0SSuyog Pawar UWORD8 au1_avail_luma[8], au1_avail_chroma[8];
202*c83a76b0SSuyog Pawar WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma;
203*c83a76b0SSuyog Pawar UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma;
204*c83a76b0SSuyog Pawar UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma;
205*c83a76b0SSuyog Pawar UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma;
206*c83a76b0SSuyog Pawar UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
207*c83a76b0SSuyog Pawar
208*c83a76b0SSuyog Pawar ps_sao = ps_sao_ctxt->ps_sao;
209*c83a76b0SSuyog Pawar
210*c83a76b0SSuyog Pawar ASSERT(
211*c83a76b0SSuyog Pawar (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) &&
212*c83a76b0SSuyog Pawar (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7));
213*c83a76b0SSuyog Pawar ASSERT(
214*c83a76b0SSuyog Pawar (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) &&
215*c83a76b0SSuyog Pawar (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7));
216*c83a76b0SSuyog Pawar ASSERT(
217*c83a76b0SSuyog Pawar (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) &&
218*c83a76b0SSuyog Pawar (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7));
219*c83a76b0SSuyog Pawar ASSERT(
220*c83a76b0SSuyog Pawar (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) &&
221*c83a76b0SSuyog Pawar (ps_sao->b5_cr_band_pos <= 28));
222*c83a76b0SSuyog Pawar
223*c83a76b0SSuyog Pawar if(ps_sao_ctxt->i1_slice_sao_luma_flag)
224*c83a76b0SSuyog Pawar {
225*c83a76b0SSuyog Pawar /*initialize the src pointer to current row*/
226*c83a76b0SSuyog Pawar luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
227*c83a76b0SSuyog Pawar
228*c83a76b0SSuyog Pawar ctb_size = ps_sao_ctxt->i4_ctb_size;
229*c83a76b0SSuyog Pawar
230*c83a76b0SSuyog Pawar /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
231*c83a76b0SSuyog Pawar ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used */
232*c83a76b0SSuyog Pawar sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd;
233*c83a76b0SSuyog Pawar sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht;
234*c83a76b0SSuyog Pawar
235*c83a76b0SSuyog Pawar pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
236*c83a76b0SSuyog Pawar /* Pointer to the top luma buffer corresponding to the current ctb row*/
237*c83a76b0SSuyog Pawar pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma;
238*c83a76b0SSuyog Pawar
239*c83a76b0SSuyog Pawar /* Pointer to left luma buffer corresponding to the current ctb row*/
240*c83a76b0SSuyog Pawar pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch;
241*c83a76b0SSuyog Pawar
242*c83a76b0SSuyog Pawar /* Pointer to the top right luma buffer corresponding to the current ctb row*/
243*c83a76b0SSuyog Pawar pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd;
244*c83a76b0SSuyog Pawar
245*c83a76b0SSuyog Pawar /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
246*c83a76b0SSuyog Pawar pu1_src_bot_left_luma =
247*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride -
248*c83a76b0SSuyog Pawar 1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
249*c83a76b0SSuyog Pawar (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
250*c83a76b0SSuyog Pawar
251*c83a76b0SSuyog Pawar /* Back up the top left pixel for (x+1, y+1)th ctb*/
252*c83a76b0SSuyog Pawar u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1);
253*c83a76b0SSuyog Pawar pu1_top_left_luma = pu1_src_top_luma_buf - 1;
254*c83a76b0SSuyog Pawar
255*c83a76b0SSuyog Pawar if(SAO_BAND == ps_sao->b3_y_type_idx)
256*c83a76b0SSuyog Pawar {
257*c83a76b0SSuyog Pawar ihevc_sao_band_offset_luma(
258*c83a76b0SSuyog Pawar pu1_src_luma,
259*c83a76b0SSuyog Pawar luma_src_stride,
260*c83a76b0SSuyog Pawar pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
261*c83a76b0SSuyog Pawar pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
262*c83a76b0SSuyog Pawar pu1_src_top_luma_buf - 1, /* Top left*/
263*c83a76b0SSuyog Pawar ps_sao->b5_y_band_pos,
264*c83a76b0SSuyog Pawar ps_sao->u1_y_offset,
265*c83a76b0SSuyog Pawar sao_blk_wd,
266*c83a76b0SSuyog Pawar sao_blk_ht);
267*c83a76b0SSuyog Pawar
268*c83a76b0SSuyog Pawar if((ps_sao_ctxt->i4_ctb_y > 0))
269*c83a76b0SSuyog Pawar {
270*c83a76b0SSuyog Pawar *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
271*c83a76b0SSuyog Pawar }
272*c83a76b0SSuyog Pawar }
273*c83a76b0SSuyog Pawar else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG)
274*c83a76b0SSuyog Pawar {
275*c83a76b0SSuyog Pawar /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
276*c83a76b0SSuyog Pawar * corresponding to EO category 1 and 2 which should be always positive
277*c83a76b0SSuyog Pawar * And 3rd and 4th offsets are always inferred as offsets corresponding to
278*c83a76b0SSuyog Pawar * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
279*c83a76b0SSuyog Pawar */
280*c83a76b0SSuyog Pawar // clang-format off
281*c83a76b0SSuyog Pawar ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0));
282*c83a76b0SSuyog Pawar ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0));
283*c83a76b0SSuyog Pawar // clang-format on
284*c83a76b0SSuyog Pawar
285*c83a76b0SSuyog Pawar ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params);
286*c83a76b0SSuyog Pawar
287*c83a76b0SSuyog Pawar ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2](
288*c83a76b0SSuyog Pawar pu1_src_luma,
289*c83a76b0SSuyog Pawar luma_src_stride,
290*c83a76b0SSuyog Pawar pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
291*c83a76b0SSuyog Pawar pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
292*c83a76b0SSuyog Pawar pu1_top_left_luma, /* Top left*/
293*c83a76b0SSuyog Pawar pu1_src_top_right_luma, /* Top right*/
294*c83a76b0SSuyog Pawar pu1_src_bot_left_luma, /* Bottom left*/
295*c83a76b0SSuyog Pawar au1_avail_luma,
296*c83a76b0SSuyog Pawar ps_sao->u1_y_offset,
297*c83a76b0SSuyog Pawar sao_blk_wd,
298*c83a76b0SSuyog Pawar sao_blk_ht);
299*c83a76b0SSuyog Pawar
300*c83a76b0SSuyog Pawar if((ps_sao_ctxt->i4_ctb_y > 0))
301*c83a76b0SSuyog Pawar {
302*c83a76b0SSuyog Pawar *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
303*c83a76b0SSuyog Pawar }
304*c83a76b0SSuyog Pawar }
305*c83a76b0SSuyog Pawar }
306*c83a76b0SSuyog Pawar
307*c83a76b0SSuyog Pawar if(ps_sao_ctxt->i1_slice_sao_chroma_flag)
308*c83a76b0SSuyog Pawar {
309*c83a76b0SSuyog Pawar /*initialize the src pointer to current row*/
310*c83a76b0SSuyog Pawar chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
311*c83a76b0SSuyog Pawar ctb_size = ps_sao_ctxt->i4_ctb_size;
312*c83a76b0SSuyog Pawar
313*c83a76b0SSuyog Pawar /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
314*c83a76b0SSuyog Pawar //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2;
315*c83a76b0SSuyog Pawar ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used */
316*c83a76b0SSuyog Pawar ps_sao->u1_cr_offset[0] = 0;
317*c83a76b0SSuyog Pawar sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd;
318*c83a76b0SSuyog Pawar sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1);
319*c83a76b0SSuyog Pawar
320*c83a76b0SSuyog Pawar pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
321*c83a76b0SSuyog Pawar /* Pointer to the top luma buffer corresponding to the current ctb row*/
322*c83a76b0SSuyog Pawar pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma;
323*c83a76b0SSuyog Pawar // clang-format off
324*c83a76b0SSuyog Pawar /* Pointer to left luma buffer corresponding to the current ctb row*/
325*c83a76b0SSuyog Pawar pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch; //ps_sao_ctxt->au1_sao_src_left_chroma;
326*c83a76b0SSuyog Pawar // clang-format on
327*c83a76b0SSuyog Pawar /* Pointer to the top right chroma buffer corresponding to the current ctb row*/
328*c83a76b0SSuyog Pawar pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma;
329*c83a76b0SSuyog Pawar
330*c83a76b0SSuyog Pawar /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
331*c83a76b0SSuyog Pawar pu1_src_bot_left_chroma =
332*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_frm_chroma_recon_buf +
333*c83a76b0SSuyog Pawar (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 +
334*c83a76b0SSuyog Pawar (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
335*c83a76b0SSuyog Pawar (ctb_size >> !u1_is_422)) +
336*c83a76b0SSuyog Pawar (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
337*c83a76b0SSuyog Pawar
338*c83a76b0SSuyog Pawar /* Back up the top left pixel for (x+1, y+1)th ctb*/
339*c83a76b0SSuyog Pawar u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2);
340*c83a76b0SSuyog Pawar u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1);
341*c83a76b0SSuyog Pawar pu1_top_left_chroma = pu1_src_top_chroma_buf - 2;
342*c83a76b0SSuyog Pawar
343*c83a76b0SSuyog Pawar if(SAO_BAND == ps_sao->b3_cb_type_idx)
344*c83a76b0SSuyog Pawar {
345*c83a76b0SSuyog Pawar ihevc_sao_band_offset_chroma(
346*c83a76b0SSuyog Pawar pu1_src_chroma,
347*c83a76b0SSuyog Pawar chroma_src_stride,
348*c83a76b0SSuyog Pawar pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
349*c83a76b0SSuyog Pawar pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
350*c83a76b0SSuyog Pawar pu1_top_left_chroma, /* Top left*/
351*c83a76b0SSuyog Pawar ps_sao->b5_cb_band_pos,
352*c83a76b0SSuyog Pawar ps_sao->b5_cr_band_pos,
353*c83a76b0SSuyog Pawar ps_sao->u1_cb_offset,
354*c83a76b0SSuyog Pawar ps_sao->u1_cr_offset,
355*c83a76b0SSuyog Pawar sao_wd_chroma,
356*c83a76b0SSuyog Pawar sao_ht_chroma);
357*c83a76b0SSuyog Pawar
358*c83a76b0SSuyog Pawar if((ps_sao_ctxt->i4_ctb_y > 0))
359*c83a76b0SSuyog Pawar {
360*c83a76b0SSuyog Pawar *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
361*c83a76b0SSuyog Pawar *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
362*c83a76b0SSuyog Pawar }
363*c83a76b0SSuyog Pawar }
364*c83a76b0SSuyog Pawar else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG)
365*c83a76b0SSuyog Pawar {
366*c83a76b0SSuyog Pawar /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
367*c83a76b0SSuyog Pawar * corresponding to EO category 1 and 2 which should be always positive
368*c83a76b0SSuyog Pawar * And 3rd and 4th offsets are always inferred as offsets corresponding to
369*c83a76b0SSuyog Pawar * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
370*c83a76b0SSuyog Pawar */
371*c83a76b0SSuyog Pawar ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0));
372*c83a76b0SSuyog Pawar ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0));
373*c83a76b0SSuyog Pawar
374*c83a76b0SSuyog Pawar ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0));
375*c83a76b0SSuyog Pawar ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0));
376*c83a76b0SSuyog Pawar
377*c83a76b0SSuyog Pawar ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params);
378*c83a76b0SSuyog Pawar
379*c83a76b0SSuyog Pawar ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](
380*c83a76b0SSuyog Pawar pu1_src_chroma,
381*c83a76b0SSuyog Pawar chroma_src_stride,
382*c83a76b0SSuyog Pawar pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
383*c83a76b0SSuyog Pawar pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
384*c83a76b0SSuyog Pawar pu1_top_left_chroma, /* Top left*/
385*c83a76b0SSuyog Pawar pu1_src_top_right_chroma, /* Top right*/
386*c83a76b0SSuyog Pawar pu1_src_bot_left_chroma, /* Bottom left*/
387*c83a76b0SSuyog Pawar au1_avail_chroma,
388*c83a76b0SSuyog Pawar ps_sao->u1_cb_offset,
389*c83a76b0SSuyog Pawar ps_sao->u1_cr_offset,
390*c83a76b0SSuyog Pawar sao_wd_chroma,
391*c83a76b0SSuyog Pawar sao_ht_chroma);
392*c83a76b0SSuyog Pawar
393*c83a76b0SSuyog Pawar if((ps_sao_ctxt->i4_ctb_y > 0))
394*c83a76b0SSuyog Pawar {
395*c83a76b0SSuyog Pawar *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
396*c83a76b0SSuyog Pawar *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
397*c83a76b0SSuyog Pawar }
398*c83a76b0SSuyog Pawar }
399*c83a76b0SSuyog Pawar }
400*c83a76b0SSuyog Pawar }
401*c83a76b0SSuyog Pawar
402*c83a76b0SSuyog Pawar /**
403*c83a76b0SSuyog Pawar *******************************************************************************
404*c83a76b0SSuyog Pawar *
405*c83a76b0SSuyog Pawar * @brief
406*c83a76b0SSuyog Pawar * CTB level function to do SAO analysis.
407*c83a76b0SSuyog Pawar *
408*c83a76b0SSuyog Pawar * @par Description:
409*c83a76b0SSuyog Pawar * For a given CTB, sao analysis is done for both luma and chroma.
410*c83a76b0SSuyog Pawar *
411*c83a76b0SSuyog Pawar *
412*c83a76b0SSuyog Pawar * @param[in]
413*c83a76b0SSuyog Pawar * ps_sao_ctxt: Pointer to SAO context
414*c83a76b0SSuyog Pawar * ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop
415*c83a76b0SSuyog Pawar *
416*c83a76b0SSuyog Pawar * @returns
417*c83a76b0SSuyog Pawar *
418*c83a76b0SSuyog Pawar * @remarks
419*c83a76b0SSuyog Pawar * None
420*c83a76b0SSuyog Pawar *
421*c83a76b0SSuyog Pawar * @Assumptions:
422*c83a76b0SSuyog Pawar * 1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be
423*c83a76b0SSuyog Pawar * almost same as cabac state of (x,y)th ctb.
424*c83a76b0SSuyog Pawar * 2) Distortion is calculated in spatial domain but lambda used to calculate the cost is
425*c83a76b0SSuyog Pawar * in freq domain.
426*c83a76b0SSuyog Pawar *******************************************************************************
427*c83a76b0SSuyog Pawar */
ihevce_sao_analyse(sao_ctxt_t * ps_sao_ctxt,ctb_enc_loop_out_t * ps_ctb_enc_loop_out,UWORD32 * pu4_frame_rdopt_header_bits,ihevce_tile_params_t * ps_tile_params)428*c83a76b0SSuyog Pawar void ihevce_sao_analyse(
429*c83a76b0SSuyog Pawar sao_ctxt_t *ps_sao_ctxt,
430*c83a76b0SSuyog Pawar ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
431*c83a76b0SSuyog Pawar UWORD32 *pu4_frame_rdopt_header_bits,
432*c83a76b0SSuyog Pawar ihevce_tile_params_t *ps_tile_params)
433*c83a76b0SSuyog Pawar {
434*c83a76b0SSuyog Pawar UWORD8 *pu1_luma_scratch_buf;
435*c83a76b0SSuyog Pawar UWORD8 *pu1_chroma_scratch_buf;
436*c83a76b0SSuyog Pawar UWORD8 *pu1_src_luma, *pu1_recon_luma;
437*c83a76b0SSuyog Pawar UWORD8 *pu1_src_chroma, *pu1_recon_chroma;
438*c83a76b0SSuyog Pawar WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht;
439*c83a76b0SSuyog Pawar WORD32 chroma_src_stride, chroma_recon_stride;
440*c83a76b0SSuyog Pawar WORD32 i4_luma_scratch_buf_stride;
441*c83a76b0SSuyog Pawar WORD32 i4_chroma_scratch_buf_stride;
442*c83a76b0SSuyog Pawar sao_ctxt_t s_sao_ctxt;
443*c83a76b0SSuyog Pawar UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0;
444*c83a76b0SSuyog Pawar LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf;
445*c83a76b0SSuyog Pawar WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0;
446*c83a76b0SSuyog Pawar WORD32 curr_buf_idx, best_buf_idx, best_cand_idx;
447*c83a76b0SSuyog Pawar WORD32 row;
448*c83a76b0SSuyog Pawar WORD32 edgeidx;
449*c83a76b0SSuyog Pawar WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 };
450*c83a76b0SSuyog Pawar sao_enc_t s_best_luma_chroma_cand;
451*c83a76b0SSuyog Pawar WORD32 best_ctb_sao_bits = 0;
452*c83a76b0SSuyog Pawar #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2)
453*c83a76b0SSuyog Pawar UWORD8 u1_force_no_offset =
454*c83a76b0SSuyog Pawar ps_sao_ctxt
455*c83a76b0SSuyog Pawar ->ps_ctb_data
456*c83a76b0SSuyog Pawar [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y]
457*c83a76b0SSuyog Pawar .s_ctb_noise_params.i4_noise_present;
458*c83a76b0SSuyog Pawar #endif
459*c83a76b0SSuyog Pawar UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
460*c83a76b0SSuyog Pawar
461*c83a76b0SSuyog Pawar *pu4_frame_rdopt_header_bits = 0;
462*c83a76b0SSuyog Pawar
463*c83a76b0SSuyog Pawar ctb_size = ps_sao_ctxt->i4_ctb_size;
464*c83a76b0SSuyog Pawar ctb_wd = ps_sao_ctxt->i4_sao_blk_wd;
465*c83a76b0SSuyog Pawar ctb_ht = ps_sao_ctxt->i4_sao_blk_ht;
466*c83a76b0SSuyog Pawar
467*c83a76b0SSuyog Pawar s_sao_ctxt = ps_sao_ctxt[0];
468*c83a76b0SSuyog Pawar
469*c83a76b0SSuyog Pawar /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/
470*c83a76b0SSuyog Pawar memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t));
471*c83a76b0SSuyog Pawar
472*c83a76b0SSuyog Pawar /* Initialize the pointer and strides for luma buffers*/
473*c83a76b0SSuyog Pawar pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
474*c83a76b0SSuyog Pawar luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
475*c83a76b0SSuyog Pawar
476*c83a76b0SSuyog Pawar pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf;
477*c83a76b0SSuyog Pawar luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride;
478*c83a76b0SSuyog Pawar i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
479*c83a76b0SSuyog Pawar
480*c83a76b0SSuyog Pawar /* Initialize the pointer and strides for luma buffers*/
481*c83a76b0SSuyog Pawar pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
482*c83a76b0SSuyog Pawar chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
483*c83a76b0SSuyog Pawar
484*c83a76b0SSuyog Pawar pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf;
485*c83a76b0SSuyog Pawar chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride;
486*c83a76b0SSuyog Pawar i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
487*c83a76b0SSuyog Pawar
488*c83a76b0SSuyog Pawar i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf;
489*c83a76b0SSuyog Pawar i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf;
490*c83a76b0SSuyog Pawar
491*c83a76b0SSuyog Pawar /*****************************************************/
492*c83a76b0SSuyog Pawar /********************RDO FOR LUMA CAND****************/
493*c83a76b0SSuyog Pawar /*****************************************************/
494*c83a76b0SSuyog Pawar
495*c83a76b0SSuyog Pawar #if !DISABLE_SAO_WHEN_NOISY
496*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
497*c83a76b0SSuyog Pawar #else
498*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset)
499*c83a76b0SSuyog Pawar #endif
500*c83a76b0SSuyog Pawar {
501*c83a76b0SSuyog Pawar /* Candidate for Edge offset SAO*/
502*c83a76b0SSuyog Pawar /* Following is the convention for curr pixel and
503*c83a76b0SSuyog Pawar * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */
504*c83a76b0SSuyog Pawar /*
505*c83a76b0SSuyog Pawar * 0 deg : a c b 90 deg: a 135 deg: a 45 deg: a
506*c83a76b0SSuyog Pawar * c c c
507*c83a76b0SSuyog Pawar * b b b
508*c83a76b0SSuyog Pawar */
509*c83a76b0SSuyog Pawar
510*c83a76b0SSuyog Pawar /* 0 deg SAO CAND*/
511*c83a76b0SSuyog Pawar /* Reset the error and edge count*/
512*c83a76b0SSuyog Pawar for(edgeidx = 0; edgeidx < 5; edgeidx++)
513*c83a76b0SSuyog Pawar {
514*c83a76b0SSuyog Pawar acc_error_category[edgeidx] = 0;
515*c83a76b0SSuyog Pawar category_count[edgeidx] = 0;
516*c83a76b0SSuyog Pawar }
517*c83a76b0SSuyog Pawar
518*c83a76b0SSuyog Pawar /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/
519*c83a76b0SSuyog Pawar // clang-format off
520*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG,
521*c83a76b0SSuyog Pawar acc_error_category, category_count);
522*c83a76b0SSuyog Pawar // clang-format on
523*c83a76b0SSuyog Pawar // clang-format off
524*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG;
525*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
526*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
527*c83a76b0SSuyog Pawar : 0;
528*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
529*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
530*c83a76b0SSuyog Pawar : 0;
531*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
532*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
533*c83a76b0SSuyog Pawar : 0;
534*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4]
535*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
536*c83a76b0SSuyog Pawar : 0;
537*c83a76b0SSuyog Pawar // clang-format on
538*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
539*c83a76b0SSuyog Pawar // clang-format off
540*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
541*c83a76b0SSuyog Pawar // clang-format on
542*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
543*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
544*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
545*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
546*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
547*c83a76b0SSuyog Pawar
548*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
549*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
550*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
551*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
552*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
553*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
554*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
555*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
556*c83a76b0SSuyog Pawar
557*c83a76b0SSuyog Pawar num_luma_rdo_cand++;
558*c83a76b0SSuyog Pawar
559*c83a76b0SSuyog Pawar /* 90 degree SAO CAND*/
560*c83a76b0SSuyog Pawar for(edgeidx = 0; edgeidx < 5; edgeidx++)
561*c83a76b0SSuyog Pawar {
562*c83a76b0SSuyog Pawar acc_error_category[edgeidx] = 0;
563*c83a76b0SSuyog Pawar category_count[edgeidx] = 0;
564*c83a76b0SSuyog Pawar }
565*c83a76b0SSuyog Pawar
566*c83a76b0SSuyog Pawar /* Call the function to populate the EO parameter for this ctb for 90 deg EO class*/
567*c83a76b0SSuyog Pawar // clang-format off
568*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG,
569*c83a76b0SSuyog Pawar acc_error_category, category_count);
570*c83a76b0SSuyog Pawar
571*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG;
572*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
573*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
574*c83a76b0SSuyog Pawar : 0;
575*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
576*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
577*c83a76b0SSuyog Pawar : 0;
578*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
579*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
580*c83a76b0SSuyog Pawar : 0;
581*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
582*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
583*c83a76b0SSuyog Pawar : 0;
584*c83a76b0SSuyog Pawar // clang-format on
585*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
586*c83a76b0SSuyog Pawar
587*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
588*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
589*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
590*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
591*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
592*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
593*c83a76b0SSuyog Pawar
594*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
595*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
596*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
597*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
598*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
599*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
600*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
601*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
602*c83a76b0SSuyog Pawar
603*c83a76b0SSuyog Pawar num_luma_rdo_cand++;
604*c83a76b0SSuyog Pawar
605*c83a76b0SSuyog Pawar /* 135 degree SAO CAND*/
606*c83a76b0SSuyog Pawar for(edgeidx = 0; edgeidx < 5; edgeidx++)
607*c83a76b0SSuyog Pawar {
608*c83a76b0SSuyog Pawar acc_error_category[edgeidx] = 0;
609*c83a76b0SSuyog Pawar category_count[edgeidx] = 0;
610*c83a76b0SSuyog Pawar }
611*c83a76b0SSuyog Pawar
612*c83a76b0SSuyog Pawar /* Call the function to populate the EO parameter for this ctb for 135 deg EO class*/
613*c83a76b0SSuyog Pawar // clang-format off
614*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG,
615*c83a76b0SSuyog Pawar acc_error_category, category_count);
616*c83a76b0SSuyog Pawar
617*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG;
618*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
619*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
620*c83a76b0SSuyog Pawar : 0;
621*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
622*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
623*c83a76b0SSuyog Pawar : 0;
624*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
625*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
626*c83a76b0SSuyog Pawar : 0;
627*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
628*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
629*c83a76b0SSuyog Pawar : 0;
630*c83a76b0SSuyog Pawar // clang-format on
631*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
632*c83a76b0SSuyog Pawar
633*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
634*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
635*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
636*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
637*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
638*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
639*c83a76b0SSuyog Pawar
640*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
641*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
642*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
643*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
644*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
645*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
646*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
647*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
648*c83a76b0SSuyog Pawar
649*c83a76b0SSuyog Pawar num_luma_rdo_cand++;
650*c83a76b0SSuyog Pawar
651*c83a76b0SSuyog Pawar /* 45 degree SAO CAND*/
652*c83a76b0SSuyog Pawar for(edgeidx = 0; edgeidx < 5; edgeidx++)
653*c83a76b0SSuyog Pawar {
654*c83a76b0SSuyog Pawar acc_error_category[edgeidx] = 0;
655*c83a76b0SSuyog Pawar category_count[edgeidx] = 0;
656*c83a76b0SSuyog Pawar }
657*c83a76b0SSuyog Pawar
658*c83a76b0SSuyog Pawar /* Call the function to populate the EO parameter for this ctb for 45 deg EO class*/
659*c83a76b0SSuyog Pawar // clang-format off
660*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG,
661*c83a76b0SSuyog Pawar acc_error_category, category_count);
662*c83a76b0SSuyog Pawar
663*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG;
664*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
665*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
666*c83a76b0SSuyog Pawar : 0;
667*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
668*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
669*c83a76b0SSuyog Pawar : 0;
670*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
671*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
672*c83a76b0SSuyog Pawar : 0;
673*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
674*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
675*c83a76b0SSuyog Pawar : 0;
676*c83a76b0SSuyog Pawar // clang-format on
677*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
678*c83a76b0SSuyog Pawar
679*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
680*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
681*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
682*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
683*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
684*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
685*c83a76b0SSuyog Pawar
686*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
687*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
688*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
689*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
690*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
691*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
692*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
693*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
694*c83a76b0SSuyog Pawar
695*c83a76b0SSuyog Pawar num_luma_rdo_cand++;
696*c83a76b0SSuyog Pawar
697*c83a76b0SSuyog Pawar /* First cand will be best cand after 1st iteration*/
698*c83a76b0SSuyog Pawar curr_buf_idx = 0;
699*c83a76b0SSuyog Pawar best_buf_idx = 1;
700*c83a76b0SSuyog Pawar best_cost = 0xFFFFFFFF;
701*c83a76b0SSuyog Pawar best_cand_idx = 0;
702*c83a76b0SSuyog Pawar
703*c83a76b0SSuyog Pawar /*Back up the top pixels for (x,y+1)th ctb*/
704*c83a76b0SSuyog Pawar if(!ps_sao_ctxt->i4_is_last_ctb_row)
705*c83a76b0SSuyog Pawar {
706*c83a76b0SSuyog Pawar memcpy(
707*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
708*c83a76b0SSuyog Pawar pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
709*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd);
710*c83a76b0SSuyog Pawar }
711*c83a76b0SSuyog Pawar
712*c83a76b0SSuyog Pawar for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++)
713*c83a76b0SSuyog Pawar {
714*c83a76b0SSuyog Pawar s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
715*c83a76b0SSuyog Pawar
716*c83a76b0SSuyog Pawar /* This memcpy is required because cabac uses parameters from this structure
717*c83a76b0SSuyog Pawar * to evaluate bits and this structure ptr is sent to cabac through
718*c83a76b0SSuyog Pawar * "ihevce_cabac_rdo_encode_sao" function
719*c83a76b0SSuyog Pawar */
720*c83a76b0SSuyog Pawar memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
721*c83a76b0SSuyog Pawar
722*c83a76b0SSuyog Pawar /* Copy the left pixels to the scratch buffer for every rdo cand because it is
723*c83a76b0SSuyog Pawar overwritten by the sao leaf level function for next ctb*/
724*c83a76b0SSuyog Pawar memcpy(
725*c83a76b0SSuyog Pawar s_sao_ctxt.au1_left_luma_scratch,
726*c83a76b0SSuyog Pawar ps_sao_ctxt->au1_sao_src_left_luma,
727*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_ht);
728*c83a76b0SSuyog Pawar
729*c83a76b0SSuyog Pawar /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
730*c83a76b0SSuyog Pawar overwritten by the sao leaf level function for next ctb*/
731*c83a76b0SSuyog Pawar memcpy(
732*c83a76b0SSuyog Pawar s_sao_ctxt.au1_top_luma_scratch,
733*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
734*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd + 2);
735*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
736*c83a76b0SSuyog Pawar
737*c83a76b0SSuyog Pawar pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
738*c83a76b0SSuyog Pawar
739*c83a76b0SSuyog Pawar ASSERT(
740*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
741*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
742*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
743*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
744*c83a76b0SSuyog Pawar ASSERT(
745*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
746*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
747*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
748*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
749*c83a76b0SSuyog Pawar ASSERT(
750*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
751*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
752*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
753*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
754*c83a76b0SSuyog Pawar ASSERT(
755*c83a76b0SSuyog Pawar (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
756*c83a76b0SSuyog Pawar (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
757*c83a76b0SSuyog Pawar (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
758*c83a76b0SSuyog Pawar
759*c83a76b0SSuyog Pawar /* Copy the deblocked recon data to scratch buffer to do sao*/
760*c83a76b0SSuyog Pawar
761*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
762*c83a76b0SSuyog Pawar pu1_luma_scratch_buf,
763*c83a76b0SSuyog Pawar i4_luma_scratch_buf_stride,
764*c83a76b0SSuyog Pawar pu1_recon_luma,
765*c83a76b0SSuyog Pawar luma_recon_stride,
766*c83a76b0SSuyog Pawar SCRATCH_BUF_STRIDE,
767*c83a76b0SSuyog Pawar ctb_ht + 1);
768*c83a76b0SSuyog Pawar
769*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
770*c83a76b0SSuyog Pawar s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
771*c83a76b0SSuyog Pawar
772*c83a76b0SSuyog Pawar s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
773*c83a76b0SSuyog Pawar s_sao_ctxt.i1_slice_sao_chroma_flag = 0;
774*c83a76b0SSuyog Pawar
775*c83a76b0SSuyog Pawar ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
776*c83a76b0SSuyog Pawar
777*c83a76b0SSuyog Pawar /* Calculate the distortion between sao'ed ctb and original src ctb*/
778*c83a76b0SSuyog Pawar // clang-format off
779*c83a76b0SSuyog Pawar distortion =
780*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
781*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
782*c83a76b0SSuyog Pawar s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE);
783*c83a76b0SSuyog Pawar // clang-format on
784*c83a76b0SSuyog Pawar
785*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
786*c83a76b0SSuyog Pawar ctb_bits = ihevce_cabac_rdo_encode_sao(
787*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
788*c83a76b0SSuyog Pawar
789*c83a76b0SSuyog Pawar /* Calculate the cost as D+(lambda)*R */
790*c83a76b0SSuyog Pawar curr_cost = distortion +
791*c83a76b0SSuyog Pawar COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
792*c83a76b0SSuyog Pawar
793*c83a76b0SSuyog Pawar if(curr_cost < best_cost)
794*c83a76b0SSuyog Pawar {
795*c83a76b0SSuyog Pawar best_cost = curr_cost;
796*c83a76b0SSuyog Pawar best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
797*c83a76b0SSuyog Pawar best_cand_idx = rdo_cand;
798*c83a76b0SSuyog Pawar curr_buf_idx = !curr_buf_idx;
799*c83a76b0SSuyog Pawar }
800*c83a76b0SSuyog Pawar }
801*c83a76b0SSuyog Pawar
802*c83a76b0SSuyog Pawar /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO
803*c83a76b0SSuyog Pawar * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
804*c83a76b0SSuyog Pawar */
805*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_y_type_idx =
806*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx;
807*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[1] =
808*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1];
809*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[2] =
810*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2];
811*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[3] =
812*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3];
813*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[4] =
814*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4];
815*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_y_band_pos =
816*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos;
817*c83a76b0SSuyog Pawar }
818*c83a76b0SSuyog Pawar else
819*c83a76b0SSuyog Pawar {
820*c83a76b0SSuyog Pawar /*Back up the top pixels for (x,y+1)th ctb*/
821*c83a76b0SSuyog Pawar if(!ps_sao_ctxt->i4_is_last_ctb_row)
822*c83a76b0SSuyog Pawar {
823*c83a76b0SSuyog Pawar memcpy(
824*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
825*c83a76b0SSuyog Pawar pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
826*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd);
827*c83a76b0SSuyog Pawar }
828*c83a76b0SSuyog Pawar
829*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE;
830*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[1] = 0;
831*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[2] = 0;
832*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[3] = 0;
833*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_y_offset[4] = 0;
834*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_y_band_pos = 0;
835*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
836*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
837*c83a76b0SSuyog Pawar
838*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
839*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
840*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
841*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
842*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
843*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_cb_band_pos = 0;
844*c83a76b0SSuyog Pawar
845*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
846*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
847*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
848*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
849*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
850*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_cr_band_pos = 0;
851*c83a76b0SSuyog Pawar }
852*c83a76b0SSuyog Pawar /*****************************************************/
853*c83a76b0SSuyog Pawar /********************RDO FOR CHROMA CAND**************/
854*c83a76b0SSuyog Pawar /*****************************************************/
855*c83a76b0SSuyog Pawar #if !DISABLE_SAO_WHEN_NOISY
856*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
857*c83a76b0SSuyog Pawar #else
858*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset)
859*c83a76b0SSuyog Pawar #endif
860*c83a76b0SSuyog Pawar {
861*c83a76b0SSuyog Pawar /*Back up the top pixels for (x,y+1)th ctb*/
862*c83a76b0SSuyog Pawar if(!ps_sao_ctxt->i4_is_last_ctb_row)
863*c83a76b0SSuyog Pawar {
864*c83a76b0SSuyog Pawar memcpy(
865*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
866*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
867*c83a76b0SSuyog Pawar pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
868*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd);
869*c83a76b0SSuyog Pawar }
870*c83a76b0SSuyog Pawar
871*c83a76b0SSuyog Pawar /* Reset the error and edge count*/
872*c83a76b0SSuyog Pawar for(edgeidx = 0; edgeidx < 5; edgeidx++)
873*c83a76b0SSuyog Pawar {
874*c83a76b0SSuyog Pawar acc_error_category[edgeidx] = 0;
875*c83a76b0SSuyog Pawar category_count[edgeidx] = 0;
876*c83a76b0SSuyog Pawar }
877*c83a76b0SSuyog Pawar // clang-format off
878*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt,
879*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category,
880*c83a76b0SSuyog Pawar category_count);
881*c83a76b0SSuyog Pawar // clang-format on
882*c83a76b0SSuyog Pawar
883*c83a76b0SSuyog Pawar /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO
884*c83a76b0SSuyog Pawar * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
885*c83a76b0SSuyog Pawar */
886*c83a76b0SSuyog Pawar // clang-format off
887*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
888*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0]
889*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
890*c83a76b0SSuyog Pawar : 0;
891*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1]
892*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
893*c83a76b0SSuyog Pawar : 0;
894*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3]
895*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
896*c83a76b0SSuyog Pawar : 0;
897*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4]
898*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
899*c83a76b0SSuyog Pawar : 0;
900*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_cb_band_pos = 0;
901*c83a76b0SSuyog Pawar
902*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
903*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0]
904*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
905*c83a76b0SSuyog Pawar : 0;
906*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1]
907*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
908*c83a76b0SSuyog Pawar : 0;
909*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3]
910*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
911*c83a76b0SSuyog Pawar : 0;
912*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4]
913*c83a76b0SSuyog Pawar ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
914*c83a76b0SSuyog Pawar : 0;
915*c83a76b0SSuyog Pawar // clang-format on
916*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_cr_band_pos = 0;
917*c83a76b0SSuyog Pawar }
918*c83a76b0SSuyog Pawar else
919*c83a76b0SSuyog Pawar {
920*c83a76b0SSuyog Pawar /*Back up the top pixels for (x,y+1)th ctb*/
921*c83a76b0SSuyog Pawar if(!ps_sao_ctxt->i4_is_last_ctb_row)
922*c83a76b0SSuyog Pawar {
923*c83a76b0SSuyog Pawar memcpy(
924*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
925*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
926*c83a76b0SSuyog Pawar pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
927*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd);
928*c83a76b0SSuyog Pawar }
929*c83a76b0SSuyog Pawar
930*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
931*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
932*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
933*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
934*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
935*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_cb_band_pos = 0;
936*c83a76b0SSuyog Pawar
937*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
938*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
939*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
940*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
941*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
942*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b5_cr_band_pos = 0;
943*c83a76b0SSuyog Pawar
944*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
945*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
946*c83a76b0SSuyog Pawar }
947*c83a76b0SSuyog Pawar
948*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
949*c83a76b0SSuyog Pawar s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
950*c83a76b0SSuyog Pawar
951*c83a76b0SSuyog Pawar /*****************************************************/
952*c83a76b0SSuyog Pawar /**RDO for Best Luma - Chroma combined, No SAO,*******/
953*c83a76b0SSuyog Pawar /*************Left merge and Top merge****************/
954*c83a76b0SSuyog Pawar /*****************************************************/
955*c83a76b0SSuyog Pawar
956*c83a76b0SSuyog Pawar /* No SAO cand*/
957*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
958*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
959*c83a76b0SSuyog Pawar
960*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE;
961*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0;
962*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0;
963*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0;
964*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0;
965*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0;
966*c83a76b0SSuyog Pawar
967*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE;
968*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0;
969*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0;
970*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0;
971*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0;
972*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0;
973*c83a76b0SSuyog Pawar
974*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE;
975*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0;
976*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0;
977*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0;
978*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0;
979*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0;
980*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
981*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
982*c83a76b0SSuyog Pawar
983*c83a76b0SSuyog Pawar num_rdo_cand++;
984*c83a76b0SSuyog Pawar
985*c83a76b0SSuyog Pawar /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then
986*c83a76b0SSuyog Pawar the standard mandates that the merge candidates must be set to unavailable.
987*c83a76b0SSuyog Pawar Hence, check for tile boundary condition by reading
988*c83a76b0SSuyog Pawar s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB.
989*c83a76b0SSuyog Pawar A special case: Merge-candidates should be available at dependent-slices boundaries.
990*c83a76b0SSuyog Pawar Search for <SAO_note_01> in workspace to know more */
991*c83a76b0SSuyog Pawar
992*c83a76b0SSuyog Pawar #if !DISABLE_SAO_WHEN_NOISY
993*c83a76b0SSuyog Pawar if(1)
994*c83a76b0SSuyog Pawar #else
995*c83a76b0SSuyog Pawar if(!u1_force_no_offset)
996*c83a76b0SSuyog Pawar #endif
997*c83a76b0SSuyog Pawar {
998*c83a76b0SSuyog Pawar /* Merge left cand*/
999*c83a76b0SSuyog Pawar if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail)
1000*c83a76b0SSuyog Pawar {
1001*c83a76b0SSuyog Pawar memcpy(
1002*c83a76b0SSuyog Pawar &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1003*c83a76b0SSuyog Pawar &ps_sao_ctxt->s_left_ctb_sao,
1004*c83a76b0SSuyog Pawar sizeof(sao_enc_t));
1005*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1;
1006*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
1007*c83a76b0SSuyog Pawar num_rdo_cand++;
1008*c83a76b0SSuyog Pawar }
1009*c83a76b0SSuyog Pawar
1010*c83a76b0SSuyog Pawar /* Merge top cand*/
1011*c83a76b0SSuyog Pawar if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail)
1012*c83a76b0SSuyog Pawar {
1013*c83a76b0SSuyog Pawar memcpy(
1014*c83a76b0SSuyog Pawar &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1015*c83a76b0SSuyog Pawar (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz),
1016*c83a76b0SSuyog Pawar sizeof(sao_enc_t));
1017*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
1018*c83a76b0SSuyog Pawar ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1;
1019*c83a76b0SSuyog Pawar num_rdo_cand++;
1020*c83a76b0SSuyog Pawar }
1021*c83a76b0SSuyog Pawar
1022*c83a76b0SSuyog Pawar /* Best luma-chroma candidate*/
1023*c83a76b0SSuyog Pawar memcpy(
1024*c83a76b0SSuyog Pawar &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1025*c83a76b0SSuyog Pawar &s_best_luma_chroma_cand,
1026*c83a76b0SSuyog Pawar sizeof(sao_enc_t));
1027*c83a76b0SSuyog Pawar num_rdo_cand++;
1028*c83a76b0SSuyog Pawar }
1029*c83a76b0SSuyog Pawar
1030*c83a76b0SSuyog Pawar {
1031*c83a76b0SSuyog Pawar UWORD32 luma_distortion = 0, chroma_distortion = 0;
1032*c83a76b0SSuyog Pawar /* First cand will be best cand after 1st iteration*/
1033*c83a76b0SSuyog Pawar curr_buf_idx = 0;
1034*c83a76b0SSuyog Pawar best_buf_idx = 1;
1035*c83a76b0SSuyog Pawar best_cost = 0xFFFFFFFF;
1036*c83a76b0SSuyog Pawar best_cand_idx = 0;
1037*c83a76b0SSuyog Pawar
1038*c83a76b0SSuyog Pawar for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++)
1039*c83a76b0SSuyog Pawar {
1040*c83a76b0SSuyog Pawar s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
1041*c83a76b0SSuyog Pawar
1042*c83a76b0SSuyog Pawar distortion = 0;
1043*c83a76b0SSuyog Pawar
1044*c83a76b0SSuyog Pawar /* This memcpy is required because cabac uses parameters from this structure
1045*c83a76b0SSuyog Pawar * to evaluate bits and this structure ptr is sent to cabac through
1046*c83a76b0SSuyog Pawar * "ihevce_cabac_rdo_encode_sao" function
1047*c83a76b0SSuyog Pawar */
1048*c83a76b0SSuyog Pawar memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
1049*c83a76b0SSuyog Pawar
1050*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1051*c83a76b0SSuyog Pawar {
1052*c83a76b0SSuyog Pawar /* Copy the left pixels to the scratch buffer for every rdo cand because it is
1053*c83a76b0SSuyog Pawar overwritten by the sao leaf level function for next ctb*/
1054*c83a76b0SSuyog Pawar memcpy(
1055*c83a76b0SSuyog Pawar s_sao_ctxt.au1_left_luma_scratch,
1056*c83a76b0SSuyog Pawar ps_sao_ctxt->au1_sao_src_left_luma,
1057*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_ht);
1058*c83a76b0SSuyog Pawar
1059*c83a76b0SSuyog Pawar /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
1060*c83a76b0SSuyog Pawar overwritten by the sao leaf level function for next ctb*/
1061*c83a76b0SSuyog Pawar memcpy(
1062*c83a76b0SSuyog Pawar s_sao_ctxt.au1_top_luma_scratch,
1063*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
1064*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd + 2);
1065*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
1066*c83a76b0SSuyog Pawar
1067*c83a76b0SSuyog Pawar pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
1068*c83a76b0SSuyog Pawar
1069*c83a76b0SSuyog Pawar /* Copy the deblocked recon data to scratch buffer to do sao*/
1070*c83a76b0SSuyog Pawar
1071*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1072*c83a76b0SSuyog Pawar pu1_luma_scratch_buf,
1073*c83a76b0SSuyog Pawar i4_luma_scratch_buf_stride,
1074*c83a76b0SSuyog Pawar pu1_recon_luma,
1075*c83a76b0SSuyog Pawar luma_recon_stride,
1076*c83a76b0SSuyog Pawar SCRATCH_BUF_STRIDE,
1077*c83a76b0SSuyog Pawar ctb_ht + 1);
1078*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
1079*c83a76b0SSuyog Pawar s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
1080*c83a76b0SSuyog Pawar
1081*c83a76b0SSuyog Pawar ASSERT(
1082*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
1083*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
1084*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
1085*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
1086*c83a76b0SSuyog Pawar }
1087*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1088*c83a76b0SSuyog Pawar {
1089*c83a76b0SSuyog Pawar /* Copy the left pixels to the scratch buffer for every rdo cand because it is
1090*c83a76b0SSuyog Pawar overwritten by the sao leaf level function for next ctb*/
1091*c83a76b0SSuyog Pawar memcpy(
1092*c83a76b0SSuyog Pawar s_sao_ctxt.au1_left_chroma_scratch,
1093*c83a76b0SSuyog Pawar ps_sao_ctxt->au1_sao_src_left_chroma,
1094*c83a76b0SSuyog Pawar (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2);
1095*c83a76b0SSuyog Pawar
1096*c83a76b0SSuyog Pawar /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
1097*c83a76b0SSuyog Pawar overwritten by the sao leaf level function for next ctb*/
1098*c83a76b0SSuyog Pawar memcpy(
1099*c83a76b0SSuyog Pawar s_sao_ctxt.au1_top_chroma_scratch,
1100*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2,
1101*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_sao_blk_wd + 4);
1102*c83a76b0SSuyog Pawar
1103*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2;
1104*c83a76b0SSuyog Pawar
1105*c83a76b0SSuyog Pawar pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx];
1106*c83a76b0SSuyog Pawar
1107*c83a76b0SSuyog Pawar /* Copy the deblocked recon data to scratch buffer to do sao*/
1108*c83a76b0SSuyog Pawar
1109*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1110*c83a76b0SSuyog Pawar pu1_chroma_scratch_buf,
1111*c83a76b0SSuyog Pawar i4_chroma_scratch_buf_stride,
1112*c83a76b0SSuyog Pawar pu1_recon_chroma,
1113*c83a76b0SSuyog Pawar chroma_recon_stride,
1114*c83a76b0SSuyog Pawar SCRATCH_BUF_STRIDE,
1115*c83a76b0SSuyog Pawar (ctb_ht >> !u1_is_422) + 1);
1116*c83a76b0SSuyog Pawar
1117*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf;
1118*c83a76b0SSuyog Pawar s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride;
1119*c83a76b0SSuyog Pawar
1120*c83a76b0SSuyog Pawar ASSERT(
1121*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
1122*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
1123*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
1124*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
1125*c83a76b0SSuyog Pawar ASSERT(
1126*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
1127*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
1128*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
1129*c83a76b0SSuyog Pawar (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
1130*c83a76b0SSuyog Pawar }
1131*c83a76b0SSuyog Pawar
1132*c83a76b0SSuyog Pawar ASSERT(
1133*c83a76b0SSuyog Pawar (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
1134*c83a76b0SSuyog Pawar (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
1135*c83a76b0SSuyog Pawar (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
1136*c83a76b0SSuyog Pawar
1137*c83a76b0SSuyog Pawar s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
1138*c83a76b0SSuyog Pawar s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag;
1139*c83a76b0SSuyog Pawar
1140*c83a76b0SSuyog Pawar ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
1141*c83a76b0SSuyog Pawar
1142*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1143*c83a76b0SSuyog Pawar { // clang-format off
1144*c83a76b0SSuyog Pawar luma_distortion =
1145*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
1146*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
1147*c83a76b0SSuyog Pawar s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
1148*c83a76b0SSuyog Pawar ctb_ht,
1149*c83a76b0SSuyog Pawar NULL_PLANE);
1150*c83a76b0SSuyog Pawar } // clang-format on
1151*c83a76b0SSuyog Pawar
1152*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1153*c83a76b0SSuyog Pawar { // clang-format off
1154*c83a76b0SSuyog Pawar chroma_distortion =
1155*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma,
1156*c83a76b0SSuyog Pawar s_sao_ctxt.pu1_cur_chroma_recon_buf,
1157*c83a76b0SSuyog Pawar chroma_src_stride,
1158*c83a76b0SSuyog Pawar s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
1159*c83a76b0SSuyog Pawar (ctb_ht >> !u1_is_422),
1160*c83a76b0SSuyog Pawar NULL_PLANE);
1161*c83a76b0SSuyog Pawar } // clang-format on
1162*c83a76b0SSuyog Pawar
1163*c83a76b0SSuyog Pawar /*chroma distortion is added after correction because of lambda difference*/
1164*c83a76b0SSuyog Pawar distortion =
1165*c83a76b0SSuyog Pawar luma_distortion +
1166*c83a76b0SSuyog Pawar (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf));
1167*c83a76b0SSuyog Pawar
1168*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
1169*c83a76b0SSuyog Pawar ctb_bits = ihevce_cabac_rdo_encode_sao(
1170*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
1171*c83a76b0SSuyog Pawar
1172*c83a76b0SSuyog Pawar /* Calculate the cost as D+(lambda)*R */
1173*c83a76b0SSuyog Pawar curr_cost = distortion +
1174*c83a76b0SSuyog Pawar COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
1175*c83a76b0SSuyog Pawar
1176*c83a76b0SSuyog Pawar if(curr_cost < best_cost)
1177*c83a76b0SSuyog Pawar {
1178*c83a76b0SSuyog Pawar best_ctb_sao_bits = ctb_bits;
1179*c83a76b0SSuyog Pawar best_cost = curr_cost;
1180*c83a76b0SSuyog Pawar best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1181*c83a76b0SSuyog Pawar best_cand_idx = rdo_cand;
1182*c83a76b0SSuyog Pawar curr_buf_idx = !curr_buf_idx;
1183*c83a76b0SSuyog Pawar }
1184*c83a76b0SSuyog Pawar }
1185*c83a76b0SSuyog Pawar /* Write the sao bits of the best candidate to the header bits output */
1186*c83a76b0SSuyog Pawar *pu4_frame_rdopt_header_bits = best_ctb_sao_bits;
1187*c83a76b0SSuyog Pawar
1188*c83a76b0SSuyog Pawar ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx);
1189*c83a76b0SSuyog Pawar
1190*c83a76b0SSuyog Pawar /* store the sao parameters of curr ctb for top merge and left merge*/
1191*c83a76b0SSuyog Pawar memcpy(
1192*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_top_ctb_sao,
1193*c83a76b0SSuyog Pawar &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1194*c83a76b0SSuyog Pawar sizeof(sao_enc_t));
1195*c83a76b0SSuyog Pawar memcpy(
1196*c83a76b0SSuyog Pawar &ps_sao_ctxt->s_left_ctb_sao,
1197*c83a76b0SSuyog Pawar &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1198*c83a76b0SSuyog Pawar sizeof(sao_enc_t));
1199*c83a76b0SSuyog Pawar
1200*c83a76b0SSuyog Pawar /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/
1201*c83a76b0SSuyog Pawar memcpy(
1202*c83a76b0SSuyog Pawar &ps_ctb_enc_loop_out->s_sao,
1203*c83a76b0SSuyog Pawar &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1204*c83a76b0SSuyog Pawar sizeof(sao_enc_t));
1205*c83a76b0SSuyog Pawar
1206*c83a76b0SSuyog Pawar if(!ps_sao_ctxt->i4_is_last_ctb_col)
1207*c83a76b0SSuyog Pawar {
1208*c83a76b0SSuyog Pawar /* Update left luma buffer for next ctb */
1209*c83a76b0SSuyog Pawar for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++)
1210*c83a76b0SSuyog Pawar {
1211*c83a76b0SSuyog Pawar ps_sao_ctxt->au1_sao_src_left_luma[row] =
1212*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_cur_luma_recon_buf
1213*c83a76b0SSuyog Pawar [row * ps_sao_ctxt->i4_cur_luma_recon_stride +
1214*c83a76b0SSuyog Pawar (ps_sao_ctxt->i4_sao_blk_wd - 1)];
1215*c83a76b0SSuyog Pawar }
1216*c83a76b0SSuyog Pawar }
1217*c83a76b0SSuyog Pawar
1218*c83a76b0SSuyog Pawar if(!ps_sao_ctxt->i4_is_last_ctb_col)
1219*c83a76b0SSuyog Pawar {
1220*c83a76b0SSuyog Pawar /* Update left chroma buffer for next ctb */
1221*c83a76b0SSuyog Pawar for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++)
1222*c83a76b0SSuyog Pawar {
1223*c83a76b0SSuyog Pawar *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) =
1224*c83a76b0SSuyog Pawar *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf +
1225*c83a76b0SSuyog Pawar row * ps_sao_ctxt->i4_cur_chroma_recon_stride +
1226*c83a76b0SSuyog Pawar (ps_sao_ctxt->i4_sao_blk_wd - 2));
1227*c83a76b0SSuyog Pawar }
1228*c83a76b0SSuyog Pawar }
1229*c83a76b0SSuyog Pawar
1230*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1231*c83a76b0SSuyog Pawar {
1232*c83a76b0SSuyog Pawar /* Copy the sao'ed output of the best candidate to the recon buffer*/
1233*c83a76b0SSuyog Pawar
1234*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1235*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_cur_luma_recon_buf,
1236*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_cur_luma_recon_stride,
1237*c83a76b0SSuyog Pawar ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx],
1238*c83a76b0SSuyog Pawar i4_luma_scratch_buf_stride,
1239*c83a76b0SSuyog Pawar ctb_wd,
1240*c83a76b0SSuyog Pawar ctb_ht);
1241*c83a76b0SSuyog Pawar }
1242*c83a76b0SSuyog Pawar if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1243*c83a76b0SSuyog Pawar {
1244*c83a76b0SSuyog Pawar /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/
1245*c83a76b0SSuyog Pawar
1246*c83a76b0SSuyog Pawar ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1247*c83a76b0SSuyog Pawar ps_sao_ctxt->pu1_cur_chroma_recon_buf,
1248*c83a76b0SSuyog Pawar ps_sao_ctxt->i4_cur_chroma_recon_stride,
1249*c83a76b0SSuyog Pawar ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx],
1250*c83a76b0SSuyog Pawar i4_chroma_scratch_buf_stride,
1251*c83a76b0SSuyog Pawar ctb_wd,
1252*c83a76b0SSuyog Pawar ctb_ht >> !u1_is_422);
1253*c83a76b0SSuyog Pawar }
1254*c83a76b0SSuyog Pawar }
1255*c83a76b0SSuyog Pawar }
1256