1 /*
2 * Copyright (c) 2017-2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_hevc_g9.cpp
24 //! \brief    HEVC dual-pipe encoder for GEN9.
25 //!
26 
27 #include "codechal_encode_hevc_g9.h"
28 #include "codechal_kernel_hme_g9.h"
29 #include "igcodeckrn_g9.h"
30 #include "codeckrnheader.h"
31 #include "mhw_mmio_g9.h"
32 
//! \brief Builds one ALU "load" instruction dword.
//! \details Packs 0x80 into bits [20+], an operand code into bits [10+]
//!          (0x20 when \a bSrcRegA is non-zero, 0x21 otherwise) and the low
//!          four bits of \a GprReg into bits [3:0].
//!          NOTE(review): 0x20 / 0x21 are presumably the SRCA / SRCB operand
//!          codes of the command-streamer ALU - confirm against the HW spec.
#define CS_ALU_COMMAND_LOAD(bSrcRegA, GprReg)           ((0x80 << 20) | (((bSrcRegA) ? 0x20 : 0x21) << 10) | ((GprReg) & 0x0F))

//! \brief Builds one ALU "store" instruction dword.
//! \details Packs 0x180 into bits [20+], the low four bits of \a GprReg into
//!          bits [13:10] and the constant 0x31 into the low bits.
//!          NOTE(review): 0x31 is presumably the accumulator operand code,
//!          as the macro name suggests - confirm against the HW spec.
#define CS_ALU_COMMAND_STORE_ACCU(GprReg)               ((0x180 << 20) | (((GprReg) & 0x0F) << 10) | 0x31)
35 
36 
//! Padding size in bytes (64 KiB).
//! NOTE(review): by its name this is extra padding for a GPU MMU workaround;
//! the users of this constant are outside this part of the file - confirm.
#define GPUMMU_WA_PADDING                               (64 * 1024)
38 
//! HME step selector: distinguishes the first HME pass from every later one.
enum
{
    HME_FIRST_STEP     = 0,  //!< first HME step
    HME_FOLLOWING_STEP = 1   //!< any step after the first
};
45 
//! Motion vector shift factor, one value per HME downscaling level.
//! Note that the 16x and 4x levels intentionally share the value 2 here.
enum
{
    MV_SHIFT_FACTOR_32x = 1,
    MV_SHIFT_FACTOR_16x = 2,
    MV_SHIFT_FACTOR_4x  = 2
};
53 
//! Previous motion vector read position, per HME downscaling level.
enum
{
    PREV_MV_READ_POSITION_16x = 1,
    PREV_MV_READ_POSITION_4x  = 0
};
60 
//! ALU opcodes. Each value is the opcode number already shifted left by 20
//! bits, i.e. positioned in the same field the CS_ALU_COMMAND_* macros use.
enum
{
    CS_ALU_COMMAND_ADD = ((0x100) << 20),
    CS_ALU_COMMAND_SUB = ((0x101) << 20),
    CS_ALU_COMMAND_AND = ((0x102) << 20),
    CS_ALU_COMMAND_OR  = ((0x103) << 20),
    CS_ALU_COMMAND_XOR = ((0x104) << 20)
};
70 
// ROI record in the layout the kernel uses for ROI surface calculations.
// It differs slightly from the ENCODE_ROI structure used by DDI/App, so it
// is used only for ROI surface loading.
struct CODECHAL_ENC_HEVC_ROI_G9
{
    uint32_t Top;        // top edge of the region
    uint32_t Left;       // left edge of the region
    uint32_t Bottom;     // bottom edge of the region
    uint32_t Right;      // right edge of the region
    int32_t  QPDelta;    // signed QP delta associated with the region
    int32_t  ROI_Level;  // signed ROI level associated with the region
};

// The kernel consumes this record as six consecutive 32-bit values; fail the
// build if padding or a field change ever alters that size.
static_assert(sizeof(CODECHAL_ENC_HEVC_ROI_G9) == 6 * sizeof(uint32_t),
              "CODECHAL_ENC_HEVC_ROI_G9 must be exactly six DWORDs");
83 
84 //! HEVC encoder ME kernel curbe for GEN9
85 struct CODECHAL_ENC_HEVC_ME_CURBE_G9
86 {
87     // DW0
88     union
89     {
90         struct
91         {
92             uint32_t   SkipModeEn                       : MOS_BITFIELD_BIT(0);
93             uint32_t   AdaptiveEn                       : MOS_BITFIELD_BIT(1);
94             uint32_t   BiMixDis                         : MOS_BITFIELD_BIT(2);
95             uint32_t                                    : MOS_BITFIELD_RANGE(3, 4);
96             uint32_t   EarlyImeSuccessEn                : MOS_BITFIELD_BIT(5);
97             uint32_t                                    : MOS_BITFIELD_BIT(6);
98             uint32_t   T8x8FlagForInterEn               : MOS_BITFIELD_BIT(7);
99             uint32_t                                    : MOS_BITFIELD_RANGE(8, 23);
100             uint32_t   EarlyImeStop                     : MOS_BITFIELD_RANGE(24, 31);
101         };
102         struct
103         {
104             uint32_t   Value;
105         };
106     } DW0;
107 
108     // DW1
109     union
110     {
111         struct
112         {
113             uint32_t   MaxNumMVs                        : MOS_BITFIELD_RANGE(0, 5);
114             uint32_t                                    : MOS_BITFIELD_RANGE(6, 15);
115             uint32_t   BiWeight                         : MOS_BITFIELD_RANGE(16, 21);
116             uint32_t                                    : MOS_BITFIELD_RANGE(22, 27);
117             uint32_t   UniMixDisable                    : MOS_BITFIELD_BIT(28);
118             uint32_t                                    : MOS_BITFIELD_RANGE(29, 31);
119         };
120         struct
121         {
122             uint32_t   Value;
123         };
124     } DW1;
125 
126     // DW2
127     union
128     {
129         struct
130         {
131             uint32_t   MaxLenSP                         : MOS_BITFIELD_RANGE(0, 7);
132             uint32_t   MaxNumSU                         : MOS_BITFIELD_RANGE(8, 15);
133             uint32_t                                    : MOS_BITFIELD_RANGE(16, 31);
134         };
135         struct
136         {
137             uint32_t   Value;
138         };
139     } DW2;
140 
141     // DW3
142     union
143     {
144         struct
145         {
146             uint32_t   SrcSize                          : MOS_BITFIELD_RANGE(0, 1);
147             uint32_t                                    : MOS_BITFIELD_RANGE(2, 3);
148             uint32_t   MbTypeRemap                      : MOS_BITFIELD_RANGE(4, 5);
149             uint32_t   SrcAccess                        : MOS_BITFIELD_BIT(6);
150             uint32_t   RefAccess                        : MOS_BITFIELD_BIT(7);
151             uint32_t   SearchCtrl                       : MOS_BITFIELD_RANGE(8, 10);
152             uint32_t   DualSearchPathOption             : MOS_BITFIELD_BIT(11);
153             uint32_t   SubPelMode                       : MOS_BITFIELD_RANGE(12, 13);
154             uint32_t   SkipType                         : MOS_BITFIELD_BIT(14);
155             uint32_t   DisableFieldCacheAlloc           : MOS_BITFIELD_BIT(15);
156             uint32_t   InterChromaMode                  : MOS_BITFIELD_BIT(16);
157             uint32_t   FTEnable                         : MOS_BITFIELD_BIT(17);
158             uint32_t   BMEDisableFBR                    : MOS_BITFIELD_BIT(18);
159             uint32_t   BlockBasedSkipEnable             : MOS_BITFIELD_BIT(19);
160             uint32_t   InterSAD                         : MOS_BITFIELD_RANGE(20, 21);
161             uint32_t   IntraSAD                         : MOS_BITFIELD_RANGE(22, 23);
162             uint32_t   SubMbPartMask                    : MOS_BITFIELD_RANGE(24, 30);
163             uint32_t                                    : MOS_BITFIELD_BIT(31);
164         };
165         struct
166         {
167             uint32_t   Value;
168         };
169     } DW3;
170 
171     // DW4
172     union
173     {
174         struct
175         {
176             uint32_t                                    : MOS_BITFIELD_RANGE(0, 7);
177             uint32_t   PictureHeightMinus1              : MOS_BITFIELD_RANGE(8, 15);
178             uint32_t   PictureWidth                     : MOS_BITFIELD_RANGE(16, 23);
179             uint32_t                                    : MOS_BITFIELD_RANGE(24, 31);
180         };
181         struct
182         {
183             uint32_t   Value;
184         };
185     } DW4;
186 
187     // DW5
188     union
189     {
190         struct
191         {
192             uint32_t                                    : MOS_BITFIELD_RANGE(0, 7);
193             uint32_t   QpPrimeY                         : MOS_BITFIELD_RANGE(8, 15);
194             uint32_t   RefWidth                         : MOS_BITFIELD_RANGE(16, 23);
195             uint32_t   RefHeight                        : MOS_BITFIELD_RANGE(24, 31);
196         };
197         struct
198         {
199             uint32_t   Value;
200         };
201     } DW5;
202 
203     // DW6
204     union
205     {
206         struct
207         {
208             uint32_t                                    : MOS_BITFIELD_RANGE(0, 2);
209             uint32_t   WriteDistortions                 : MOS_BITFIELD_BIT(3);
210             uint32_t   UseMvFromPrevStep                : MOS_BITFIELD_BIT(4);
211             uint32_t                                    : MOS_BITFIELD_RANGE(5, 7);
212             uint32_t   SuperCombineDist                 : MOS_BITFIELD_RANGE(8, 15);
213             uint32_t   MaxVmvR                          : MOS_BITFIELD_RANGE(16, 31);
214         };
215         struct
216         {
217             uint32_t   Value;
218         };
219     } DW6;
220 
221     // DW7
222     union
223     {
224         struct
225         {
226             uint32_t                                    : MOS_BITFIELD_RANGE(0, 15);
227             uint32_t   MVCostScaleFactor                : MOS_BITFIELD_RANGE(16, 17);
228             uint32_t   BilinearEnable                   : MOS_BITFIELD_BIT(18);
229             uint32_t   SrcFieldPolarity                 : MOS_BITFIELD_BIT(19);
230             uint32_t   WeightedSADHAAR                  : MOS_BITFIELD_BIT(20);
231             uint32_t   AConlyHAAR                       : MOS_BITFIELD_BIT(21);
232             uint32_t   RefIDCostMode                    : MOS_BITFIELD_BIT(22);
233             uint32_t                                    : MOS_BITFIELD_BIT(23);
234             uint32_t   SkipCenterMask                   : MOS_BITFIELD_RANGE(24, 31);
235         };
236         struct
237         {
238             uint32_t   Value;
239         };
240     } DW7;
241 
242     // DW8
243     union
244     {
245         struct
246         {
247             uint32_t   Mode0Cost                        : MOS_BITFIELD_RANGE(0, 7);
248             uint32_t   Mode1Cost                        : MOS_BITFIELD_RANGE(8, 15);
249             uint32_t   Mode2Cost                        : MOS_BITFIELD_RANGE(16, 23);
250             uint32_t   Mode3Cost                        : MOS_BITFIELD_RANGE(24, 31);
251         };
252         struct
253         {
254             uint32_t   Value;
255         };
256     } DW8;
257 
258     // DW9
259     union
260     {
261         struct
262         {
263             uint32_t   Mode4Cost                        : MOS_BITFIELD_RANGE(0, 7);
264             uint32_t   Mode5Cost                        : MOS_BITFIELD_RANGE(8, 15);
265             uint32_t   Mode6Cost                        : MOS_BITFIELD_RANGE(16, 23);
266             uint32_t   Mode7Cost                        : MOS_BITFIELD_RANGE(24, 31);
267         };
268         struct
269         {
270             uint32_t   Value;
271         };
272     } DW9;
273 
274     // DW10
275     union
276     {
277         struct
278         {
279             uint32_t   Mode8Cost                        : MOS_BITFIELD_RANGE(0, 7);
280             uint32_t   Mode9Cost                        : MOS_BITFIELD_RANGE(8, 15);
281             uint32_t   RefIDCost                        : MOS_BITFIELD_RANGE(16, 23);
282             uint32_t   ChromaIntraModeCost              : MOS_BITFIELD_RANGE(24, 31);
283         };
284         struct
285         {
286             uint32_t   Value;
287         };
288     } DW10;
289 
290     // DW11
291     union
292     {
293         struct
294         {
295             uint32_t   MV0Cost                          : MOS_BITFIELD_RANGE(0, 7);
296             uint32_t   MV1Cost                          : MOS_BITFIELD_RANGE(8, 15);
297             uint32_t   MV2Cost                          : MOS_BITFIELD_RANGE(16, 23);
298             uint32_t   MV3Cost                          : MOS_BITFIELD_RANGE(24, 31);
299         };
300         struct
301         {
302             uint32_t   Value;
303         };
304     } DW11;
305 
306     // DW12
307     union
308     {
309         struct
310         {
311             uint32_t   MV4Cost                          : MOS_BITFIELD_RANGE(0, 7);
312             uint32_t   MV5Cost                          : MOS_BITFIELD_RANGE(8, 15);
313             uint32_t   MV6Cost                          : MOS_BITFIELD_RANGE(16, 23);
314             uint32_t   MV7Cost                          : MOS_BITFIELD_RANGE(24, 31);
315         };
316         struct
317         {
318             uint32_t   Value;
319         };
320     } DW12;
321 
322     // DW13
323     union
324     {
325         struct
326         {
327             uint32_t   NumRefIdxL0MinusOne              : MOS_BITFIELD_RANGE(0, 7);
328             uint32_t   NumRefIdxL1MinusOne              : MOS_BITFIELD_RANGE(8, 15);
329             uint32_t   RefStreaminCost                  : MOS_BITFIELD_RANGE(16, 23);
330             uint32_t   ROIEnable                        : MOS_BITFIELD_RANGE(24, 26);
331             uint32_t                                    : MOS_BITFIELD_RANGE(27, 31);
332         };
333         struct
334         {
335             uint32_t   Value;
336         };
337     } DW13;
338 
339     // DW14
340     union
341     {
342         struct
343         {
344             uint32_t   List0RefID0FieldParity           : MOS_BITFIELD_BIT(0);
345             uint32_t   List0RefID1FieldParity           : MOS_BITFIELD_BIT(1);
346             uint32_t   List0RefID2FieldParity           : MOS_BITFIELD_BIT(2);
347             uint32_t   List0RefID3FieldParity           : MOS_BITFIELD_BIT(3);
348             uint32_t   List0RefID4FieldParity           : MOS_BITFIELD_BIT(4);
349             uint32_t   List0RefID5FieldParity           : MOS_BITFIELD_BIT(5);
350             uint32_t   List0RefID6FieldParity           : MOS_BITFIELD_BIT(6);
351             uint32_t   List0RefID7FieldParity           : MOS_BITFIELD_BIT(7);
352             uint32_t   List1RefID0FieldParity           : MOS_BITFIELD_BIT(8);
353             uint32_t   List1RefID1FieldParity           : MOS_BITFIELD_BIT(9);
354             uint32_t                                    : MOS_BITFIELD_RANGE(10, 31);
355         };
356         struct
357         {
358             uint32_t   Value;
359         };
360     } DW14;
361 
362     // DW15
363     union
364     {
365         struct
366         {
367             uint32_t   PrevMvReadPosFactor              : MOS_BITFIELD_RANGE(0, 7);
368             uint32_t   MvShiftFactor                    : MOS_BITFIELD_RANGE(8, 15);
369             uint32_t   Reserved                         : MOS_BITFIELD_RANGE(16, 31);
370         };
371         struct
372         {
373             uint32_t   Value;
374         };
375     } DW15;
376 
377     struct
378     {
379         // DW16
380         union
381         {
382             struct
383             {
384                 SearchPathDelta   SPDelta_0;
385                 SearchPathDelta   SPDelta_1;
386                 SearchPathDelta   SPDelta_2;
387                 SearchPathDelta   SPDelta_3;
388             };
389             struct
390             {
391                 uint32_t   Value;
392             };
393         } DW16;
394 
395         // DW17
396         union
397         {
398             struct
399             {
400                 SearchPathDelta   SPDelta_4;
401                 SearchPathDelta   SPDelta_5;
402                 SearchPathDelta   SPDelta_6;
403                 SearchPathDelta   SPDelta_7;
404             };
405             struct
406             {
407                 uint32_t   Value;
408             };
409         } DW17;
410 
411         // DW18
412         union
413         {
414             struct
415             {
416                 SearchPathDelta   SPDelta_8;
417                 SearchPathDelta   SPDelta_9;
418                 SearchPathDelta   SPDelta_10;
419                 SearchPathDelta   SPDelta_11;
420             };
421             struct
422             {
423                 uint32_t   Value;
424             };
425         } DW18;
426 
427         // DW19
428         union
429         {
430             struct
431             {
432                 SearchPathDelta   SPDelta_12;
433                 SearchPathDelta   SPDelta_13;
434                 SearchPathDelta   SPDelta_14;
435                 SearchPathDelta   SPDelta_15;
436             };
437             struct
438             {
439                 uint32_t   Value;
440             };
441         } DW19;
442 
443         // DW20
444         union
445         {
446             struct
447             {
448                 SearchPathDelta   SPDelta_16;
449                 SearchPathDelta   SPDelta_17;
450                 SearchPathDelta   SPDelta_18;
451                 SearchPathDelta   SPDelta_19;
452             };
453             struct
454             {
455                 uint32_t   Value;
456             };
457         } DW20;
458 
459         // DW21
460         union
461         {
462             struct
463             {
464                 SearchPathDelta   SPDelta_20;
465                 SearchPathDelta   SPDelta_21;
466                 SearchPathDelta   SPDelta_22;
467                 SearchPathDelta   SPDelta_23;
468             };
469             struct
470             {
471                 uint32_t   Value;
472             };
473         } DW21;
474 
475         // DW22
476         union
477         {
478             struct
479             {
480                 SearchPathDelta   SPDelta_24;
481                 SearchPathDelta   SPDelta_25;
482                 SearchPathDelta   SPDelta_26;
483                 SearchPathDelta   SPDelta_27;
484             };
485             struct
486             {
487                 uint32_t   Value;
488             };
489         } DW22;
490 
491         // DW23
492         union
493         {
494             struct
495             {
496                 SearchPathDelta   SPDelta_28;
497                 SearchPathDelta   SPDelta_29;
498                 SearchPathDelta   SPDelta_30;
499                 SearchPathDelta   SPDelta_31;
500             };
501             struct
502             {
503                 uint32_t   Value;
504             };
505         } DW23;
506 
507         // DW24
508         union
509         {
510             struct
511             {
512                 SearchPathDelta   SPDelta_32;
513                 SearchPathDelta   SPDelta_33;
514                 SearchPathDelta   SPDelta_34;
515                 SearchPathDelta   SPDelta_35;
516             };
517             struct
518             {
519                 uint32_t   Value;
520             };
521         } DW24;
522 
523         // DW25
524         union
525         {
526             struct
527             {
528                 SearchPathDelta   SPDelta_36;
529                 SearchPathDelta   SPDelta_37;
530                 SearchPathDelta   SPDelta_38;
531                 SearchPathDelta   SPDelta_39;
532             };
533             struct
534             {
535                 uint32_t   Value;
536             };
537         } DW25;
538 
539         // DW26
540         union
541         {
542             struct
543             {
544                 SearchPathDelta   SPDelta_40;
545                 SearchPathDelta   SPDelta_41;
546                 SearchPathDelta   SPDelta_42;
547                 SearchPathDelta   SPDelta_43;
548             };
549             struct
550             {
551                 uint32_t   Value;
552             };
553         } DW26;
554 
555         // DW27
556         union
557         {
558             struct
559             {
560                 SearchPathDelta   SPDelta_44;
561                 SearchPathDelta   SPDelta_45;
562                 SearchPathDelta   SPDelta_46;
563                 SearchPathDelta   SPDelta_47;
564             };
565             struct
566             {
567                 uint32_t   Value;
568             };
569         } DW27;
570 
571         // DW28
572         union
573         {
574             struct
575             {
576                 SearchPathDelta   SPDelta_48;
577                 SearchPathDelta   SPDelta_49;
578                 SearchPathDelta   SPDelta_50;
579                 SearchPathDelta   SPDelta_51;
580             };
581             struct
582             {
583                 uint32_t   Value;
584             };
585         } DW28;
586 
587         // DW29
588         union
589         {
590             struct
591             {
592                 SearchPathDelta   SPDelta_52;
593                 SearchPathDelta   SPDelta_53;
594                 SearchPathDelta   SPDelta_54;
595                 SearchPathDelta   SPDelta_55;
596             };
597             struct
598             {
599                 uint32_t   Value;
600             };
601         } DW29;
602     } SPDelta;
603 
604     // DW30
605     union
606     {
607         struct
608         {
609             uint32_t   ActualMBWidth                    : MOS_BITFIELD_RANGE(0, 15);
610             uint32_t   ActualMBHeight                   : MOS_BITFIELD_RANGE(16, 31);
611         };
612         struct
613         {
614             uint32_t   Value;
615         };
616     } DW30;
617 
618     // DW31
619     union
620     {
621         struct
622         {
623             uint32_t   Reserved;
624         };
625         struct
626         {
627             uint32_t   Value;
628         };
629     } DW31;
630 
631     // DW32
632     union
633     {
634         struct
635         {
636             uint32_t   _4xMeMvOutputDataSurfIndex;
637         };
638         struct
639         {
640             uint32_t   Value;
641         };
642     } DW32;
643 
644     // DW33
645     union
646     {
647         struct
648         {
649             uint32_t   _16xOr32xMeMvInputDataSurfIndex;
650         };
651         struct
652         {
653             uint32_t   Value;
654         };
655     } DW33;
656 
657     // DW34
658     union
659     {
660         struct
661         {
662             uint32_t   _4xMeOutputDistSurfIndex;
663         };
664         struct
665         {
666             uint32_t   Value;
667         };
668     } DW34;
669 
670     // DW35
671     union
672     {
673         struct
674         {
675             uint32_t   _4xMeOutputBrcDistSurfIndex;
676         };
677         struct
678         {
679             uint32_t   Value;
680         };
681     } DW35;
682 
683     // DW36
684     union
685     {
686         struct
687         {
688             uint32_t   VMEFwdInterPredictionSurfIndex;
689         };
690         struct
691         {
692             uint32_t   Value;
693         };
694     } DW36;
695 
696     // DW37
697     union
698     {
699         struct
700         {
701             uint32_t   VMEBwdInterPredictionSurfIndex;
702         };
703         struct
704         {
705             uint32_t   Value;
706         };
707     } DW37;
708 
709     // DW38
710     union
711     {
712         struct
713         {
714             uint32_t   VDEncStreamInSurfIndex;
715         };
716         struct
717         {
718             uint32_t   Value;
719         };
720     } DW38;
721 };
722 
723 using PCODECHAL_ENC_HEVC_ME_CURBE_G9 = struct CODECHAL_ENC_HEVC_ME_CURBE_G9*;
724 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G9)) == 39);
725 
726 //! HEVC encoder B MBEnc kernel curbe for GEN9
727 struct CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9
728 {
729     // DW0
730     union
731     {
732         struct
733         {
734             uint32_t   SkipModeEn                       : MOS_BITFIELD_BIT(0);
735             uint32_t   AdaptiveEn                       : MOS_BITFIELD_BIT(1);
736             uint32_t   BiMixDis                         : MOS_BITFIELD_BIT(2);
737             uint32_t                                    : MOS_BITFIELD_RANGE(3, 4);
738             uint32_t   EarlyImeSuccessEn                : MOS_BITFIELD_BIT(5);
739             uint32_t                                    : MOS_BITFIELD_BIT(6);
740             uint32_t   T8x8FlagForInterEn               : MOS_BITFIELD_BIT(7);
741             uint32_t                                    : MOS_BITFIELD_RANGE(8, 23);
742             uint32_t   EarlyImeStop                     : MOS_BITFIELD_RANGE(24, 31);
743         };
744         struct
745         {
746             uint32_t   Value;
747         };
748     } DW0;
749 
750     // DW1
751     union
752     {
753         struct
754         {
755             uint32_t   MaxNumMVs                        : MOS_BITFIELD_RANGE(0, 5);
756             uint32_t                                    : MOS_BITFIELD_RANGE(6, 15);
757             uint32_t   BiWeight                         : MOS_BITFIELD_RANGE(16, 21);
758             uint32_t                                    : MOS_BITFIELD_RANGE(22, 27);
759             uint32_t   UniMixDisable                    : MOS_BITFIELD_BIT(28);
760             uint32_t                                    : MOS_BITFIELD_RANGE(29, 31);
761         };
762         struct
763         {
764             uint32_t   Value;
765         };
766     } DW1;
767 
768     // DW2
769     union
770     {
771         struct
772         {
773             uint32_t   LenSP                            : MOS_BITFIELD_RANGE(0, 7);
774             uint32_t   MaxNumSU                         : MOS_BITFIELD_RANGE(8, 15);
775             uint32_t   PicWidth                         : MOS_BITFIELD_RANGE(16, 31);
776         };
777         struct
778         {
779             uint32_t   Value;
780         };
781     } DW2;
782 
783     // DW3
784     union
785     {
786         struct
787         {
788             uint32_t   SrcSize                          : MOS_BITFIELD_RANGE(0, 1);
789             uint32_t                                    : MOS_BITFIELD_RANGE(2, 3);
790             uint32_t   MbTypeRemap                      : MOS_BITFIELD_RANGE(4, 5);
791             uint32_t   SrcAccess                        : MOS_BITFIELD_BIT(6);
792             uint32_t   RefAccess                        : MOS_BITFIELD_BIT(7);
793             uint32_t   SearchCtrl                       : MOS_BITFIELD_RANGE(8, 10);
794             uint32_t   DualSearchPathOption             : MOS_BITFIELD_BIT(11);
795             uint32_t   SubPelMode                       : MOS_BITFIELD_RANGE(12, 13);
796             uint32_t   SkipType                         : MOS_BITFIELD_BIT(14);
797             uint32_t   DisableFieldCacheAlloc           : MOS_BITFIELD_BIT(15);
798             uint32_t   InterChromaMode                  : MOS_BITFIELD_BIT(16);
799             uint32_t   FTEnable                         : MOS_BITFIELD_BIT(17);
800             uint32_t   BMEDisableFBR                    : MOS_BITFIELD_BIT(18);
801             uint32_t   BlockBasedSkipEnable             : MOS_BITFIELD_BIT(19);
802             uint32_t   InterSAD                         : MOS_BITFIELD_RANGE(20, 21);
803             uint32_t   IntraSAD                         : MOS_BITFIELD_RANGE(22, 23);
804             uint32_t   SubMbPartMask                    : MOS_BITFIELD_RANGE(24, 30);
805             uint32_t                                    : MOS_BITFIELD_BIT(31);
806         };
807         struct
808         {
809             uint32_t   Value;
810         };
811     } DW3;
812 
813     union
814     {
815         struct
816         {
817             uint32_t   PicHeightMinus1                  : MOS_BITFIELD_RANGE(0, 15);
818             uint32_t   Res_16_22                        : MOS_BITFIELD_RANGE(16, 22);
819             uint32_t   EnableQualityImprovement         : MOS_BITFIELD_BIT(23);
820             uint32_t   EnableDebug                      : MOS_BITFIELD_BIT(24);
821             uint32_t   EnableFlexibleParam              : MOS_BITFIELD_BIT(25);
822             uint32_t   EnableStatsDataDump              : MOS_BITFIELD_BIT(26);
823             uint32_t   Res_27                           : MOS_BITFIELD_BIT(27);
824             uint32_t   HMEEnable                        : MOS_BITFIELD_BIT(28);
825             uint32_t   SliceType                        : MOS_BITFIELD_RANGE(29, 30);
826             uint32_t   UseActualRefQPValue              : MOS_BITFIELD_BIT(31);
827         };
828         struct
829         {
830             uint32_t   Value;
831         };
832     } DW4;
833 
834     // DW5
835     union
836     {
837         struct
838         {
839             uint32_t   Res_0_15                         : MOS_BITFIELD_RANGE(0, 15);
840             uint32_t   RefWidth                         : MOS_BITFIELD_RANGE(16, 23);
841             uint32_t   RefHeight                        : MOS_BITFIELD_RANGE(24, 31);
842         };
843         struct
844         {
845             uint32_t   Value;
846         };
847     } DW5;
848 
849     union
850     {
851         struct
852         {
853             uint32_t   FrameWidth                       : MOS_BITFIELD_RANGE(0, 15);
854             uint32_t   FrameHeight                      : MOS_BITFIELD_RANGE(16, 31);
855         };
856         struct
857         {
858             uint32_t   Value;
859         };
860     } DW6;
861 
862     // DW7
863     union
864     {
865         struct
866         {
867             uint32_t   IntraPartMask                    : MOS_BITFIELD_RANGE(0, 4);
868             uint32_t   NonSkipZMvAdded                  : MOS_BITFIELD_BIT(5);
869             uint32_t   NonSkipModeAdded                 : MOS_BITFIELD_BIT(6);
870             uint32_t   LumaIntraSrcCornerSwap           : MOS_BITFIELD_BIT(7);
871             uint32_t                                    : MOS_BITFIELD_RANGE(8, 15);
872             uint32_t   MVCostScaleFactor                : MOS_BITFIELD_RANGE(16, 17);
873             uint32_t   BilinearEnable                   : MOS_BITFIELD_BIT(18);
874             uint32_t   Res_19                           : MOS_BITFIELD_BIT(19);
875             uint32_t   WeightedSADHAAR                  : MOS_BITFIELD_BIT(20);
876             uint32_t   AConlyHAAR                       : MOS_BITFIELD_BIT(21);
877             uint32_t   RefIDCostMode                    : MOS_BITFIELD_BIT(22);
878             uint32_t                                    : MOS_BITFIELD_BIT(23);
879             uint32_t   SkipCenterMask                   : MOS_BITFIELD_RANGE(24, 31);
880         };
881         struct
882         {
883             uint32_t   Value;
884         };
885     } DW7;
886 
887     // DW8
888     union
889     {
890         struct
891         {
892             uint32_t   Mode0Cost                        : MOS_BITFIELD_RANGE(0, 7);
893             uint32_t   Mode1Cost                        : MOS_BITFIELD_RANGE(8, 15);
894             uint32_t   Mode2Cost                        : MOS_BITFIELD_RANGE(16, 23);
895             uint32_t   Mode3Cost                        : MOS_BITFIELD_RANGE(24, 31);
896         };
897         struct
898         {
899             uint32_t   Value;
900         };
901     } DW8;
902 
903     // DW9
904     union
905     {
906         struct
907         {
908             uint32_t   Mode4Cost                        : MOS_BITFIELD_RANGE(0, 7);
909             uint32_t   Mode5Cost                        : MOS_BITFIELD_RANGE(8, 15);
910             uint32_t   Mode6Cost                        : MOS_BITFIELD_RANGE(16, 23);
911             uint32_t   Mode7Cost                        : MOS_BITFIELD_RANGE(24, 31);
912         };
913         struct
914         {
915             uint32_t   Value;
916         };
917     } DW9;
918 
919     // DW10
920     union
921     {
922         struct
923         {
924             uint32_t   Mode8Cost                        : MOS_BITFIELD_RANGE(0, 7);
925             uint32_t   Mode9Cost                        : MOS_BITFIELD_RANGE(8, 15);
926             uint32_t   RefIDCost                        : MOS_BITFIELD_RANGE(16, 23);
927             uint32_t   ChromaIntraModeCost              : MOS_BITFIELD_RANGE(24, 31);
928         };
929         struct
930         {
931             uint32_t   Value;
932         };
933     } DW10;
934 
935     // DW11
936     union
937     {
938         struct
939         {
940             uint32_t   MV0Cost                          : MOS_BITFIELD_RANGE(0, 7);
941             uint32_t   MV1Cost                          : MOS_BITFIELD_RANGE(8, 15);
942             uint32_t   MV2Cost                          : MOS_BITFIELD_RANGE(16, 23);
943             uint32_t   MV3Cost                          : MOS_BITFIELD_RANGE(24, 31);
944         };
945         struct
946         {
947             uint32_t   Value;
948         };
949     } DW11;
950 
951     // DW12
952     union
953     {
954         struct
955         {
956             uint32_t   MV4Cost                          : MOS_BITFIELD_RANGE(0, 7);
957             uint32_t   MV5Cost                          : MOS_BITFIELD_RANGE(8, 15);
958             uint32_t   MV6Cost                          : MOS_BITFIELD_RANGE(16, 23);
959             uint32_t   MV7Cost                          : MOS_BITFIELD_RANGE(24, 31);
960         };
961         struct
962         {
963             uint32_t   Value;
964         };
965     } DW12;
966 
967     // DW13
968     union
969     {
970         struct
971         {
972             uint32_t   QpPrimeY                         : MOS_BITFIELD_RANGE(0, 7);
973             uint32_t   QpPrimeCb                        : MOS_BITFIELD_RANGE(8, 15);
974             uint32_t   QpPrimeCr                        : MOS_BITFIELD_RANGE(16, 23);
975             uint32_t   TargetSizeInWord                 : MOS_BITFIELD_RANGE(24, 31);
976         };
977         struct
978         {
979             uint32_t   Value;
980         };
981     } DW13;
982 
983     // DW14
984     union
985     {
986         struct
987         {
988             uint32_t   SICFwdTransCoeffThreshold_0      : MOS_BITFIELD_RANGE(0, 15);
989             uint32_t   SICFwdTransCoeffThreshold_1      : MOS_BITFIELD_RANGE(16, 23);
990             uint32_t   SICFwdTransCoeffThreshold_2      : MOS_BITFIELD_RANGE(24, 31);
991         };
992         struct
993         {
994             uint32_t   Value;
995         };
996     } DW14;
997 
998     // DW15
999     union
1000     {
1001         struct
1002         {
1003             uint32_t   SICFwdTransCoeffThreshold_3      : MOS_BITFIELD_RANGE(0, 7);
1004             uint32_t   SICFwdTransCoeffThreshold_4      : MOS_BITFIELD_RANGE(8, 15);
1005             uint32_t   SICFwdTransCoeffThreshold_5      : MOS_BITFIELD_RANGE(16, 23);
1006             uint32_t   SICFwdTransCoeffThreshold_6      : MOS_BITFIELD_RANGE(24, 31);    // Highest Freq
1007         };
1008         struct
1009         {
1010             uint32_t   Value;
1011         };
1012     } DW15;
1013 
1014     // DW16
1015     union
1016     {
1017         struct
1018         {
1019             SearchPathDelta   SPDelta_0;
1020             SearchPathDelta   SPDelta_1;
1021             SearchPathDelta   SPDelta_2;
1022             SearchPathDelta   SPDelta_3;
1023         };
1024         struct
1025         {
1026             uint32_t   Value;
1027         };
1028     } DW16;
1029 
1030     // DW17
1031     union
1032     {
1033         struct
1034         {
1035             SearchPathDelta   SPDelta_4;
1036             SearchPathDelta   SPDelta_5;
1037             SearchPathDelta   SPDelta_6;
1038             SearchPathDelta   SPDelta_7;
1039         };
1040         struct
1041         {
1042             uint32_t   Value;
1043         };
1044     } DW17;
1045 
1046     // DW18
1047     union
1048     {
1049         struct
1050         {
1051             SearchPathDelta   SPDelta_8;
1052             SearchPathDelta   SPDelta_9;
1053             SearchPathDelta   SPDelta_10;
1054             SearchPathDelta   SPDelta_11;
1055         };
1056         struct
1057         {
1058             uint32_t   Value;
1059         };
1060     } DW18;
1061 
1062     // DW19
1063     union
1064     {
1065         struct
1066         {
1067             SearchPathDelta   SPDelta_12;
1068             SearchPathDelta   SPDelta_13;
1069             SearchPathDelta   SPDelta_14;
1070             SearchPathDelta   SPDelta_15;
1071         };
1072         struct
1073         {
1074             uint32_t   Value;
1075         };
1076     } DW19;
1077 
1078     // DW20
1079     union
1080     {
1081         struct
1082         {
1083             SearchPathDelta   SPDelta_16;
1084             SearchPathDelta   SPDelta_17;
1085             SearchPathDelta   SPDelta_18;
1086             SearchPathDelta   SPDelta_19;
1087         };
1088         struct
1089         {
1090             uint32_t   Value;
1091         };
1092     } DW20;
1093 
1094     // DW21
1095     union
1096     {
1097         struct
1098         {
1099             SearchPathDelta   SPDelta_20;
1100             SearchPathDelta   SPDelta_21;
1101             SearchPathDelta   SPDelta_22;
1102             SearchPathDelta   SPDelta_23;
1103         };
1104         struct
1105         {
1106             uint32_t   Value;
1107         };
1108     } DW21;
1109 
1110     // DW22
1111     union
1112     {
1113         struct
1114         {
1115             SearchPathDelta   SPDelta_24;
1116             SearchPathDelta   SPDelta_25;
1117             SearchPathDelta   SPDelta_26;
1118             SearchPathDelta   SPDelta_27;
1119         };
1120         struct
1121         {
1122             uint32_t   Value;
1123         };
1124     } DW22;
1125 
1126     // DW23
1127     union
1128     {
1129         struct
1130         {
1131             SearchPathDelta   SPDelta_28;
1132             SearchPathDelta   SPDelta_29;
1133             SearchPathDelta   SPDelta_30;
1134             SearchPathDelta   SPDelta_31;
1135         };
1136         struct
1137         {
1138             uint32_t   Value;
1139         };
1140     } DW23;
1141 
1142     // DW24
1143     union
1144     {
1145         struct
1146         {
1147             SearchPathDelta   SPDelta_32;
1148             SearchPathDelta   SPDelta_33;
1149             SearchPathDelta   SPDelta_34;
1150             SearchPathDelta   SPDelta_35;
1151         };
1152         struct
1153         {
1154             uint32_t   Value;
1155         };
1156     } DW24;
1157 
1158     // DW25
1159     union
1160     {
1161         struct
1162         {
1163             SearchPathDelta   SPDelta_36;
1164             SearchPathDelta   SPDelta_37;
1165             SearchPathDelta   SPDelta_38;
1166             SearchPathDelta   SPDelta_39;
1167         };
1168         struct
1169         {
1170             uint32_t   Value;
1171         };
1172     } DW25;
1173 
1174     // DW26
1175     union
1176     {
1177         struct
1178         {
1179             SearchPathDelta   SPDelta_40;
1180             SearchPathDelta   SPDelta_41;
1181             SearchPathDelta   SPDelta_42;
1182             SearchPathDelta   SPDelta_43;
1183         };
1184         struct
1185         {
1186             uint32_t   Value;
1187         };
1188     } DW26;
1189 
1190     // DW27
1191     union
1192     {
1193         struct
1194         {
1195             SearchPathDelta   SPDelta_44;
1196             SearchPathDelta   SPDelta_45;
1197             SearchPathDelta   SPDelta_46;
1198             SearchPathDelta   SPDelta_47;
1199         };
1200         struct
1201         {
1202             uint32_t   Value;
1203         };
1204     } DW27;
1205 
1206     // DW28
1207     union
1208     {
1209         struct
1210         {
1211             SearchPathDelta   SPDelta_48;
1212             SearchPathDelta   SPDelta_49;
1213             SearchPathDelta   SPDelta_50;
1214             SearchPathDelta   SPDelta_51;
1215         };
1216         struct
1217         {
1218             uint32_t   Value;
1219         };
1220     } DW28;
1221 
1222     // DW29
1223     union
1224     {
1225         struct
1226         {
1227             SearchPathDelta   SPDelta_52;
1228             SearchPathDelta   SPDelta_53;
1229             SearchPathDelta   SPDelta_54;
1230             SearchPathDelta   SPDelta_55;
1231         };
1232         struct
1233         {
1234             uint32_t   Value;
1235         };
1236     } DW29;
1237 
1238     // DW30
1239     union
1240     {
1241         struct
1242         {
1243             uint32_t   Intra4x4ModeMask                 : MOS_BITFIELD_RANGE(0, 8);
1244             uint32_t                                    : MOS_BITFIELD_RANGE(9, 15);
1245             uint32_t   Intra8x8ModeMask                 : MOS_BITFIELD_RANGE(16, 24);
1246             uint32_t                                    : MOS_BITFIELD_RANGE(25, 31);
1247         };
1248         struct
1249         {
1250             uint32_t   Value;
1251         };
1252     } DW30;
1253 
1254     // DW31
1255     union
1256     {
1257         struct
1258         {
1259             uint32_t   Intra16x16ModeMask               : MOS_BITFIELD_RANGE(0, 3);
1260             uint32_t   IntraChromaModeMask              : MOS_BITFIELD_RANGE(4, 7);
1261             uint32_t   IntraComputeType                 : MOS_BITFIELD_RANGE(8, 9);
1262             uint32_t                                    : MOS_BITFIELD_RANGE(10, 31);
1263         };
1264         struct
1265         {
1266             uint32_t   Value;
1267         };
1268     } DW31;
1269 
1270     // DW32
1271     union
1272     {
1273         struct
1274         {
1275             uint32_t   SkipVal                          : MOS_BITFIELD_RANGE(0, 15);
1276             uint32_t   MultiPredL0Disable               : MOS_BITFIELD_RANGE(16, 23);
1277             uint32_t   MultiPredL1Disable               : MOS_BITFIELD_RANGE(24, 31);
1278         };
1279         struct
1280         {
1281             uint32_t   Value;
1282         };
1283     } DW32;
1284 
1285     // DW33
1286     union
1287     {
1288         struct
1289         {
1290             uint32_t   Intra16x16NonDCPredPenalty       : MOS_BITFIELD_RANGE(0, 7);
1291             uint32_t   Intra8x8NonDCPredPenalty         : MOS_BITFIELD_RANGE(8, 15);
1292             uint32_t   Intra4x4NonDCPredPenalty         : MOS_BITFIELD_RANGE(16, 23);
1293             uint32_t                                    : MOS_BITFIELD_RANGE(24, 31);
1294         };
1295         struct
1296         {
1297             uint32_t   Value;
1298         };
1299     } DW33;
1300 
1301     union {
1302         struct {
1303             uint32_t       LambdaME;
1304         };
1305         uint32_t Value;
1306     } DW34;
1307 
1308     union {
1309         struct {
1310             uint32_t       SimpIntraInterThreshold      : MOS_BITFIELD_RANGE(0, 15);
1311             uint32_t       ModeCostSp                   : MOS_BITFIELD_RANGE(16, 23);
1312             uint32_t       IntraRefreshEn               : MOS_BITFIELD_RANGE(24, 25);
1313             uint32_t       FirstIntraRefresh            : MOS_BITFIELD_BIT(26);
1314             uint32_t       EnableRollingIntra           : MOS_BITFIELD_BIT(27);
1315             uint32_t       HalfUpdateMixedLCU           : MOS_BITFIELD_BIT(28);
1316             uint32_t       Res_29_31                    : MOS_BITFIELD_RANGE(29, 31);
1317         };
1318         uint32_t Value;
1319     } DW35;
1320 
1321     union {
1322         struct {
1323             uint32_t       NumRefIdxL0MinusOne          : MOS_BITFIELD_RANGE(0, 7);
1324             uint32_t       HMECombinedExtraSUs          : MOS_BITFIELD_RANGE(8, 15);
1325             uint32_t       NumRefIdxL1MinusOne          : MOS_BITFIELD_RANGE(16, 23);
1326             uint32_t       PowerSaving                  : MOS_BITFIELD_BIT(24);
1327             uint32_t       BRCEnable                    : MOS_BITFIELD_BIT(25);
1328             uint32_t       LCUBRCEnable                 : MOS_BITFIELD_BIT(26);
1329             uint32_t       ROIEnable                    : MOS_BITFIELD_BIT(27);
1330             uint32_t       FASTSurveillanceFlag         : MOS_BITFIELD_BIT(28);
1331             uint32_t       CheckAllFractionalEnable     : MOS_BITFIELD_BIT(29);
1332             uint32_t       HMECombinedOverlap           : MOS_BITFIELD_RANGE(30, 31);
1333         };
1334         uint32_t Value;
1335     } DW36;
1336 
1337     union {
1338         struct {
1339             uint32_t       ActualQpRefID0List0          : MOS_BITFIELD_RANGE(0, 7);
1340             uint32_t       ActualQpRefID1List0          : MOS_BITFIELD_RANGE(8, 15);
1341             uint32_t       ActualQpRefID2List0          : MOS_BITFIELD_RANGE(16, 23);
1342             uint32_t       ActualQpRefID3List0          : MOS_BITFIELD_RANGE(24, 31);
1343         };
1344         uint32_t Value;
1345     } DW37;
1346 
1347     union {
1348         struct {
1349             uint32_t       NumIntraRefreshOffFrames     : MOS_BITFIELD_RANGE(0, 15);
1350             uint32_t       NumFrameInGOB                : MOS_BITFIELD_RANGE(16, 31);
1351         };
1352         uint32_t Value;
1353     } DW38;
1354 
1355     union {
1356         struct {
1357             uint32_t       ActualQpRefID0List1          : MOS_BITFIELD_RANGE(0, 7);
1358             uint32_t       ActualQpRefID1List1          : MOS_BITFIELD_RANGE(8, 15);
1359             uint32_t       RefCost                      : MOS_BITFIELD_RANGE(16, 31);
1360         };
1361         uint32_t Value;
1362     } DW39;
1363 
1364     union {
1365         struct {
1366             uint32_t       Reserved;
1367         };
1368         uint32_t Value;
1369     } DW40;
1370 
1371     union {
1372         struct {
1373             uint32_t       TransformThreshold2          : MOS_BITFIELD_RANGE(0, 15);
1374             uint32_t       TextureIntraCostThreshold    : MOS_BITFIELD_RANGE(16, 31);
1375         };
1376         uint32_t Value;
1377     } DW41;
1378 
1379     union {
1380         struct {
1381             uint32_t       Reserved;
1382         };
1383         uint32_t Value;
1384     } DW42;
1385 
1386     union {
1387         struct {
1388             uint32_t       Reserved;
1389         };
1390         uint32_t Value;
1391     } DW43;
1392 
1393     union {
1394         struct {
1395             uint32_t       MaxNumMergeCandidates        : MOS_BITFIELD_RANGE(0, 3);
1396             uint32_t       MaxNumRefList0               : MOS_BITFIELD_RANGE(4, 7);
1397             uint32_t       MaxNumRefList1               : MOS_BITFIELD_RANGE(8, 11);
1398             uint32_t       Res_12_15                    : MOS_BITFIELD_RANGE(12, 15);
1399             uint32_t       MaxVmvR                      : MOS_BITFIELD_RANGE(16, 31);
1400         };
1401         uint32_t Value;
1402     } DW44;
1403 
1404     union {
1405         struct {
1406             uint32_t       TemporalMvpEnableFlag        : MOS_BITFIELD_BIT(0);
1407             uint32_t       Res_1_7                      : MOS_BITFIELD_RANGE(1, 7);
1408             uint32_t       Log2ParallelMergeLevel       : MOS_BITFIELD_RANGE(8, 15);
1409             uint32_t       HMECombineLenPslice          : MOS_BITFIELD_RANGE(16, 23);
1410             uint32_t       HMECombineLenBslice          : MOS_BITFIELD_RANGE(24, 31);
1411         };
1412         uint32_t Value;
1413     } DW45;
1414 
1415     union {
1416         struct {
1417             uint32_t       Log2MinTUSize                : MOS_BITFIELD_RANGE(0, 7);
1418             uint32_t       Log2MaxTUSize                : MOS_BITFIELD_RANGE(8, 15);
1419             uint32_t       Log2MinCUSize                : MOS_BITFIELD_RANGE(16, 23);
1420             uint32_t       Log2MaxCUSize                : MOS_BITFIELD_RANGE(24, 31);
1421         };
1422         uint32_t Value;
1423     } DW46;
1424 
1425     union {
1426         struct {
1427             uint32_t       NumRegionsInSlice            : MOS_BITFIELD_RANGE(0, 7);
1428             uint32_t       TypeOfWalkingPattern         : MOS_BITFIELD_RANGE(8, 11);
1429             uint32_t       ChromaFlatnessCheckFlag      : MOS_BITFIELD_BIT(12);
1430             uint32_t       EnableIntraEarlyExit         : MOS_BITFIELD_BIT(13);
1431             uint32_t       SkipIntraKrnFlag             : MOS_BITFIELD_BIT(14);
1432             uint32_t       ScreenContentFlag            : MOS_BITFIELD_BIT(15);
1433             uint32_t       IsLowDelay                   : MOS_BITFIELD_BIT(16);
1434             uint32_t       CollocatedFromL0Flag         : MOS_BITFIELD_BIT(17);
1435             uint32_t       ArbitarySliceFlag            : MOS_BITFIELD_BIT(18);
1436             uint32_t       MultiSliceFlag               : MOS_BITFIELD_BIT(19);
1437             uint32_t       Res_20_23                    : MOS_BITFIELD_RANGE(20, 23);
1438             uint32_t       isCurrRefL0LongTerm          : MOS_BITFIELD_BIT(24);
1439             uint32_t       isCurrRefL1LongTerm          : MOS_BITFIELD_BIT(25);
1440             uint32_t       NumRegionMinus1              : MOS_BITFIELD_RANGE(26, 31);
1441         };
1442         uint32_t Value;
1443     } DW47;
1444 
1445     union {
1446         struct {
1447             uint32_t       CurrentTdL0_0                : MOS_BITFIELD_RANGE(0, 15);
1448             uint32_t       CurrentTdL0_1                : MOS_BITFIELD_RANGE(16, 31);
1449         };
1450         uint32_t Value;
1451     } DW48;
1452 
1453     union {
1454         struct {
1455             uint32_t       CurrentTdL0_2                : MOS_BITFIELD_RANGE(0, 15);
1456             uint32_t       CurrentTdL0_3                : MOS_BITFIELD_RANGE(16, 31);
1457         };
1458         uint32_t Value;
1459     } DW49;
1460 
1461     union {
1462         struct {
1463             uint32_t       CurrentTdL1_0                : MOS_BITFIELD_RANGE(0, 15);
1464             uint32_t       CurrentTdL1_1                : MOS_BITFIELD_RANGE(16, 31);
1465         };
1466         uint32_t Value;
1467     } DW50;
1468 
1469     union {
1470         struct {
1471             uint32_t       IntraRefreshMBNum            : MOS_BITFIELD_RANGE(0, 15);
1472             uint32_t       IntraRefreshUnitInMB         : MOS_BITFIELD_RANGE(16, 23);
1473             uint32_t       IntraRefreshQPDelta          : MOS_BITFIELD_RANGE(24, 31);
1474         };
1475         uint32_t Value;
1476     } DW51;
1477 
1478     union {
1479         struct {
1480             uint32_t       NumofUnitInRegion            : MOS_BITFIELD_RANGE(0, 15);
1481             uint32_t       MaxHeightInRegion            : MOS_BITFIELD_RANGE(16, 31);
1482         };
1483         uint32_t Value;
1484     } DW52;
1485 
1486     union {
1487         struct {
1488             uint32_t       IntraRefreshRefWidth         : MOS_BITFIELD_RANGE(0, 7);
1489             uint32_t       IntraRefreshRefHeight        : MOS_BITFIELD_RANGE(8, 15);
1490             uint32_t       Res_16_31                    : MOS_BITFIELD_RANGE(16, 31);
1491         };
1492         uint32_t Value;
1493     } DW53;
1494 
1495     union {
1496         struct {
1497             uint32_t       Reserved;
1498         };
1499         uint32_t Value;
1500     } DW54;
1501 
1502     union {
1503         struct {
1504             uint32_t       Reserved;
1505         };
1506         uint32_t Value;
1507     } DW55;
1508 
1509     union {
1510         struct {
1511             uint32_t       BTI_CU_Record;
1512         };
1513         uint32_t Value;
1514     } DW56;
1515 
1516     union {
1517         struct {
1518             uint32_t       BTI_PAK_Cmd;
1519         };
1520         uint32_t Value;
1521     } DW57;
1522 
1523     union {
1524         struct {
1525             uint32_t       BTI_Src_Y;
1526         };
1527         uint32_t Value;
1528     } DW58;
1529 
1530     union {
1531         struct {
1532             uint32_t       BTI_Intra_Dist;
1533         };
1534         uint32_t Value;
1535     } DW59;
1536 
1537     union {
1538         struct {
1539             uint32_t       BTI_Min_Dist;
1540         };
1541         uint32_t Value;
1542     } DW60;
1543 
1544     union {
1545         struct {
1546             uint32_t       BTI_HMEMVPredFwdBwdSurfIndex;
1547         };
1548         uint32_t Value;
1549     } DW61;
1550 
1551     union {
1552         struct {
1553             uint32_t       BTI_HMEDistSurfIndex;
1554         };
1555         uint32_t Value;
1556     } DW62;
1557 
1558     union {
1559         struct {
1560             uint32_t       BTI_Slice_Map;
1561         };
1562         uint32_t Value;
1563     } DW63;
1564 
1565     union {
1566         struct {
1567             uint32_t       BTI_VME_Saved_UNI_SIC;
1568         };
1569         uint32_t Value;
1570     } DW64;
1571 
1572     union {
1573         struct {
1574             uint32_t       BTI_Simplest_Intra;
1575         };
1576         uint32_t Value;
1577     } DW65;
1578 
1579     union {
1580         struct {
1581             uint32_t       BTI_Collocated_RefFrame;
1582         };
1583         uint32_t Value;
1584     } DW66;
1585 
1586     union {
1587         struct {
1588             uint32_t       BTI_Reserved;
1589         };
1590         uint32_t Value;
1591     } DW67;
1592 
1593     union {
1594         struct {
1595             uint32_t       BTI_BRC_Input;
1596         };
1597         uint32_t Value;
1598     } DW68;
1599 
1600     union {
1601         struct {
1602             uint32_t       BTI_LCU_QP;
1603         };
1604         uint32_t Value;
1605     } DW69;
1606 
1607     union {
1608         struct {
1609             uint32_t       BTI_BRC_Data;
1610         };
1611         uint32_t Value;
1612     } DW70;
1613 
1614     union {
1615         struct {
1616             uint32_t       BTI_VMEInterPredictionSurfIndex;
1617         };
1618         uint32_t Value;
1619     } DW71;
1620 
1621     union {
1622         //For B frame
1623         struct {
1624             uint32_t       BTI_VMEInterPredictionBSurfIndex;
1625         };
1626         //For P frame
1627         struct {
1628             uint32_t       BTI_ConcurrentThreadMap;
1629         };
1630         uint32_t Value;
1631     } DW72;
1632 
1633     union {
1634         //For B frame
1635         struct {
1636             uint32_t       BTI_ConcurrentThreadMap;
1637         };
1638         //For P frame
1639         struct {
1640             uint32_t       BTI_MB_Data_CurFrame;
1641         };
1642         uint32_t Value;
1643     } DW73;
1644 
1645     union {
1646         //For B frame
1647         struct {
1648             uint32_t       BTI_MB_Data_CurFrame;
1649         };
1650         //For P frame
1651         struct {
1652             uint32_t       BTI_MVP_CurFrame;
1653         };
1654         uint32_t Value;
1655     } DW74;
1656 
1657     union {
1658         //For B frame
1659         struct {
1660             uint32_t       BTI_MVP_CurFrame;
1661         };
1662         //For P frame
1663         struct {
1664             uint32_t       BTI_Haar_Dist16x16;
1665         };
1666         uint32_t Value;
1667     } DW75;
1668 
1669     union {
1670         //For B frame
1671         struct {
1672             uint32_t       BTI_Haar_Dist16x16;
1673         };
1674         //For P frame
1675         struct {
1676             uint32_t       BTI_Stats_Data;
1677         };
1678         uint32_t Value;
1679     } DW76;
1680 
1681     union {
1682         //For B frame
1683         struct {
1684             uint32_t       BTI_Stats_Data;
1685         };
1686         //For P frame
1687         struct {
1688             uint32_t       BTI_Frame_Stats_Data;
1689         };
1690         uint32_t Value;
1691     } DW77;
1692 
1693     union {
1694         //For B frame
1695         struct {
1696             uint32_t       BTI_Frame_Stats_Data;
1697         };
1698         //For P frame
1699         struct {
1700             uint32_t       BTI_Debug;
1701         };
1702         uint32_t Value;
1703     } DW78;
1704 
1705     union {
1706         struct {
1707             uint32_t       BTI_Debug;
1708         };
1709         uint32_t Value;
1710     } DW79;
1711 };
1712 
1713 using PCODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 = struct CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9*;
1714 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9)) == 80);
1715 
1716 //! HEVC encoder BRC init/reset curbe for GEN9
1717 struct CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9
1718 {
1719     union
1720     {
1721         struct
1722         {
1723             uint32_t   ProfileLevelMaxFrame;
1724         };
1725         struct
1726         {
1727             uint32_t   Value;
1728         };
1729     } DW0;
1730 
1731     union
1732     {
1733         struct
1734         {
1735             uint32_t   InitBufFull;
1736         };
1737         struct
1738         {
1739             uint32_t   Value;
1740         };
1741     } DW1;
1742 
1743     union
1744     {
1745         struct
1746         {
1747             uint32_t   BufSize;
1748         };
1749         struct
1750         {
1751             uint32_t   Value;
1752         };
1753     } DW2;
1754 
1755     union
1756     {
1757         struct
1758         {
1759             uint32_t   TargetBitRate;
1760         };
1761         struct
1762         {
1763             uint32_t   Value;
1764         };
1765     } DW3;
1766 
1767     union
1768     {
1769         struct
1770         {
1771             uint32_t   MaximumBitRate;
1772         };
1773         struct
1774         {
1775             uint32_t   Value;
1776         };
1777     } DW4;
1778 
1779     union
1780     {
1781         struct
1782         {
1783             uint32_t   MinimumBitRate;
1784         };
1785         struct
1786         {
1787             uint32_t   Value;
1788         };
1789     } DW5;
1790 
1791     union
1792     {
1793         struct
1794         {
1795             uint32_t   FrameRateM;
1796         };
1797         struct
1798         {
1799             uint32_t   Value;
1800         };
1801     } DW6;
1802 
1803     union
1804     {
1805         struct
1806         {
1807             uint32_t   FrameRateD;
1808         };
1809         struct
1810         {
1811             uint32_t   Value;
1812         };
1813     } DW7;
1814 
1815     union
1816     {
1817         struct
1818         {
1819             uint32_t   BRCFlag                          : MOS_BITFIELD_RANGE(0, 15);
1820             uint32_t   BRC_Param_A                      : MOS_BITFIELD_RANGE(16, 31);
1821         };
1822         struct
1823         {
1824             uint32_t   Value;
1825         };
1826     } DW8;
1827 
1828     union
1829     {
1830         struct
1831         {
1832             uint32_t   BRC_Param_B                      : MOS_BITFIELD_RANGE(0, 15);
1833             uint32_t   FrameWidth                       : MOS_BITFIELD_RANGE(16, 31);
1834         };
1835         struct
1836         {
1837             uint32_t   Value;
1838         };
1839     } DW9;
1840 
1841     union
1842     {
1843         struct
1844         {
1845             uint32_t   FrameHeight                      : MOS_BITFIELD_RANGE(0, 15);
1846             uint32_t   AVBRAccuracy                     : MOS_BITFIELD_RANGE(16, 31);
1847         };
1848         struct
1849         {
1850             uint32_t   Value;
1851         };
1852     } DW10;
1853 
1854     union
1855     {
1856         struct
1857         {
1858             uint32_t   AVBRConvergence                  : MOS_BITFIELD_RANGE(0, 15);
1859             uint32_t   MinimumQP                        : MOS_BITFIELD_RANGE(16, 31);
1860         };
1861         struct
1862         {
1863             uint32_t   Value;
1864         };
1865     } DW11;
1866 
1867     union
1868     {
1869         struct
1870         {
1871             uint32_t   MaximumQP                        : MOS_BITFIELD_RANGE(0, 15);
1872             uint32_t   NumberSlice                      : MOS_BITFIELD_RANGE(16, 31);
1873         };
1874         struct
1875         {
1876             uint32_t   Value;
1877         };
1878     } DW12;
1879 
1880     union
1881     {
1882         struct
1883         {
1884             uint32_t   reserved                         : MOS_BITFIELD_RANGE(0, 15);
1885             uint32_t   BRC_Param_C                      : MOS_BITFIELD_RANGE(16, 31);
1886         };
1887         struct
1888         {
1889             uint32_t   Value;
1890         };
1891     } DW13;
1892 
1893     union
1894     {
1895         struct
1896         {
1897             uint32_t   BRC_Param_D                      : MOS_BITFIELD_RANGE(0, 15);
1898             uint32_t   MaxBRCLevel                      : MOS_BITFIELD_RANGE(16, 31);
1899         };
1900         struct
1901         {
1902             uint32_t   Value;
1903         };
1904     } DW14;
1905 
1906     union
1907     {
1908         struct
1909         {
1910             uint32_t   reserved;
1911         };
1912         struct
1913         {
1914             uint32_t   Value;
1915         };
1916     } DW15;
1917 
1918     union
1919     {
1920         struct
1921         {
1922             uint32_t   InstantRateThreshold0_Pframe     : MOS_BITFIELD_RANGE(0, 7);
1923             uint32_t   InstantRateThreshold1_Pframe     : MOS_BITFIELD_RANGE(8, 15);
1924             uint32_t   InstantRateThreshold2_Pframe     : MOS_BITFIELD_RANGE(16, 23);
1925             uint32_t   InstantRateThreshold3_Pframe     : MOS_BITFIELD_RANGE(24, 31);
1926         };
1927         struct
1928         {
1929             uint32_t   Value;
1930         };
1931     } DW16;
1932 
1933     union
1934     {
1935         struct
1936         {
1937             uint32_t   InstantRateThreshold0_Bframe     : MOS_BITFIELD_RANGE(0, 7);
1938             uint32_t   InstantRateThreshold1_Bframe     : MOS_BITFIELD_RANGE(8, 15);
1939             uint32_t   InstantRateThreshold2_Bframe     : MOS_BITFIELD_RANGE(16, 23);
1940             uint32_t   InstantRateThreshold3_Bframe     : MOS_BITFIELD_RANGE(24, 31);
1941         };
1942         struct
1943         {
1944             uint32_t   Value;
1945         };
1946     } DW17;
1947 
1948     union
1949     {
1950         struct
1951         {
1952             uint32_t   InstantRateThreshold0_Iframe     : MOS_BITFIELD_RANGE(0, 7);
1953             uint32_t   InstantRateThreshold1_Iframe     : MOS_BITFIELD_RANGE(8, 15);
1954             uint32_t   InstantRateThreshold2_Iframe     : MOS_BITFIELD_RANGE(16, 23);
1955             uint32_t   InstantRateThreshold3_Iframe     : MOS_BITFIELD_RANGE(24, 31);
1956         };
1957         struct
1958         {
1959             uint32_t   Value;
1960         };
1961     } DW18;
1962 
1963     union
1964     {
1965         struct
1966         {
1967             uint32_t   DeviationThreshold0_PBframe      : MOS_BITFIELD_RANGE(0, 7);
1968             uint32_t   DeviationThreshold1_PBframe      : MOS_BITFIELD_RANGE(8, 15);
1969             uint32_t   DeviationThreshold2_PBframe      : MOS_BITFIELD_RANGE(16, 23);
1970             uint32_t   DeviationThreshold3_PBframe      : MOS_BITFIELD_RANGE(24, 31);
1971         };
1972         struct
1973         {
1974             uint32_t   Value;
1975         };
1976     } DW19;
1977 
1978     union
1979     {
1980         struct
1981         {
1982             uint32_t   DeviationThreshold4_PBframe      : MOS_BITFIELD_RANGE(0, 7);
1983             uint32_t   DeviationThreshold5_PBframe      : MOS_BITFIELD_RANGE(8, 15);
1984             uint32_t   DeviationThreshold6_PBframe      : MOS_BITFIELD_RANGE(16, 23);
1985             uint32_t   DeviationThreshold7_PBframe      : MOS_BITFIELD_RANGE(24, 31);
1986         };
1987         struct
1988         {
1989             uint32_t   Value;
1990         };
1991     } DW20;
1992 
1993     union
1994     {
1995         struct
1996         {
1997             uint32_t   DeviationThreshold0_VBRcontrol   : MOS_BITFIELD_RANGE(0, 7);
1998             uint32_t   DeviationThreshold1_VBRcontrol   : MOS_BITFIELD_RANGE(8, 15);
1999             uint32_t   DeviationThreshold2_VBRcontrol   : MOS_BITFIELD_RANGE(16, 23);
2000             uint32_t   DeviationThreshold3_VBRcontrol   : MOS_BITFIELD_RANGE(24, 31);
2001         };
2002         struct
2003         {
2004             uint32_t   Value;
2005         };
2006     } DW21;
2007 
2008     union
2009     {
2010         struct
2011         {
2012             uint32_t   DeviationThreshold4_VBRcontrol   : MOS_BITFIELD_RANGE(0, 7);
2013             uint32_t   DeviationThreshold5_VBRcontrol   : MOS_BITFIELD_RANGE(8, 15);
2014             uint32_t   DeviationThreshold6_VBRcontrol   : MOS_BITFIELD_RANGE(16, 23);
2015             uint32_t   DeviationThreshold7_VBRcontrol   : MOS_BITFIELD_RANGE(24, 31);
2016         };
2017         struct
2018         {
2019             uint32_t   Value;
2020         };
2021     } DW22;
2022 
2023     union
2024     {
2025         struct
2026         {
2027             uint32_t   DeviationThreshold0_Iframe       : MOS_BITFIELD_RANGE(0, 7);
2028             uint32_t   DeviationThreshold1_Iframe       : MOS_BITFIELD_RANGE(8, 15);
2029             uint32_t   DeviationThreshold2_Iframe       : MOS_BITFIELD_RANGE(16, 23);
2030             uint32_t   DeviationThreshold3_Iframe       : MOS_BITFIELD_RANGE(24, 31);
2031         };
2032         struct
2033         {
2034             uint32_t   Value;
2035         };
2036     } DW23;
2037 
2038     union
2039     {
2040         struct
2041         {
2042             uint32_t   DeviationThreshold4_Iframe       : MOS_BITFIELD_RANGE(0, 7);
2043             uint32_t   DeviationThreshold5_Iframe       : MOS_BITFIELD_RANGE(8, 15);
2044             uint32_t   DeviationThreshold6_Iframe       : MOS_BITFIELD_RANGE(16, 23);
2045             uint32_t   DeviationThreshold7_Iframe       : MOS_BITFIELD_RANGE(24, 31);
2046         };
2047         struct
2048         {
2049             uint32_t   Value;
2050         };
2051     } DW24;
2052 
2053     union
2054     {
2055         struct
2056         {
2057             uint32_t   ACQPBuffer                       : MOS_BITFIELD_RANGE(0, 7);
2058             uint32_t   IntraSADTransform                : MOS_BITFIELD_RANGE(8, 15);
2059             uint32_t   Reserved0                        : MOS_BITFIELD_RANGE(16, 23);
2060             uint32_t   Reserved1                        : MOS_BITFIELD_RANGE(24, 31);
2061         };
2062         struct
2063         {
2064             uint32_t   Value;
2065         };
2066     } DW25;
2067 
2068     union
2069     {
2070         struct
2071         {
2072             uint32_t   reserved;
2073         };
2074         struct
2075         {
2076             uint32_t   Value;
2077         };
2078     } DW26;
2079 
2080     union
2081     {
2082         struct
2083         {
2084             uint32_t   reserved;
2085         };
2086         struct
2087         {
2088             uint32_t   Value;
2089         };
2090     } DW27;
2091 
2092     union
2093     {
2094         struct
2095         {
2096             uint32_t   reserved;
2097         };
2098         struct
2099         {
2100             uint32_t   Value;
2101         };
2102     } DW28;
2103 
2104     union
2105     {
2106         struct
2107         {
2108             uint32_t   reserved;
2109         };
2110         struct
2111         {
2112             uint32_t   Value;
2113         };
2114     } DW29;
2115 
2116     union
2117     {
2118         struct
2119         {
2120             uint32_t   reserved;
2121         };
2122         struct
2123         {
2124             uint32_t   Value;
2125         };
2126     } DW30;
2127 
2128     union
2129     {
2130         struct
2131         {
2132             uint32_t   reserved;
2133         };
2134         struct
2135         {
2136             uint32_t   Value;
2137         };
2138     } DW31;
2139 };
2140 
2141 using PCODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9 = struct CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9*; // pointer alias for the BRC init/reset curbe struct
2142 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9)) == 32 ); // size guard: the struct must measure exactly 32 DWORDs (its last member is DW31)
2143 
2144 //! HEVC encoder BRC update kernel curbe for GEN9 (members DW0..DW15; each DW is a union of a named-field view and a raw 32-bit Value)
2145 struct CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9
2146 {
2147     union
2148     {
2149         struct
2150         {
2151             uint32_t   TARGETSIZE;
2152         };
2153         struct
2154         {
2155             uint32_t   Value;
2156         };
2157     }DW0; // DW0: TARGETSIZE occupies the whole dword
2158 
2159     union
2160     {
2161         struct
2162         {
2163             uint32_t   FrameNumber;
2164         };
2165         struct
2166         {
2167             uint32_t   Value;
2168         };
2169     }DW1; // DW1: FrameNumber occupies the whole dword
2170 
2171     union
2172     {
2173         struct
2174         {
2175             uint32_t   PictureHeaderSize;
2176         };
2177         struct
2178         {
2179             uint32_t   Value;
2180         };
2181     }DW2; // DW2: PictureHeaderSize occupies the whole dword
2182 
2183     union
2184     {
2185         struct
2186         {
2187             uint32_t   startGAdjFrame0                  : MOS_BITFIELD_RANGE(0, 15); // NOTE(review): MOS_BITFIELD_RANGE(lo, hi) presumably yields the width of bits lo..hi - confirm in the MOS headers
2188             uint32_t   startGAdjFrame1                  : MOS_BITFIELD_RANGE(16, 31);
2189         };
2190         struct
2191         {
2192             uint32_t   Value;
2193         };
2194     }DW3; // DW3: startGAdjFrame0/1, declared over ranges (0, 15) and (16, 31)
2195 
2196     union
2197     {
2198         struct
2199         {
2200             uint32_t   startGAdjFrame2                  : MOS_BITFIELD_RANGE(0, 15);
2201             uint32_t   startGAdjFrame3                  : MOS_BITFIELD_RANGE(16, 31);
2202         };
2203         struct
2204         {
2205             uint32_t   Value;
2206         };
2207     }DW4; // DW4: startGAdjFrame2/3, declared over ranges (0, 15) and (16, 31)
2208 
2209     union
2210     {
2211         struct
2212         {
2213             uint32_t   TARGETSIZE_FLAG                  : MOS_BITFIELD_RANGE(0, 7);
2214             uint32_t   BRCFlag                          : MOS_BITFIELD_RANGE(8, 15);
2215             uint32_t   MaxNumPAKs                       : MOS_BITFIELD_RANGE(16, 23);
2216             uint32_t   CurrFrameType                    : MOS_BITFIELD_RANGE(24, 31);
2217         };
2218         struct
2219         {
2220             uint32_t   Value;
2221         };
2222     }DW5; // DW5: four fields over 8-bit ranges: TARGETSIZE_FLAG, BRCFlag, MaxNumPAKs, CurrFrameType
2223 
2224     union
2225     {
2226         struct
2227         {
2228             uint32_t   NumSkippedFrames                 : MOS_BITFIELD_RANGE(0, 7);
2229             uint32_t   CQPValue                         : MOS_BITFIELD_RANGE(8, 15);
2230             uint32_t   ROIFlag                          : MOS_BITFIELD_RANGE(16, 23);
2231             uint32_t   ROIRatio                         : MOS_BITFIELD_RANGE(24, 31);
2232         };
2233         struct
2234         {
2235             uint32_t   Value;
2236         };
2237     }DW6; // DW6: four fields over 8-bit ranges: NumSkippedFrames, CQPValue, ROIFlag, ROIRatio
2238 
2239     union
2240     {
2241         struct
2242         {
2243             uint32_t   FrameWidthInLCU                  : MOS_BITFIELD_RANGE(0,   7);
2244             uint32_t   Res_8_14                         : MOS_BITFIELD_RANGE(8,  14); // reserved filler between FrameWidthInLCU and the bit-15 flag
2245             uint32_t   KernelBuildControl               : MOS_BITFIELD_BIT(      15);
2246             uint32_t   ucMinQp                          : MOS_BITFIELD_RANGE(16, 23);
2247             uint32_t   ucMaxQp                          : MOS_BITFIELD_RANGE(24, 31);
2248         };
2249         struct
2250         {
2251             uint32_t   Value;
2252         };
2253     }DW7; // DW7: FrameWidthInLCU, reserved range (8, 14), KernelBuildControl flag, then ucMinQp/ucMaxQp
2254 
2255     union
2256     {
2257         struct
2258         {
2259             uint32_t   StartGlobalAdjustMult0           : MOS_BITFIELD_RANGE(0, 7);
2260             uint32_t   StartGlobalAdjustMult1           : MOS_BITFIELD_RANGE(8, 15);
2261             uint32_t   StartGlobalAdjustMult2           : MOS_BITFIELD_RANGE(16, 23);
2262             uint32_t   StartGlobalAdjustMult3           : MOS_BITFIELD_RANGE(24, 31);
2263         };
2264         struct
2265         {
2266             uint32_t   Value;
2267         };
2268     }DW8; // DW8: StartGlobalAdjustMult0..3
2269 
2270     union
2271     {
2272         struct
2273         {
2274             uint32_t   StartGlobalAdjustMult4           : MOS_BITFIELD_RANGE(0, 7);
2275             uint32_t   StartGlobalAdjustDivd0           : MOS_BITFIELD_RANGE(8, 15);
2276             uint32_t   StartGlobalAdjustDivd1           : MOS_BITFIELD_RANGE(16, 23);
2277             uint32_t   StartGlobalAdjustDivd2           : MOS_BITFIELD_RANGE(24, 31);
2278         };
2279         struct
2280         {
2281             uint32_t   Value;
2282         };
2283     }DW9; // DW9: StartGlobalAdjustMult4 followed by StartGlobalAdjustDivd0..2 (the Mult/Divd series straddle dword boundaries)
2284 
2285     union
2286     {
2287         struct
2288         {
2289             uint32_t   StartGlobalAdjustDivd3           : MOS_BITFIELD_RANGE(0, 7);
2290             uint32_t   StartGlobalAdjustDivd4           : MOS_BITFIELD_RANGE(8, 15);
2291             uint32_t   QPThreshold0                     : MOS_BITFIELD_RANGE(16, 23);
2292             uint32_t   QPThreshold1                     : MOS_BITFIELD_RANGE(24, 31);
2293         };
2294         struct
2295         {
2296             uint32_t   Value;
2297         };
2298     }DW10; // DW10: StartGlobalAdjustDivd3..4 followed by QPThreshold0..1
2299 
2300     union
2301     {
2302         struct
2303         {
2304             uint32_t   QPThreshold2                     : MOS_BITFIELD_RANGE(0, 7);
2305             uint32_t   QPThreshold3                     : MOS_BITFIELD_RANGE(8, 15);
2306             uint32_t   gRateRatioThreshold0             : MOS_BITFIELD_RANGE(16, 23);
2307             uint32_t   gRateRatioThreshold1             : MOS_BITFIELD_RANGE(24, 31);
2308         };
2309         struct
2310         {
2311             uint32_t   Value;
2312         };
2313     }DW11; // DW11: QPThreshold2..3 followed by gRateRatioThreshold0..1
2314 
2315     union
2316     {
2317         struct
2318         {
2319             uint32_t   gRateRatioThreshold2             : MOS_BITFIELD_RANGE(0, 7);
2320             uint32_t   gRateRatioThreshold3             : MOS_BITFIELD_RANGE(8, 15);
2321             uint32_t   gRateRatioThreshold4             : MOS_BITFIELD_RANGE(16, 23);
2322             uint32_t   gRateRatioThreshold5             : MOS_BITFIELD_RANGE(24, 31);
2323         };
2324         struct
2325         {
2326             uint32_t   Value;
2327         };
2328     }DW12; // DW12: gRateRatioThreshold2..5
2329 
2330     union
2331     {
2332         struct
2333         {
2334             uint32_t   gRateRatioThreshold6             : MOS_BITFIELD_RANGE(0, 7);
2335             uint32_t   gRateRatioThreshold7             : MOS_BITFIELD_RANGE(8, 15);
2336             uint32_t   gRateRatioThreshold8             : MOS_BITFIELD_RANGE(16, 23);
2337             uint32_t   gRateRatioThreshold9             : MOS_BITFIELD_RANGE(24, 31);
2338         };
2339         struct
2340         {
2341             uint32_t   Value;
2342         };
2343     }DW13; // DW13: gRateRatioThreshold6..9
2344 
2345     union
2346     {
2347         struct
2348         {
2349             uint32_t   gRateRatioThreshold10            : MOS_BITFIELD_RANGE(0, 7);
2350             uint32_t   gRateRatioThreshold11            : MOS_BITFIELD_RANGE(8, 15);
2351             uint32_t   gRateRatioThreshold12            : MOS_BITFIELD_RANGE(16, 23);
2352             uint32_t   ParallelMode                     : MOS_BITFIELD_RANGE(24, 31);
2353         };
2354         struct
2355         {
2356             uint32_t   Value;
2357         };
2358     }DW14; // DW14: gRateRatioThreshold10..12 followed by ParallelMode
2359 
2360     union
2361     {
2362         struct
2363         {
2364             uint32_t   SizeOfSkippedFrames;
2365         };
2366         struct
2367         {
2368             uint32_t   Value;
2369         };
2370     }DW15; // DW15: SizeOfSkippedFrames occupies the whole dword
2371 };
2372 
2373 using PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 = struct CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9*; // pointer alias for the BRC update curbe struct
2374 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9)) == 16); // size guard: the struct must measure exactly 16 DWORDs (DW0..DW15)
2375 
2376 //! HEVC encoder coarse intra kernel curbe for GEN9 (members DW0..DW10; each DW is a union of a named-field view and a raw 32-bit Value)
2377 struct CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9
2378 {
2379     union
2380     {
2381         struct
2382         {
2383             uint32_t   PictureWidthInLumaSamples        : MOS_BITFIELD_RANGE(0, 15); // NOTE(review): MOS_BITFIELD_RANGE(lo, hi) presumably yields the width of bits lo..hi - confirm in the MOS headers
2384             uint32_t   PictureHeightInLumaSamples       : MOS_BITFIELD_RANGE(16, 31);
2385         };
2386         struct
2387         {
2388             uint32_t   Value;
2389         };
2390     }DW0; // DW0: picture width and height in luma samples, declared over ranges (0, 15) and (16, 31)
2391 
2392     union
2393     {
2394         struct
2395         {
2396             uint32_t   SrcSize                          : MOS_BITFIELD_RANGE(0, 1);
2397             uint32_t   Reserved0                        : MOS_BITFIELD_RANGE(2, 13);
2398             uint32_t   SkipType                         : MOS_BITFIELD_BIT(14);
2399             uint32_t   Reserved1                        : MOS_BITFIELD_BIT(15);
2400             uint32_t   InterChromaMode                  : MOS_BITFIELD_BIT(16);
2401             uint32_t   FTEnable                         : MOS_BITFIELD_BIT(17);
2402             uint32_t   Reserved2                        : MOS_BITFIELD_BIT(18);
2403             uint32_t   BlkSkipEnabled                   : MOS_BITFIELD_BIT(19);
2404             uint32_t   InterSAD                         : MOS_BITFIELD_RANGE(20, 21);
2405             uint32_t   IntraSAD                         : MOS_BITFIELD_RANGE(22, 23);
2406             uint32_t   Reserved3                        : MOS_BITFIELD_RANGE(24, 31);
2407         };
2408         struct
2409         {
2410             uint32_t   Value;
2411         };
2412     }DW1; // DW1: control fields interleaved with Reserved0..3 fillers (range (2, 13), bit 15, bit 18, range (24, 31))
2413 
2414     union
2415     {
2416         struct
2417         {
2418             uint32_t   IntraPartMask                    : MOS_BITFIELD_RANGE(0, 4);
2419             uint32_t   NonSkipZMvAdded                  : MOS_BITFIELD_BIT(5);
2420             uint32_t   NonSkipModeAdded                 : MOS_BITFIELD_BIT(6);
2421             uint32_t   IntraCornerSwap                  : MOS_BITFIELD_BIT(7);
2422             uint32_t   Reserved0                        : MOS_BITFIELD_RANGE(8, 15);
2423             uint32_t   MVCostScaleFactor                : MOS_BITFIELD_RANGE(16, 17);
2424             uint32_t   BilinearEnable                   : MOS_BITFIELD_BIT(18);
2425             uint32_t   Reserved1                        : MOS_BITFIELD_BIT(19);
2426             uint32_t   WeightedSADHAAR                  : MOS_BITFIELD_BIT(20);
2427             uint32_t   AConlyHAAR                       : MOS_BITFIELD_BIT(21);
2428             uint32_t   RefIDCostMode                    : MOS_BITFIELD_BIT(22);
2429             uint32_t   Reserved2                        : MOS_BITFIELD_BIT(23);
2430             uint32_t   SkipCenterMask                   : MOS_BITFIELD_RANGE(24, 31);
2431         };
2432         struct
2433         {
2434             uint32_t   Value;
2435         };
2436     }DW2; // DW2: control fields interleaved with Reserved0..2 fillers (range (8, 15), bit 19, bit 23)
2437 
2438     union
2439     {
2440         struct
2441         {
2442             uint32_t Reserved;
2443         };
2444         struct
2445         {
2446             uint32_t   Value;
2447         };
2448     }DW3; // DW3: entire dword reserved
2449 
2450     union
2451     {
2452         struct
2453         {
2454             uint32_t Reserved;
2455         };
2456         struct
2457         {
2458             uint32_t   Value;
2459         };
2460     }DW4; // DW4: entire dword reserved
2461 
2462     union
2463     {
2464         struct
2465         {
2466             uint32_t Reserved;
2467         };
2468         struct
2469         {
2470             uint32_t   Value;
2471         };
2472     }DW5; // DW5: entire dword reserved
2473 
2474     union
2475     {
2476         struct
2477         {
2478             uint32_t Reserved;
2479         };
2480         struct
2481         {
2482             uint32_t   Value;
2483         };
2484     }DW6; // DW6: entire dword reserved
2485 
2486     union
2487     {
2488         struct
2489         {
2490             uint32_t Reserved;
2491         };
2492         struct
2493         {
2494             uint32_t   Value;
2495         };
2496     }DW7; // DW7: entire dword reserved
2497 
2498     union
2499     {
2500         struct
2501         {
2502             uint32_t BTI_Src_Y4;
2503         };
2504         struct
2505         {
2506             uint32_t   Value;
2507         };
2508     }DW8; // DW8: BTI_Src_Y4 occupies the whole dword. NOTE(review): "BTI" presumably means binding table index - confirm against the kernel interface
2509 
2510     union
2511     {
2512         struct
2513         {
2514             uint32_t BTI_Intra_Dist;
2515         };
2516         struct
2517         {
2518             uint32_t   Value;
2519         };
2520     }DW9; // DW9: BTI_Intra_Dist occupies the whole dword
2521 
2522     union
2523     {
2524         struct
2525         {
2526             uint32_t BTI_VME_Intra;
2527         };
2528         struct
2529         {
2530             uint32_t   Value;
2531         };
2532     }DW10; // DW10: BTI_VME_Intra occupies the whole dword
2533 };
2534 
2535 using PCODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9 = struct CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9*; // pointer alias for the coarse intra curbe struct
2536 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9)) == 11 ); // size guard: the struct must measure exactly 11 DWORDs (DW0..DW10)
2537 
2538 const uint8_t CodechalEncHevcStateG9::m_ftqBasedSkip[NUM_TARGET_USAGE_MODES] = // FTQ-based-skip setting table, one entry per target-usage mode (8 initializers listed)
2539 {
2540     0, 3, 3, 3, 3, 3, 3, 0 // first and last entries are 0, every entry in between is 3; NOTE(review): meaning of the value 3 is defined by the consumer - confirm
2541 };
2542 
2543 const uint8_t CodechalEncHevcStateG9::m_meMethod[NUM_TARGET_USAGE_MODES] = // ME method selector table, one entry per target-usage mode (8 initializers listed)
2544 {
2545     0, 4, 4, 4, 4, 4, 4, 6 // NOTE(review): the method codes (0, 4, 6) are interpreted by the consumer of this table - confirm their meaning there
2546 };
2547 
2548 const uint8_t CodechalEncHevcStateG9::m_superCombineDist[NUM_TARGET_USAGE_MODES + 1] = // super-combine distance table; note it is one entry longer than the other per-target-usage tables
2549 {
2550     0, 1, 1, 5, 5, 5, 9, 9, 0 // 9 initializers listed, matching the "+ 1" in the array bound
2551 };
2552 
2553 const uint16_t CodechalEncHevcStateG9::m_skipValB[2][2][64] = // skip value table indexed [block based skip][transform flag][0..63]; NOTE(review): last index is presumably QP - confirm against the caller
2554 {
2555     {
2556         // Block Based Skip = 0 and Transform Flag = 0 (entries 0-6 and 52-63 are zero)
2557         {
2558             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0024,
2559             0x0024, 0x0060, 0x0060, 0x0099, 0x0099, 0x00cf, 0x00cf, 0x0105,
2560             0x0105, 0x0141, 0x0141, 0x0183, 0x0183, 0x01ce, 0x01ce, 0x0228,
2561             0x0228, 0x0291, 0x0291, 0x030c, 0x030c, 0x039f, 0x039f, 0x0447,
2562             0x0447, 0x050d, 0x050d, 0x05f1, 0x05f1, 0x06f6, 0x06f6, 0x0822,
2563             0x0822, 0x0972, 0x0972, 0x0aef, 0x0aef, 0x0c96, 0x0c96, 0x0e70,
2564             0x0e70, 0x107a, 0x107a, 0x1284, 0x0000, 0x0000, 0x0000, 0x0000,
2565             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2566         },
2567         // Block Based Skip = 0 and Transform Flag = 1 (values are identical to the Transform Flag = 0 table above)
2568         {
2569             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0024,
2570             0x0024, 0x0060, 0x0060, 0x0099, 0x0099, 0x00cf, 0x00cf, 0x0105,
2571             0x0105, 0x0141, 0x0141, 0x0183, 0x0183, 0x01ce, 0x01ce, 0x0228,
2572             0x0228, 0x0291, 0x0291, 0x030c, 0x030c, 0x039f, 0x039f, 0x0447,
2573             0x0447, 0x050d, 0x050d, 0x05f1, 0x05f1, 0x06f6, 0x06f6, 0x0822,
2574             0x0822, 0x0972, 0x0972, 0x0aef, 0x0aef, 0x0c96, 0x0c96, 0x0e70,
2575             0x0e70, 0x107a, 0x107a, 0x1284, 0x0000, 0x0000, 0x0000, 0x0000,
2576             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2577         }
2578     },
2579     {
2580         // Block Based Skip = 1 and Transform Flag = 0 (entries 0-6 and 52-63 are zero)
2581         {
2582             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0006,
2583             0x0006, 0x0010, 0x0010, 0x0019, 0x0019, 0x0022, 0x0022, 0x002b,
2584             0x002b, 0x0035, 0x0035, 0x0040, 0x0040, 0x004d, 0x004d, 0x005c,
2585             0x005c, 0x006d, 0x006d, 0x0082, 0x0082, 0x009a, 0x009a, 0x00b6,
2586             0x00b6, 0x00d7, 0x00d7, 0x00fd, 0x00fd, 0x0129, 0x0129, 0x015b,
2587             0x015b, 0x0193, 0x0193, 0x01d2, 0x01d2, 0x0219, 0x0219, 0x0268,
2588             0x0268, 0x02bf, 0x02bf, 0x0316, 0x0000, 0x0000, 0x0000, 0x0000,
2589             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2590         },
2591         // Block Based Skip = 1 and Transform Flag = 1 (entries 0-6 and 52-63 are zero)
2592         {
2593             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000c,
2594             0x000c, 0x0020, 0x0020, 0x0033, 0x0033, 0x0045, 0x0045, 0x0057,
2595             0x0057, 0x006b, 0x006b, 0x0081, 0x0081, 0x009a, 0x009a, 0x00b8,
2596             0x00b8, 0x00db, 0x00db, 0x0104, 0x0104, 0x0135, 0x0135, 0x016d,
2597             0x016d, 0x01af, 0x01af, 0x01fb, 0x01fb, 0x0252, 0x0252, 0x02b6,
2598             0x02b6, 0x0326, 0x0326, 0x03a5, 0x03a5, 0x0432, 0x0432, 0x04d0,
2599             0x04d0, 0x057e, 0x057e, 0x062c, 0x0000, 0x0000, 0x0000, 0x0000,
2600             0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2601         }
2602     }
2603 };
2604 
2605 const double CodechalEncHevcStateG9::m_modeCostLut[3][12] = { // mode cost lookup: one row of 12 entries per slice type, in the order B, P, I given by the row comments
2606     //BPREDSLICE
2607     { 3.5, 4, 14, 40, 6.0, 3.25, 4.25, 0, 3.0, 1.0, 2.0, 0.0 },
2608     //PREDSLICE
2609     { 3.5, 4, 14, 35, 4.5, 1.32, 2.32, 0, 2.75, 0.0, 2.0, 0.0 },
2610     //INTRASLICE (only entries 0, 2 and 3 are non-zero)
2611     { 3.5, 0, 10.0, 30, 0, 0, 0, 0, 0, 0, 0, 0 }
2612 };
2613 
2614 const double CodechalEncHevcStateG9::m_mvCostLut[3][8] = { // MV cost lookup: one row of 8 entries per slice type, in the order B, P, I given by the row comments
2615     //BPREDSLICE
2616     { 0.0, 1.0, 1.0, 3.0, 5.0, 6.0, 7.0, 8.0 },
2617     //PREDSLICE
2618     { 0.0, 2.0, 2.5, 4.5, 5.0, 6.0, 7.0, 7.5 },
2619     //INTRASLICE (all entries are zero)
2620     { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
2621 };
2622 
2623 const uint32_t CodechalEncHevcStateG9::m_brcMvCostHaar[][416] =
2624 {
2625     // I
2626     {
2627         0x0d040001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff000000, 0x3e6c0535, 0x0d040001,
2628         0x0f050001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff010101, 0x3e847641, 0x0f050001,
2629         0x19050002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff020202, 0x3e94aefa, 0x19050002,
2630         0x1a060002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff030303, 0x3ea6e43f, 0x1a060002,
2631         0x1b070002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff040404, 0x3ebb5458, 0x1b070002,
2632         0x1c080002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff050505, 0x3ed2452d, 0x1c080002,
2633         0x1e090003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff060606, 0x3eec0535, 0x1e090003,
2634         0x280a0003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff070707, 0x3f047641, 0x280a0003,
2635         0x290b0004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff080808, 0x3f14aefa, 0x290b0004,
2636         0x2a0d0004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff090909, 0x3f26e43f, 0x2a0d0004,
2637         0x2b0e0005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0a0a0a, 0x3f3b5458, 0x2b0e0005,
2638         0x2c180005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0b0b0b, 0x3f52452d, 0x2c180005,
2639         0x2e190006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0c0c0c, 0x3f6c0535, 0x2e190006,
2640         0x381a0007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0d0d0d, 0x3f847641, 0x381a0007,
2641         0x391c0008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0e0e0e, 0x3f94aefa, 0x391c0008,
2642         0x3a1d0009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0f0f0f, 0x3fa6e43f, 0x3a1d0009,
2643         0x3b1f000a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff101010, 0x3fbb5458, 0x3b1f000a,
2644         0x3c28000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff111111, 0x3fd2452d, 0x3c28000b,
2645         0x3e29000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff121212, 0x3fec0535, 0x3e29000c,
2646         0x482a000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff131313, 0x40047641, 0x482a000e,
2647         0x492c0018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff141414, 0x4014aefa, 0x492c0018,
2648         0x4a2d0019, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff151515, 0x4026e43f, 0x4a2d0019,
2649         0x4b2f001a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff161616, 0x403b5458, 0x4b2f001a,
2650         0x4c38001b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff171717, 0x4052452d, 0x4c38001b,
2651         0x4e39001d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff181818, 0x406c0535, 0x4e39001d,
2652         0x583a001e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff191919, 0x40847641, 0x583a001e,
2653         0x593c0028, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1a1a1a, 0x4094aefa, 0x593c0028,
2654         0x5a3d0029, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1b1b1b, 0x40a6e43f, 0x5a3d0029,
2655         0x5b3f002a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1c1c1c, 0x40bb5458, 0x5b3f002a,
2656         0x5c48002b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1d1d1d, 0x40d2452d, 0x5c48002b,
2657         0x5e49002d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1e1e1e, 0x40ec0535, 0x5e49002d,
2658         0x684a002e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1f1f1f, 0x41047641, 0x684a002e,
2659         0x694c0038, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff202020, 0x4114aefa, 0x694c0038,
2660         0x6a4d0039, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff212121, 0x4126e43f, 0x6a4d0039,
2661         0x6b4f003a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff222222, 0x413b5458, 0x6b4f003a,
2662         0x6c58003b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff232323, 0x4152452d, 0x6c58003b,
2663         0x6e59003d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff242424, 0x416c0535, 0x6e59003d,
2664         0x785a003e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff252525, 0x41847641, 0x785a003e,
2665         0x795c0048, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff262626, 0x4194aefa, 0x795c0048,
2666         0x7a5d0049, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff272727, 0x41a6e43f, 0x7a5d0049,
2667         0x7b5f004a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff282828, 0x41bb5458, 0x7b5f004a,
2668         0x7c68004b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff292929, 0x41d2452d, 0x7c68004b,
2669         0x7e69004d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2a2a2a, 0x41ec0535, 0x7e69004d,
2670         0x886a004e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2b2b2b, 0x42047641, 0x886a004e,
2671         0x896c0058, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2c2c2c, 0x4214aefa, 0x896c0058,
2672         0x8a6d0059, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2d2d2d, 0x4226e43f, 0x8a6d0059,
2673         0x8b6f005a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2e2e2e, 0x423b5458, 0x8b6f005a,
2674         0x8c78005b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2f2f2f, 0x4252452d, 0x8c78005b,
2675         0x8e79005d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff303030, 0x426c0535, 0x8e79005d,
2676         0x8f7a005e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff313131, 0x42847641, 0x8f7a005e,
2677         0x8f7c0068, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff323232, 0x4294aefa, 0x8f7c0068,
2678         0x8f7d0069, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff333333, 0x42a6e43f, 0x8f7d0069
2679     },
2680     // P
2681     {
2682         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x003b003e, 0x3f800000, 0x391e0807,
2683         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x013b003e, 0x3f800000, 0x391e0807,
2684         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x023b003e, 0x3f800000, 0x391e0807,
2685         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x033b003e, 0x3f800000, 0x391e0807,
2686         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x043b003e, 0x3f800000, 0x391e0807,
2687         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x053b003e, 0x3f800000, 0x391e0807,
2688         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x063b003e, 0x3f800000, 0x391e0807,
2689         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x073b003e, 0x3f800000, 0x391e0807,
2690         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x083b003e, 0x3f800000, 0x391e0807,
2691         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x093b003e, 0x3f800000, 0x391e0807,
2692         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0a3b003e, 0x3f800000, 0x391e0807,
2693         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0b3b003e, 0x3f800000, 0x391e0807,
2694         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0c3b003e, 0x3f800000, 0x391e0807,
2695         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0d3b003e, 0x3f800000, 0x391e0807,
2696         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0e3b003e, 0x3f800000, 0x391e0807,
2697         0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0f3b003e, 0x3f800000, 0x391e0807,
2698         0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x104b007c, 0x40000000, 0x492e180e,
2699         0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x114b007c, 0x40000000, 0x492e180e,
2700         0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x124b007c, 0x40000000, 0x492e180e,
2701         0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x134b007c, 0x40000000, 0x492e180e,
2702         0x4d3b1c1b, 0x000d071e, 0x000c0018, 0x1e0f0c00, 0x2b2b291f, 0x145800ba, 0x40400000, 0x4d3b1c1b,
2703         0x4d3b1c1b, 0x000d071e, 0x000c0018, 0x1e0f0c00, 0x2b2b291f, 0x155800ba, 0x40400000, 0x4d3b1c1b,
2704         0x4d3b1c1b, 0x000d071e, 0x000c0018, 0x1e0f0c00, 0x2b2b291f, 0x165800ba, 0x40400000, 0x4d3b1c1b,
2705         0x593e281e, 0x00190a29, 0x0018001b, 0x291a1800, 0x2f2e2c2a, 0x175b00f8, 0x40800000, 0x593e281e,
2706         0x593e281e, 0x00190a29, 0x0018001b, 0x291a1800, 0x2f2e2c2a, 0x185b00f8, 0x40800000, 0x593e281e,
2707         0x593e281e, 0x00190a29, 0x0018001b, 0x291a1800, 0x2f2e2c2a, 0x195b00f8, 0x40800000, 0x593e281e,
2708         0x5b492a29, 0x001c0d2b, 0x001a001e, 0x2b1d1a00, 0x39392f2d, 0x1a5e0136, 0x40a00000, 0x5b492a29,
2709         0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x1b680174, 0x40c00000, 0x5d4b2c2b,
2710         0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x1c680174, 0x40c00000, 0x5d4b2c2b,
2711         0x5f4c2e2c, 0x00281938, 0x001e002a, 0x38291e00, 0x3d3c3b39, 0x1d6a01b2, 0x40e00000, 0x5f4c2e2c,
2712         0x694e382e, 0x00291b39, 0x0028002b, 0x392a2800, 0x3f3e3c3a, 0x1e6b01f0, 0x41000000, 0x694e382e,
2713         0x6a583938, 0x002a1c3a, 0x0029002c, 0x3a2b2900, 0x48483e3b, 0x1f6d022e, 0x41100000, 0x6a583938,
2714         0x6b593a39, 0x002c1d3b, 0x002a002e, 0x3b2d2a00, 0x49493f3d, 0x206e026c, 0x41200000, 0x6b593a39,
2715         0x6c5a3b3a, 0x002d1f3c, 0x002b002f, 0x3c2e2b00, 0x4a4a483e, 0x216f02aa, 0x41300000, 0x6c5a3b3a,
2716         0x6e5b3d3b, 0x002f293f, 0x002d0039, 0x3f382d00, 0x4c4b4a48, 0x22790326, 0x41500000, 0x6e5b3d3b,
2717         0x6f5c3e3c, 0x00382948, 0x002e003a, 0x48392e00, 0x4d4c4b49, 0x237a0364, 0x41600000, 0x6f5c3e3c,
2718         0x795e483e, 0x00392b49, 0x0038003b, 0x493a3800, 0x4f4e4c4a, 0x247b03e0, 0x41800000, 0x795e483e,
2719         0x7a684948, 0x003a2c4a, 0x0039003c, 0x4a3b3900, 0x58584e4b, 0x257d045c, 0x41900000, 0x7a684948,
2720         0x7b694a49, 0x003c2d4b, 0x003a003e, 0x4b3d3a00, 0x59594f4d, 0x267e04d8, 0x41a00000, 0x7b694a49,
2721         0x7d6a4c4a, 0x003d2f4d, 0x003c0048, 0x4d3e3c00, 0x5b5a594e, 0x27880592, 0x41b80000, 0x7d6a4c4a,
2722         0x7e6b4d4b, 0x003e384e, 0x003d0049, 0x4e483d00, 0x5c5b5958, 0x2889060e, 0x41c80000, 0x7e6b4d4b,
2723         0x886d4f4d, 0x00483a58, 0x003f004a, 0x58493f00, 0x5e5d5b59, 0x298a0706, 0x41e80000, 0x886d4f4d,
2724         0x896e584e, 0x00493b59, 0x0048004b, 0x594a4800, 0x5f5e5c5a, 0x2a8b07c0, 0x42000000, 0x896e584e,
2725         0x8a785958, 0x004a3c5a, 0x0049004c, 0x5a4b4900, 0x68685e5b, 0x2b8d08b8, 0x42100000, 0x8a785958,
2726         0x8b795a59, 0x004c3d5b, 0x004a004e, 0x5b4d4a00, 0x69695f5d, 0x2c8e09b0, 0x42200000, 0x8b795a59,
2727         0x8c7a5b5a, 0x004d3f5d, 0x004b004f, 0x5d4e4b00, 0x6b6a685e, 0x2d8f0ae6, 0x42340000, 0x8c7a5b5a,
2728         0x8e7b5d5b, 0x004f485e, 0x004d0059, 0x5e584d00, 0x6c6b6a68, 0x2e8f0c5a, 0x424c0000, 0x8e7b5d5b,
2729         0x8f7c5e5c, 0x00584968, 0x004e005a, 0x68594e00, 0x6d6c6b69, 0x2f8f0dce, 0x42640000, 0x8f7c5e5c,
2730         0x8f7e685e, 0x00594b69, 0x0058005b, 0x695a5800, 0x6f6e6c6a, 0x308f0f80, 0x42800000, 0x8f7e685e,
2731         0x8f886968, 0x005a4c6a, 0x0059005c, 0x6a5b5900, 0x6f6f6e6b, 0x318f1170, 0x42900000, 0x8f886968,
2732         0x8f896a69, 0x005c4d6b, 0x005a005e, 0x6b5d5a00, 0x6f6f6f6d, 0x328f139e, 0x42a20000, 0x8f896a69,
2733         0x8f8a6b6a, 0x005d4f6d, 0x005b0068, 0x6d5e5b00, 0x6f6f6f6e, 0x338f160a, 0x42b60000, 0x8f8a6b6a
2734     },
2735     // B
2736     {
2737         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x003b0048, 0x3f800000, 0x3a1e0807,
2738         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x013b0048, 0x3f800000, 0x3a1e0807,
2739         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x023b0048, 0x3f800000, 0x3a1e0807,
2740         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x033b0048, 0x3f800000, 0x3a1e0807,
2741         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x043b0048, 0x3f800000, 0x3a1e0807,
2742         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x053b0048, 0x3f800000, 0x3a1e0807,
2743         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x063b0048, 0x3f800000, 0x3a1e0807,
2744         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x073b0048, 0x3f800000, 0x3a1e0807,
2745         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x083b0048, 0x3f800000, 0x3a1e0807,
2746         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x093b0048, 0x3f800000, 0x3a1e0807,
2747         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0a3b0048, 0x3f800000, 0x3a1e0807,
2748         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0b3b0048, 0x3f800000, 0x3a1e0807,
2749         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0c3b0048, 0x3f800000, 0x3a1e0807,
2750         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0d3b0048, 0x3f800000, 0x3a1e0807,
2751         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0e3b0048, 0x3f800000, 0x3a1e0807,
2752         0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0f3b0048, 0x3f800000, 0x3a1e0807,
2753         0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x104b0090, 0x40000000, 0x4a2e180e,
2754         0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x114b0090, 0x40000000, 0x4a2e180e,
2755         0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x124b0090, 0x40000000, 0x4a2e180e,
2756         0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x134b0090, 0x40000000, 0x4a2e180e,
2757         0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x145800d8, 0x40400000, 0x4f3b1c1b,
2758         0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x155800d8, 0x40400000, 0x4f3b1c1b,
2759         0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x165800d8, 0x40400000, 0x4f3b1c1b,
2760         0x5a3e281e, 0x00291d2c, 0x0018081c, 0x1c080800, 0x382e2c2a, 0x175b0120, 0x40800000, 0x5a3e281e,
2761         0x5a3e281e, 0x00291d2c, 0x0018081c, 0x1c080800, 0x382e2c2a, 0x185b0120, 0x40800000, 0x5a3e281e,
2762         0x5a3e281e, 0x00291d2c, 0x0018081c, 0x1c080800, 0x382e2c2a, 0x195b0120, 0x40800000, 0x5a3e281e,
2763         0x5d492a29, 0x002b282f, 0x001a0a1f, 0x1f0a0a00, 0x3a392f2d, 0x1a5e0168, 0x40a00000, 0x5d492a29,
2764         0x5f4b2c2b, 0x002d2a39, 0x001c0c29, 0x290c0c00, 0x3c3b392f, 0x1b6801b0, 0x40c00000, 0x5f4b2c2b,
2765         0x5f4b2c2b, 0x002d2a39, 0x001c0c29, 0x290c0c00, 0x3c3b392f, 0x1c6801b0, 0x40c00000, 0x5f4b2c2b,
2766         0x694c2e2c, 0x002f2b3b, 0x001e0e2b, 0x2b0e0e00, 0x3e3c3b39, 0x1d6a01f8, 0x40e00000, 0x694c2e2c,
2767         0x6a4e382e, 0x00392d3c, 0x0028182c, 0x2c181800, 0x483e3c3a, 0x1e6b0240, 0x41000000, 0x6a4e382e,
2768         0x6b583938, 0x003a2f3e, 0x0029192e, 0x2e191900, 0x49483e3b, 0x1f6d0288, 0x41100000, 0x6b583938,
2769         0x6d593a39, 0x003b383f, 0x002a1a2f, 0x2f1a1a00, 0x4a493f3d, 0x206e02d0, 0x41200000, 0x6d593a39,
2770         0x6e5a3b3a, 0x003c3948, 0x002b1b38, 0x381b1b00, 0x4b4a483e, 0x216f0318, 0x41300000, 0x6e5a3b3a,
2771         0x785b3d3b, 0x003e3b4a, 0x002d1d3a, 0x3a1d1d00, 0x4d4b4a48, 0x227903a8, 0x41500000, 0x785b3d3b,
2772         0x795c3e3c, 0x003f3b4b, 0x002e1e3b, 0x3b1e1e00, 0x4e4c4b49, 0x237a03f0, 0x41600000, 0x795c3e3c,
2773         0x7a5e483e, 0x00493d4c, 0x0038283c, 0x3c282800, 0x584e4c4a, 0x247b0480, 0x41800000, 0x7a5e483e,
2774         0x7b684948, 0x004a3f4e, 0x0039293e, 0x3e292900, 0x59584e4b, 0x257d0510, 0x41900000, 0x7b684948,
2775         0x7d694a49, 0x004b484f, 0x003a2a3f, 0x3f2a2a00, 0x5a594f4d, 0x267e05a0, 0x41a00000, 0x7d694a49,
2776         0x7e6a4c4a, 0x004c4959, 0x003c2c49, 0x492c2c00, 0x5c5a594e, 0x27880678, 0x41b80000, 0x7e6a4c4a,
2777         0x886b4d4b, 0x004d4a59, 0x003d2d49, 0x492d2d00, 0x5d5b5958, 0x28890708, 0x41c80000, 0x886b4d4b,
2778         0x896d4f4d, 0x004f4c5b, 0x003f2f4b, 0x4b2f2f00, 0x5f5d5b59, 0x298a0828, 0x41e80000, 0x896d4f4d,
2779         0x8a6e584e, 0x00594d5c, 0x0048384c, 0x4c383800, 0x685e5c5a, 0x2a8b0900, 0x42000000, 0x8a6e584e,
2780         0x8b785958, 0x005a4f5e, 0x0049394e, 0x4e393900, 0x69685e5b, 0x2b8d0a20, 0x42100000, 0x8b785958,
2781         0x8d795a59, 0x005b585f, 0x004a3a4f, 0x4f3a3a00, 0x6a695f5d, 0x2c8e0b40, 0x42200000, 0x8d795a59,
2782         0x8e7a5b5a, 0x005c5968, 0x004b3b58, 0x583b3b00, 0x6b6a685e, 0x2d8f0ca8, 0x42340000, 0x8e7a5b5a,
2783         0x8f7b5d5b, 0x005e5a6a, 0x004d3d5a, 0x5a3d3d00, 0x6d6b6a68, 0x2e8f0e58, 0x424c0000, 0x8f7b5d5b,
2784         0x8f7c5e5c, 0x005f5c6b, 0x004e3e5b, 0x5b3e3e00, 0x6e6c6b69, 0x2f8f1008, 0x42640000, 0x8f7c5e5c,
2785         0x8f7e685e, 0x00695d6c, 0x0058485c, 0x5c484800, 0x6f6e6c6a, 0x308f1200, 0x42800000, 0x8f7e685e,
2786         0x8f886968, 0x006a5f6e, 0x0059495e, 0x5e494900, 0x6f6f6e6b, 0x318f1440, 0x42900000, 0x8f886968,
2787         0x8f896a69, 0x006b686f, 0x005a4a5f, 0x5f4a4a00, 0x6f6f6f6d, 0x328f16c8, 0x42a20000, 0x8f896a69,
2788         0x8f8a6b6a, 0x006c6979, 0x005b4b69, 0x694b4b00, 0x6f6f6f6e, 0x338f1998, 0x42b60000, 0x8f8a6b6a
2789     }
2790 };
2791 
// Constant lookup table declared with QP_NUM * 4 32-bit entries (208 initializers, eight per
// source line, i.e. two groups of four per line).
// NOTE(review): the name and the QP_NUM * 4 size suggest four lambda-related words per QP used
// by the BRC path with a Haar transform - confirm the per-QP layout against the consumer.
const uint32_t CodechalEncHevcStateG9::m_brcLambdaHaar[QP_NUM * 4] = {
    0x00000036, 0x00000024, 0x00000075, 0x00000800, 0x00000044, 0x0000002d, 0x00000084, 0x00000800,
    0x00000056, 0x00000039, 0x00000094, 0x00000800, 0x0000006c, 0x00000048, 0x000000a6, 0x00000800,
    0x00000089, 0x0000005b, 0x000000ba, 0x00000800, 0x000000ac, 0x00000073, 0x000000d1, 0x00000800,
    0x000000d9, 0x00000091, 0x000000eb, 0x00000800, 0x00000112, 0x000000b7, 0x00000108, 0x00000800,
    0x00000159, 0x000000e7, 0x00000128, 0x00000800, 0x000001b3, 0x00000123, 0x0000014d, 0x00000800,
    0x00000224, 0x0000016f, 0x00000175, 0x00000800, 0x000002b2, 0x000001cf, 0x000001a3, 0x00000800,
    0x00000366, 0x00000247, 0x000001d7, 0x00000800, 0x00000448, 0x000002df, 0x00000210, 0x00000800,
    0x00000565, 0x0000039e, 0x00000251, 0x00000800, 0x000006cc, 0x0000048f, 0x0000029a, 0x00000800,
    0x00000891, 0x000005be, 0x000002eb, 0x00001000, 0x00000acb, 0x0000073d, 0x00000347, 0x00001000,
    0x00000d99, 0x0000091e, 0x000003ae, 0x00001000, 0x00001122, 0x00000b7d, 0x00000421, 0x00001000,
    0x00001596, 0x00000e7a, 0x000004a2, 0x00001800, 0x00001b33, 0x0000123d, 0x00000534, 0x00001800,
    0x00002245, 0x000016fb, 0x000005d7, 0x00001800, 0x00002b2d, 0x00001cf4, 0x000014cf, 0x00002000,
    0x00003666, 0x0000247a, 0x0000275c, 0x00002000, 0x0000448a, 0x00002df6, 0x00003e23, 0x00002000,
    0x0000565a, 0x000039e8, 0x000059e8, 0x00002800, 0x00006ccc, 0x000048f5, 0x00007b8b, 0x00003000,
    0x00008914, 0x00005bec, 0x0000a412, 0x00003000, 0x0000acb5, 0x000073d1, 0x0000d4ac, 0x00003800,
    0x0000d999, 0x000091eb, 0x00010eb8, 0x00004000, 0x00011228, 0x0000b7d9, 0x000153ca, 0x00004800,
    0x0001596b, 0x0000e7a2, 0x0001a5b8, 0x00005000, 0x0001b333, 0x000123d7, 0x0002069e, 0x00005800,
    0x00022451, 0x00016fb2, 0x000278ed, 0x00006800, 0x0002b2d6, 0x0001cf44, 0x0002ff74, 0x00007000,
    0x00036666, 0x000247ae, 0x00039d70, 0x00008000, 0x000448a2, 0x0002df64, 0x00043590, 0x00009000,
    0x000565ac, 0x00039e88, 0x0004b986, 0x0000a000, 0x0006cccc, 0x00048f5c, 0x00054da5, 0x0000b800,
    0x00089145, 0x0005bec8, 0x0005f3e7, 0x0000c800, 0x000acb59, 0x00073d11, 0x0006ae86, 0x0000e800,
    0x000d9999, 0x00091eb8, 0x00078000, 0x00010000, 0x0011228a, 0x000b7d90, 0x00086b20, 0x00012000,
    0x001596b2, 0x000e7a23, 0x0009730c, 0x00014000, 0x001b3333, 0x00123d70, 0x000a9b4a, 0x00016800,
    0x00224515, 0x0016fb20, 0x000be7cf, 0x00019800, 0x002b2d64, 0x001cf446, 0x000d5d0d, 0x0001c800,
    0x00366666, 0x00247ae1, 0x000f0000, 0x00020000, 0x00448a2a, 0x002df640, 0x0010d641, 0x00024000,
    0x00565ac8, 0x0039e88c, 0x0012e618, 0x00028800, 0x006ccccc, 0x0048f5c2, 0x00153694, 0x0002d800
};
2820 
// Two rows of QP_NUM 16-bit values, one entry per QP. Values are non-decreasing with QP and
// every entry of the second row is exactly one third of the matching entry in the first row.
// NOTE(review): presumably skip thresholds ("skipThread" reads like "skip threshold"); what
// selects row 0 versus row 1 is not visible here - confirm against the caller.
const uint16_t CodechalEncHevcStateG9::m_skipThread[][QP_NUM] = {
    // Row 0
    {
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0024,
        0x0024, 0x0060, 0x0060, 0x0099, 0x0099, 0x00cf, 0x00cf, 0x0105,
        0x0105, 0x0141, 0x0141, 0x0183, 0x0183, 0x01ce, 0x01ce, 0x0228,
        0x0228, 0x0291, 0x0291, 0x030c, 0x030c, 0x039f, 0x039f, 0x0447,
        0x0447, 0x050d, 0x050d, 0x05f1, 0x05f1, 0x06f6, 0x06f6, 0x0822,
        0x0822, 0x0972, 0x0972, 0x0aef, 0x0aef, 0x0c96, 0x0c96, 0x0e70,
        0x0e70, 0x107a, 0x107a, 0x1284
    },

    // Row 1 (row 0 divided by three)
    {
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000c,
        0x000c, 0x0020, 0x0020, 0x0033, 0x0033, 0x0045, 0x0045, 0x0057,
        0x0057, 0x006b, 0x006b, 0x0081, 0x0081, 0x009a, 0x009a, 0x00b8,
        0x00b8, 0x00db, 0x00db, 0x0104, 0x0104, 0x0135, 0x0135, 0x016d,
        0x016d, 0x01af, 0x01af, 0x01fb, 0x01fb, 0x0252, 0x0252, 0x02b6,
        0x02b6, 0x0326, 0x0326, 0x03a5, 0x03a5, 0x0432, 0x0432, 0x04d0,
        0x04d0, 0x057e, 0x057e, 0x062c
    }
};
2842 
// Per-QP lambda lookup table with one row per slice-type label below (three rows of QP_NUM
// values). The entries match pow(2, (qp-12)/6) rounded to the nearest integer and floored at
// 1.0, and the three rows are currently identical.
// NOTE(review): "Md" presumably stands for mode decision - confirm.
const double CodechalEncHevcStateG9::m_qpLambdaMdLut[3][QP_NUM] =      // default lambda = pow(2, (qp-12)/6)
{
    //BREDSLICE (row 0)
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0,                //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0,         //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0    //QP=[39~51]
    },
    //PREDSLICE (row 1)
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0,                //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0,         //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0    //QP=[39~51]
    },
    //INTRASLICE (row 2)
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0,                //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0,         //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0    //QP=[39~51]
    }
};
2867 
// Per-QP lambda lookup table with one row per slice-type label below (three rows of QP_NUM
// values). The entries match pow(2, (qp-12)/6) rounded to the nearest integer and floored at
// 1.0; the three rows are currently identical and hold the same values as m_qpLambdaMdLut.
// NOTE(review): "Me" presumably stands for motion estimation - confirm.
const double CodechalEncHevcStateG9::m_qpLambdaMeLut[3][QP_NUM] =      // default lambda = pow(2, (qp-12)/6)
{
    //BREDSLICE (row 0)
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0,                //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0,         //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0    //QP=[39~51]
    },
    //PREDSLICE (row 1)
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0,                //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0,         //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0    //QP=[39~51]
    },
    //INTRASLICE (row 2)
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,                //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0,                //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0,         //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0    //QP=[39~51]
    }
};
2892 
// 56 constant 32-bit words (seven rows of eight).
// NOTE(review): presumably the default CURBE image for the B-frame encode kernel at target
// usage 1, patched per frame by the caller - confirm. The meaning of individual words is not
// visible here.
const uint32_t CodechalEncHevcStateG9::m_encBTu1BCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x00143939, 0x00a27700, 0x1000000f, 0x20200000, 0x01000140, 0x00400003,
    0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x00161616, 0x13130013, 0x13131313,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x010101ce, 0x00040c24, 0x40400000, 0x005800d8, 0x40000001, 0x00001616, 0x00000000, 0x00000016,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0125, 0x08080201, 0x05030502, 0x00031101,
    0x00020001, 0x00000000, 0x00000001, 0x00000000, 0x00100014, 0x00000000, 0x00000000, 0x00000000
};
2903 
// 56 constant 32-bit words (seven rows of eight).
// NOTE(review): presumably the default CURBE image for the B-frame encode kernel at target
// usage 4, patched per frame by the caller - confirm. The meaning of individual words is not
// visible here.
const uint32_t CodechalEncHevcStateG9::m_encBTu4BCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x00143939, 0x00a27700, 0x1000000f, 0x20200000, 0x01000140, 0x00400003,
    0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x00161616, 0x13130013, 0x13131313,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x010101ce, 0x00040c24, 0x40400000, 0x005800d8, 0x40000001, 0x00001616, 0x00000000, 0x00000016,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0125, 0x08080201, 0x05030502, 0x0c033104,
    0x00020001, 0x00000000, 0x00000001, 0x00000000, 0x0010000d, 0x00000000, 0x00000000, 0x00000000
};
2914 
// 56 constant 32-bit words (seven rows of eight).
// NOTE(review): presumably the default CURBE image for the B-frame encode kernel at target
// usage 7, patched per frame by the caller - confirm. The meaning of individual words is not
// visible here.
const uint32_t CodechalEncHevcStateG9::m_encBTu7BCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x00143919, 0x00a27700, 0x1000000f, 0x20200000, 0x01000140, 0x00400003,
    0x5f4b2c2b, 0x002d2a39, 0x001c0c29, 0x290c0c00, 0x3c3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x120ff10f, 0x1e22e20d, 0x20e2ff10, 0x2edd06fc, 0x11d33ff1, 0xeb1ff33d, 0x4ef1f1f1, 0xf1f21211,
    0x0dffffe0, 0x11201f1f, 0x1105f1cf, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x0101030c, 0x00040c24, 0x40c00000, 0x006801b0, 0x40000000, 0x0000001b, 0x00000000, 0x0000001b,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0115, 0x08080201, 0x05030502, 0x0c034104,
    0x00000001, 0x00000000, 0x00000001, 0x00000000, 0x0010000d, 0x00000000, 0x00000000, 0x00000000
};
2925 
// 56 constant 32-bit words (seven rows of eight); the values are currently identical to
// m_encBTu4PCurbeInit.
// NOTE(review): presumably the default CURBE image for the P-frame variant of the encode
// kernel at target usage 1, patched per frame by the caller - confirm.
const uint32_t CodechalEncHevcStateG9::m_encBTu1PCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x000b3919, 0x00a63000, 0x30000008, 0x28300000, 0x009000b0, 0x00400063,
    0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x80010165, 0x00040c24, 0x40c00000, 0x04680174, 0x41000002, 0x001b1b1b, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0035, 0x08080201, 0x05030502, 0x00032000,
    0x00020001, 0x00000003, 0x00000000, 0x00000000, 0x000a000e, 0x00002830, 0x00000000, 0x00000000

};
2937 
// 56 constant 32-bit words (seven rows of eight); the values are currently identical to
// m_encBTu1PCurbeInit.
// NOTE(review): presumably the default CURBE image for the P-frame variant of the encode
// kernel at target usage 4, patched per frame by the caller - confirm.
const uint32_t CodechalEncHevcStateG9::m_encBTu4PCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x000b3919, 0x00a63000, 0x30000008, 0x28300000, 0x009000b0, 0x00400063,
    0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x80010165, 0x00040c24, 0x40c00000, 0x04680174, 0x41000002, 0x001b1b1b, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0035, 0x08080201, 0x05030502, 0x00032000,
    0x00020001, 0x00000003, 0x00000000, 0x00000000, 0x000a000e, 0x00002830, 0x00000000, 0x00000000

};
2949 
// 56 constant 32-bit words (seven rows of eight).
// NOTE(review): presumably the default CURBE image for the P-frame variant of the encode
// kernel at target usage 7, patched per frame by the caller - confirm. The meaning of
// individual words is not visible here.
const uint32_t CodechalEncHevcStateG9::m_encBTu7PCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x000b3919, 0x00a63000, 0x30000008, 0x28300000, 0x009000b0, 0x00400063,
    0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x120ff10f, 0x1e22e20d, 0x20e2ff10, 0x2edd06fc, 0x11d33ff1, 0xeb1ff33d, 0x4ef1f1f1, 0xf1f21211,
    0x0dffffe0, 0x11201f1f, 0x1105f1cf, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x80010165, 0x00040c24, 0x40c00000, 0x04680174, 0x41000002, 0x001b1b1b, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0035, 0x08080201, 0x05030502, 0x00032000,
    0x00020001, 0x00000003, 0x00000000, 0x00000000, 0x000a000e, 0x00002830, 0x00000000, 0x00000000
};
2960 
// 56 constant 32-bit words (seven rows of eight).
// NOTE(review): presumably the default CURBE image for the I-frame variant of the encode
// kernel at target usage 7, patched per frame by the caller - confirm. The meaning of
// individual words is not visible here.
const uint32_t CodechalEncHevcStateG9::m_encBTu7ICurbeInit[56] =
{
    0x000000a2, 0x00200008, 0x00143919, 0x00a03000, 0x5000000f, 0x28300000, 0x01000140, 0x00000003,
    0x5a3d0029, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x120ff10f, 0x1e22e20d, 0x20e2ff10, 0x2edd06fc, 0x11d33ff1, 0xeb1ff33d, 0x4ef1f1f1, 0xf1f21211,
    0x0dffffe0, 0x11201f1f, 0x1105f1cf, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x8080030c, 0x00040c24, 0x40a6e43f, 0x005f0139, 0x40000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0005, 0x08080201, 0x05030502, 0x0c034104,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0010000d, 0x00000000, 0x00000000, 0x00000000
};
2971 
// 32 constant 32-bit words (four rows of eight); the last seven words are zero.
// NOTE(review): presumably the default CURBE image for the BRC init kernel - confirm. The
// meaning of individual words is not visible here.
const uint32_t CodechalEncHevcStateG9::m_brcInitCurbeInit[32] =
{
    0x000a8c00, 0x0112a880, 0x016e3600, 0x00b71b00, 0x00b71b00, 0x00000000, 0x0000001e, 0x00000001,
    0x000a0040, 0x05000000, 0x001e02d0, 0x000100c8, 0x00010033, 0x00000000, 0x00010000, 0x00000000,
    0x78503c28, 0x78503c23, 0x735a3c28, 0xe5dfd8d1, 0x2f29211b, 0xe5ddd7d1, 0x5e56463f, 0xeae3dad4,
    0x2f281f16, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
};
2979 
// 16 constant 32-bit words (two rows of eight).
// NOTE(review): presumably the default CURBE image for the BRC update kernel - confirm. The
// meaning of individual words is not visible here.
const uint32_t CodechalEncHevcStateG9::m_brcUpdateCurbeInit[16] =
{
    0x0112a880, 0x00000000, 0x00000230, 0x0042000d, 0x00c80085, 0x02044000, 0x00000000, 0x00000000,
    0x02030101, 0x05052801, 0x12070103, 0x4b282519, 0xa07d6761, 0x00fffefd, 0x00030201, 0x00000000
};
2985 
// 39 constant 32-bit words: four rows of eight followed by seven words of 0xffffffff.
// NOTE(review): presumably the default CURBE image for the ME kernel, with the trailing
// 0xffffffff words acting as "unset" placeholders to be patched by the caller - confirm.
const uint32_t CodechalEncHevcStateG9::m_meCurbeInit[39] =
{
    0x00000000, 0x00200008, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
2994 
PicCodingTypeToSliceType(uint16_t pictureCodingType)2995 uint8_t CodechalEncHevcStateG9::PicCodingTypeToSliceType(uint16_t pictureCodingType)
2996 {
2997     uint8_t sliceType = 0;
2998 
2999     switch (pictureCodingType)
3000     {
3001     case I_TYPE:
3002         sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
3003         break;
3004     case P_TYPE:
3005         sliceType = CODECHAL_ENCODE_HEVC_P_SLICE;
3006         break;
3007     case B_TYPE:
3008     case B1_TYPE:
3009     case B2_TYPE:
3010         sliceType = CODECHAL_ENCODE_HEVC_B_SLICE;
3011         break;
3012     default:
3013         CODECHAL_ENCODE_ASSERT(false);
3014     }
3015     return sliceType;
3016 }
3017 
GenerateSliceMap()3018 MOS_STATUS CodechalEncHevcStateG9::GenerateSliceMap()
3019 {
3020     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3021 
3022     CODECHAL_ENCODE_FUNCTION_ENTER;
3023 
3024     if (m_numSlices > 1 && m_sliceMap)
3025     {
3026         uint32_t log2LcuSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3027         CODECHAL_ENCODE_ASSERT(log2LcuSize == 5);
3028 
3029         uint32_t W = MOS_ALIGN_CEIL(m_frameWidth, (1 << log2LcuSize)) >> log2LcuSize;
3030         uint32_t H = MOS_ALIGN_CEIL(m_frameHeight, (1 << log2LcuSize)) >> log2LcuSize;
3031         if (m_sliceMapSurface.dwPitch < W * sizeof(m_sliceMap[0]))
3032         {
3033             eStatus = MOS_STATUS_MORE_DATA;
3034             return eStatus;
3035         }
3036 
3037         MOS_LOCK_PARAMS lockFlags;
3038         MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
3039         lockFlags.WriteOnly = true;
3040 
3041         uint8_t*  surface = (uint8_t* )m_osInterface->pfnLockResource(
3042             m_osInterface,
3043             &m_sliceMapSurface.OsResource,
3044             &lockFlags);
3045 
3046         if (surface == nullptr)
3047         {
3048             eStatus = MOS_STATUS_NULL_POINTER;
3049             return eStatus;
3050         }
3051 
3052         for (uint32_t h = 0; h < H; h++, surface += m_sliceMapSurface.dwPitch)
3053         {
3054             PCODECHAL_ENCODE_HEVC_SLICE_MAP map = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)surface;
3055             for (uint32_t w = 0; w < W; w++)
3056             {
3057                 map[w] = m_sliceMap[h * W + w];
3058             }
3059         }
3060 
3061         m_osInterface->pfnUnlockResource(
3062             m_osInterface,
3063             &m_sliceMapSurface.OsResource);
3064     }
3065     else if (m_numSlices == 1 && m_lastNumSlices != m_numSlices)
3066     {
3067         // Reset slice map surface
3068         MOS_LOCK_PARAMS lockFlags;
3069         MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
3070         lockFlags.WriteOnly = true;
3071 
3072         uint8_t*  surface = (uint8_t* )m_osInterface->pfnLockResource(
3073             m_osInterface,
3074             &m_sliceMapSurface.OsResource,
3075             &lockFlags);
3076 
3077         if (surface == nullptr)
3078         {
3079             eStatus = MOS_STATUS_NULL_POINTER;
3080             return eStatus;
3081         }
3082 
3083         MOS_ZeroMemory(surface,
3084             m_sliceMapSurface.dwWidth * m_sliceMapSurface.dwHeight);
3085 
3086         m_osInterface->pfnUnlockResource(
3087             m_osInterface,
3088             &m_sliceMapSurface.OsResource);
3089     }
3090 
3091     m_lastNumSlices = m_numSlices;
3092 
3093     return eStatus;
3094 }
3095 
SetSliceStructs()3096 MOS_STATUS CodechalEncHevcStateG9::SetSliceStructs()
3097 {
3098     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3099 
3100     CODECHAL_ENCODE_FUNCTION_ENTER;
3101 
3102     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSliceStructs());
3103 
3104     // setup slice map
3105     PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams = m_hevcSliceParams;
3106     for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++, slcParams++)
3107     {
3108         if (!m_hevcPicParams->tiles_enabled_flag)
3109         {
3110             CODECHAL_ENCODE_ASSERT(slcParams->slice_segment_address == startLCU);
3111 
3112             // process slice map
3113             for (uint32_t i = 0; i < slcParams->NumLCUsInSlice; i++)
3114             {
3115                 m_sliceMap[startLCU + i].ucSliceID = (uint8_t)slcCount;
3116             }
3117 
3118             startLCU += slcParams->NumLCUsInSlice;
3119         }
3120     }
3121 
3122     return eStatus;
3123 }
3124 
SetSequenceStructs()3125 MOS_STATUS CodechalEncHevcStateG9::SetSequenceStructs()
3126 {
3127     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3128 
3129     CODECHAL_ENCODE_FUNCTION_ENTER;
3130 
3131     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSequenceStructs());
3132 
3133     // TU1 has no wave-front split
3134     if (m_hevcSeqParams->TargetUsage == 1 && m_numRegionsInSlice != 1)
3135     {
3136         m_numRegionsInSlice = 1;
3137     }
3138 
3139     return eStatus;
3140 }
3141 
SetPictureStructs()3142 MOS_STATUS CodechalEncHevcStateG9::SetPictureStructs()
3143 {
3144     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3145 
3146     CODECHAL_ENCODE_FUNCTION_ENTER;
3147 
3148     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs());
3149 
3150     /* dwOriFrameWidth and dwOriFrameHeight must be CU-aligned in HEVC. Set the recon and raw surface resolution as
3151     the actual encoding resolution.
3152     */
3153     m_rawSurface.dwWidth = m_reconSurface.dwWidth = m_oriFrameWidth;
3154     m_rawSurface.dwHeight = m_reconSurface.dwHeight = m_oriFrameHeight;
3155 
3156     m_firstIntraRefresh = true;
3157     m_frameNumInGob = (m_pictureCodingType == I_TYPE) ? 0 : (m_frameNumInGob + 1);
3158 
3159     return eStatus;
3160 }
3161 
CalcScaledDimensions()3162 MOS_STATUS CodechalEncHevcStateG9::CalcScaledDimensions()
3163 {
3164     CODECHAL_ENCODE_FUNCTION_ENTER;
3165 
3166     m_downscaledWidthInMb4x               =
3167         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x);
3168 
3169     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit) && m_hevcSeqParams->bit_depth_luma_minus8)
3170     {
3171         uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2));
3172         m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x);
3173     }
3174 
3175     m_downscaledHeightInMb4x              =
3176         CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x);
3177     m_downscaledWidth4x =
3178         m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH;
3179     m_downscaledHeight4x =
3180         m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT;
3181 
3182     // SuperHME Scaling WxH
3183     m_downscaledWidthInMb16x              =
3184         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x);
3185     m_downscaledHeightInMb16x             =
3186         CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x);
3187     m_downscaledWidth16x =
3188         m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH;
3189     m_downscaledHeight16x =
3190         m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT;
3191 
3192     // UltraHME Scaling WxH
3193     m_downscaledWidthInMb32x              =
3194         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_32x);
3195     m_downscaledHeightInMb32x             =
3196         CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_32x);
3197     m_downscaledWidth32x =
3198         m_downscaledWidthInMb32x * CODECHAL_MACROBLOCK_WIDTH;
3199     m_downscaledHeight32x =
3200         m_downscaledHeightInMb32x * CODECHAL_MACROBLOCK_HEIGHT;
3201 
3202     return MOS_STATUS_SUCCESS;
3203 }
3204 
LoadCosts(uint8_t sliceType,uint8_t qp,uint8_t intraSADTransform)3205 void CodechalEncHevcStateG9::LoadCosts(
3206     uint8_t sliceType,
3207     uint8_t qp,
3208     uint8_t intraSADTransform)
3209 {
3210     float hadBias = 2.0f;
3211 
3212     if (intraSADTransform == INTRA_TRANSFORM_HADAMARD)
3213     {
3214         hadBias = 1.67f;
3215     }
3216 
3217     double lambdaMd = m_qpLambdaMd[sliceType][qp];
3218     double lambdaMe = m_qpLambdaMe[sliceType][qp];
3219 
3220     m_modeCost[0] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][0] * hadBias), 0x6f);
3221     m_modeCost[1] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][1] * hadBias), 0x8f);
3222     m_modeCost[2] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][2] * hadBias), 0x8f);
3223     m_modeCost[3] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][3] * hadBias), 0x8f);
3224     m_modeCost[4] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][4] * hadBias), 0x8f);
3225     m_modeCost[5] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][5] * hadBias), 0x6f);
3226     m_modeCost[6] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][6] * hadBias), 0x6f);
3227     m_modeCost[7] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][7] * hadBias), 0x6f);
3228     m_modeCost[8] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][8] * hadBias), 0x8f);
3229     m_modeCost[9] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][9] * hadBias), 0x6f);
3230     m_modeCost[10] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][10] * hadBias), 0x6f);
3231     m_modeCost[11] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][11] * hadBias), 0x6f);
3232 
3233     m_mvCost[0] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][0] * hadBias), 0x6f);
3234     m_mvCost[1] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][1] * hadBias), 0x6f);
3235     m_mvCost[2] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][2] * hadBias), 0x6f);
3236     m_mvCost[3] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][3] * hadBias), 0x6f);
3237     m_mvCost[4] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][4] * hadBias), 0x6f);
3238     m_mvCost[5] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][5] * hadBias), 0x6f);
3239     m_mvCost[6] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][6] * hadBias), 0x6f);
3240     m_mvCost[7] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][7] * hadBias), 0x6f);
3241 
3242     double m_lambdaMd = lambdaMd * hadBias;
3243     m_simplestIntraInterThreshold = 0;
3244     if (m_modeCostLut[sliceType][1] < m_modeCostLut[sliceType][3])
3245     {
3246         m_simplestIntraInterThreshold = (uint32_t)(m_lambdaMd * (m_modeCostLut[sliceType][3] - m_modeCostLut[sliceType][1]) + 0.5);
3247     }
3248 
3249     m_modeCostSp = Map44LutValue((uint32_t)(lambdaMd * 45 * hadBias), 0x8f);
3250 }
3251 
CalcForwardCoeffThd(uint8_t * forwardCoeffThresh,int32_t qp)3252 void CodechalEncHevcStateG9::CalcForwardCoeffThd(uint8_t* forwardCoeffThresh, int32_t qp)
3253 {
3254     static const uint8_t FTQ25I[27] =
3255     {
3256         0, 0, 0, 0,
3257         1, 3, 6, 8, 11,
3258         13, 16, 19, 22, 26,
3259         30, 34, 39, 44, 50,
3260         56, 62, 69, 77, 85,
3261         94, 104, 115
3262     };
3263 
3264     uint8_t idx = (qp + 1) >> 1;
3265 
3266     forwardCoeffThresh[0] =
3267     forwardCoeffThresh[1] =
3268     forwardCoeffThresh[2] =
3269     forwardCoeffThresh[3] =
3270     forwardCoeffThresh[4] =
3271     forwardCoeffThresh[5] =
3272     forwardCoeffThresh[6] = FTQ25I[idx];
3273 }
3274 
GetQPValueFromRefList(uint32_t list,uint32_t index)3275 uint8_t CodechalEncHevcStateG9::GetQPValueFromRefList(uint32_t list, uint32_t index)
3276 {
3277     CODECHAL_ENCODE_ASSERT(list == LIST_0 || list == LIST_1);
3278     CODECHAL_ENCODE_ASSERT(index < CODEC_MAX_NUM_REF_FRAME_HEVC);
3279 
3280     CODEC_PICTURE picture = m_hevcSliceParams->RefPicList[list][index];
3281 
3282     if (!CodecHal_PictureIsInvalid(picture) && m_picIdx[picture.FrameIdx].bValid)
3283     {
3284         auto picIdx = m_picIdx[picture.FrameIdx].ucPicIdx;
3285         return m_refList[picIdx]->ucQPValue[0];
3286     }
3287     else
3288     {
3289         return 0;
3290     }
3291 }
3292 
GetMaxRefFrames(uint8_t & maxNumRef0,uint8_t & maxNumRef1)3293 void CodechalEncHevcStateG9::GetMaxRefFrames(uint8_t& maxNumRef0, uint8_t& maxNumRef1)
3294 {
3295     maxNumRef0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G9;
3296     maxNumRef1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G9;
3297 
3298     return;
3299 }
3300 
InitParamForWalkerVfe26z(uint32_t numRegionsInSlice,uint32_t maxSliceHeight)3301 void CodechalEncHevcStateG9::InitParamForWalkerVfe26z(
3302     uint32_t numRegionsInSlice,
3303     uint32_t maxSliceHeight)
3304 {
3305     int32_t width = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
3306     int32_t height = maxSliceHeight * 2;
3307     int32_t tsWidth = ((width + 3) & 0xfffc) >> 1;
3308     int32_t lcuWidth = (width + 1) >> 1;
3309     int32_t lcuHeight = (height + 1) >> 1;
3310     int32_t tmp1 = ((lcuWidth + 1) >> 1) + ((lcuWidth + ((lcuHeight - 1) << 1)) + (2 * numRegionsInSlice - 1)) / (2 * numRegionsInSlice);
3311 
3312     m_walkingPatternParam.MediaWalker.UseScoreboard = m_useHwScoreboard;
3313     m_walkingPatternParam.MediaWalker.ScoreboardMask = 0xFF;
3314     m_walkingPatternParam.MediaWalker.GlobalResolution.x = tsWidth;
3315     m_walkingPatternParam.MediaWalker.GlobalResolution.y = 4 * tmp1;
3316 
3317     m_walkingPatternParam.MediaWalker.GlobalStart.x = 0;
3318     m_walkingPatternParam.MediaWalker.GlobalStart.y = 0;
3319 
3320     m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.x = tsWidth;
3321     m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.y = 0;
3322 
3323     m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.x = 0;
3324     m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.y = 4 * tmp1;
3325 
3326     m_walkingPatternParam.MediaWalker.BlockResolution.x = tsWidth;
3327     m_walkingPatternParam.MediaWalker.BlockResolution.y = 4 * tmp1;
3328 
3329     m_walkingPatternParam.MediaWalker.LocalStart.x = tsWidth;
3330     m_walkingPatternParam.MediaWalker.LocalStart.y = 0;
3331 
3332     m_walkingPatternParam.MediaWalker.LocalEnd.x = 0;
3333     m_walkingPatternParam.MediaWalker.LocalEnd.y = 0;
3334 
3335     m_walkingPatternParam.MediaWalker.LocalOutLoopStride.x = 1;
3336     m_walkingPatternParam.MediaWalker.LocalOutLoopStride.y = 0;
3337 
3338     m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16);
3339     m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.y = 4;
3340 
3341     m_walkingPatternParam.MediaWalker.MiddleLoopExtraSteps = 3;
3342 
3343     m_walkingPatternParam.MediaWalker.MidLoopUnitX = 0;
3344     m_walkingPatternParam.MediaWalker.MidLoopUnitY = 1;
3345 
3346     m_walkingPatternParam.MediaWalker.dwGlobalLoopExecCount = 0;
3347     m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = 2 * ((lcuWidth + (lcuHeight - 1) * 2 + 2 * numRegionsInSlice - 1) / (2 * numRegionsInSlice)) - 1;
3348 
3349     m_walkingPatternParam.ScoreBoard.ScoreboardEnable = m_useHwScoreboard;
3350     m_walkingPatternParam.ScoreBoard.ScoreboardType = m_hwScoreboardType;
3351     m_walkingPatternParam.ScoreBoard.ScoreboardMask = 0xff;
3352 
3353     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3354     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].y = 3;
3355 
3356     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3357     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].y = 1;
3358 
3359     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3360     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3361 
3362     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].x = 0;
3363     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3364 
3365     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[4].x = 0;
3366     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[4].y = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
3367 
3368     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[5].x = 0;
3369     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[5].y = MOS_BITFIELD_VALUE((uint32_t)-3, 4);
3370 
3371     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[6].x = 1;
3372     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[6].y = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
3373 
3374     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[7].x = 1;
3375     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[7].y = MOS_BITFIELD_VALUE((uint32_t)-3, 4);
3376 
3377     m_walkingPatternParam.Offset_Y = -4 * ((lcuWidth + 1) >> 1);
3378     m_walkingPatternParam.Offset_Delta = ((lcuWidth + ((lcuHeight - 1) << 1)) + (numRegionsInSlice - 1)) / (numRegionsInSlice);
3379 }
3380 
InitParamForWalkerVfe26(uint32_t numRegionsInSlice,uint32_t maxSliceHeight)3381 void CodechalEncHevcStateG9::InitParamForWalkerVfe26(
3382     uint32_t numRegionsInSlice,
3383     uint32_t maxSliceHeight)
3384 {
3385     int32_t width = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
3386     int32_t height = maxSliceHeight;
3387     int32_t tsWidth = (width + 1) & 0xfffe;
3388     int32_t tsHeight = (height + 1) & 0xfffe;
3389     int32_t tmp1 = ((tsWidth + 1) >> 1) + ((tsWidth + ((tsHeight - 1) << 1)) + (2 * numRegionsInSlice - 1)) / (2 * numRegionsInSlice);
3390 
3391     m_walkingPatternParam.MediaWalker.UseScoreboard = m_useHwScoreboard;
3392     m_walkingPatternParam.MediaWalker.ScoreboardMask = 0x0F;
3393     m_walkingPatternParam.MediaWalker.GlobalResolution.x = tsWidth;
3394     m_walkingPatternParam.MediaWalker.GlobalResolution.y = tmp1;    // tsHeight;
3395 
3396     m_walkingPatternParam.MediaWalker.GlobalStart.x = 0;
3397     m_walkingPatternParam.MediaWalker.GlobalStart.y = 0;
3398 
3399     m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.x = tsWidth;
3400     m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.y = 0;
3401 
3402     m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.x = 0;
3403     m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.y = tmp1;
3404 
3405     m_walkingPatternParam.MediaWalker.BlockResolution.x = tsWidth;
3406     m_walkingPatternParam.MediaWalker.BlockResolution.y = tmp1;
3407 
3408     m_walkingPatternParam.MediaWalker.LocalStart.x = tsWidth;
3409     m_walkingPatternParam.MediaWalker.LocalStart.y = 0;
3410 
3411     m_walkingPatternParam.MediaWalker.LocalEnd.x = 0;
3412     m_walkingPatternParam.MediaWalker.LocalEnd.y = 0;
3413 
3414     m_walkingPatternParam.MediaWalker.LocalOutLoopStride.x = 1;
3415     m_walkingPatternParam.MediaWalker.LocalOutLoopStride.y = 0;
3416 
3417     m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16);
3418     m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.y = 1;
3419 
3420     m_walkingPatternParam.MediaWalker.MiddleLoopExtraSteps = 0;
3421 
3422     m_walkingPatternParam.MediaWalker.MidLoopUnitX = 0;
3423     m_walkingPatternParam.MediaWalker.MidLoopUnitY = 0;
3424 
3425     m_walkingPatternParam.MediaWalker.dwGlobalLoopExecCount = 0;
3426 
3427     m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = (width + (height - 1) * 2 + numRegionsInSlice - 1) / numRegionsInSlice;
3428 
3429     m_walkingPatternParam.ScoreBoard.ScoreboardEnable = m_useHwScoreboard;
3430     m_walkingPatternParam.ScoreBoard.ScoreboardType = m_hwScoreboardType;
3431     m_walkingPatternParam.ScoreBoard.ScoreboardMask = 0x0f;
3432 
3433     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3434     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].y = 0;
3435 
3436     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3437     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3438 
3439     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].x = 0;
3440     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3441 
3442     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].x = 1;
3443     m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3444 
3445     m_walkingPatternParam.Offset_Y = -((width + 1) >> 1);
3446     m_walkingPatternParam.Offset_Delta = ((width + ((height - 1) << 1)) + (numRegionsInSlice - 1)) / (numRegionsInSlice);
3447 }
3448 
//!
//! \brief    Build the walking-control region table and the media walker parameters for the current frame
//! \details  Steps, in order:
//!           1. Reset m_walkingPatternParam and clamp m_numRegionsInSlice to [1, 16].
//!           2. Derive the start row of every slice. A slice whose segment address is not a
//!              multiple of the frame width in 32-pixel LCUs marks the frame as "arbitrary slices".
//!           3. For arbitrary slices the frame is handled as one slice; otherwise neighbouring
//!              slices are merged until numSlices * m_numRegionsInSlice fits in
//!              CODECHAL_MEDIA_WALKER_MAX_COLORS.
//!           4. Fill regionsStartTable: entries [0, 32) hold the start X of each region,
//!              entries [32, 64) hold the start Y.
//!           5. Turn the table into 8-word records (datatmp), call InitParamForWalkerVfe26 or
//!              InitParamForWalkerVfe26z, and copy the records into the locked
//!              concurrent-thread surface.
//! \return   MOS_STATUS
//!           MOS_STATUS_NULL_POINTER when the concurrent-thread surface cannot be locked;
//!           otherwise eStatus, which stays MOS_STATUS_SUCCESS unless the debug-only surface
//!           dump overwrites it
//!
GenerateWalkingControlRegion()3449 MOS_STATUS CodechalEncHevcStateG9::GenerateWalkingControlRegion()
3450 {
3451     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3452 
3453     CODECHAL_ENCODE_FUNCTION_ENTER;
3454 
         // Start from a clean descriptor; the InitParamForWalkerVfe26* helpers fill it in later.
3455     MOS_ZeroMemory(&m_walkingPatternParam, sizeof(m_walkingPatternParam));
3456 
3457     if (m_numRegionsInSlice < 1)
3458     {
3459         // Region number cannot be smaller than 1
3460         m_numRegionsInSlice = 1;
3461     }
3462 
3463     if (m_numRegionsInSlice > 16)
3464     {
3465         // Region number cannot be larger than 16
3466         m_numRegionsInSlice = 16;
3467     }
3468 
         // Frame size in walking units: the 26 degree pattern passes a unit size of 16,
         // the 26z pattern a unit size of 32.
3469     uint32_t frameWidthInUnits = 0, frameHeightInUnits = 0;
3470     if (m_enable26WalkingPattern) /* 26 degree walking pattern */
3471     {
3472         frameWidthInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 16);
3473         frameHeightInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameHeight, 16);
3474     }
3475     else /* 26z walking pattern */
3476     {
3477         frameWidthInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32);
3478         frameHeightInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameHeight, 32);
3479     }
3480 
3481     // THE FOLLOWING CODE FOR SLICE MERGING / CONCURRENT THREAD GENERATION IS PORTED FROM THE
3482     // SKL HEVC KRN CMODEL (v8992). FOR FIXES VERIFY THAT PROBLEM DOESN'T EXIST THERE TOO.
         // sliceStartY[s] is the first row of slice s; one extra entry is kept for the end-of-frame sentinel.
3483     bool isArbitrarySlices = false;
3484     int32_t sliceStartY[CODECHAL_HEVC_MAX_NUM_SLICES_LVL_5 + 1] = { 0 };
3485     for (uint32_t slice = 0; slice < m_numSlices; slice++)
3486     {
             // A slice that does not begin at the start of a 32-pixel LCU row cannot be described by a start row.
3487         if (m_hevcSliceParams[slice].slice_segment_address % CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32))
3488         {
3489             isArbitrarySlices = true;
3490         }
3491         else
3492         {
3493             sliceStartY[slice] = m_hevcSliceParams[slice].slice_segment_address / CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32);
3494 
3495             /* 26 degree walking pattern */
3496             if (m_enable26WalkingPattern)
3497             {
                     // The row above is counted in 32-pixel LCUs; the 26 degree pattern uses 16-pixel units.
3498                 sliceStartY[slice] *= 2;
3499             }
3500         }
3501     }
3502 
         // Sentinel: the row just past the last slice.
3503     sliceStartY[m_numSlices] = frameHeightInUnits;
3504 
         // regionsStartTable layout: [0, regionStartYOffset) = start X per region,
         // [regionStartYOffset, 64) = start Y per region.
3505     const uint32_t regionStartYOffset = 32;
3506     uint32_t numRegions = 1;
3507     uint32_t numSlices = 0, height = 0;
3508     int32_t maxHeight = 0;
3509     uint16_t regionsStartTable[64] = { 0 };
3510 
3511     if (isArbitrarySlices)
3512     {
             // Treat the whole frame as a single slice.
3513         height = frameHeightInUnits;
3514         numSlices = 1;
3515         maxHeight = height;
3516         if (m_numRegionsInSlice > 1)
3517         {
                 // Length of one region, obtained by splitting (width + 2 * (height - 1)) units
                 // evenly across the regions, rounded up.
3518             uint32_t numUnitInRegion =
3519                 (frameWidthInUnits + 2 * (frameHeightInUnits - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
3520 
3521             numRegions = m_numRegionsInSlice;
3522 
                 // Region 0 keeps the zero-initialized start (0, 0).
3523             for (uint32_t i = 1; i < m_numRegionsInSlice; i++)
3524             {
3525                 uint32_t front = i*numUnitInRegion;
3526 
                     // While the front lies inside the frame width it is the start X directly.
                     // Otherwise start at the last or second-to-last column, whichever makes
                     // (front - startX) even, so the shift by one below is exact.
3527                 if (front < frameWidthInUnits)
3528                 {
3529                     regionsStartTable[i] = (uint16_t)front;
3530                 }
3531                 else if (((front - frameWidthInUnits + 1) & 1) == 0)
3532                 {
3533                     regionsStartTable[i] = (uint16_t)frameWidthInUnits - 1;
3534                 }
3535                 else
3536                 {
3537                     regionsStartTable[i] = (uint16_t)frameWidthInUnits - 2;
3538                 }
3539 
                     // The start row is half of the remaining distance.
3540                 regionsStartTable[regionStartYOffset + i] = (uint16_t)((front - regionsStartTable[i]) >> 1);
3541             }
3542         }
3543     }
3544     else
3545     {
3546         maxHeight = 0;
3547         numSlices = m_numSlices;
3548 
             // Find the tallest slice.
3549         for (uint32_t slice = 0; slice < numSlices; slice++)
3550         {
3551             int32_t sliceHeight = sliceStartY[slice + 1] - sliceStartY[slice];
3552             if (sliceHeight > maxHeight)
3553             {
3554                 maxHeight = sliceHeight;
3555             }
3556         }
3557 
             // Merge slices until numSlices * m_numRegionsInSlice fits in CODECHAL_MEDIA_WALKER_MAX_COLORS.
3558         bool sliceIsMerged = false;
3559         while (!sliceIsMerged)
3560         {
3561             int32_t newNumSlices = 1;
3562             int32_t startY = 0;
3563 
                 // Pass 1: drop (mark with -1) every boundary whose slice still fits,
                 // together with the slices before it, within maxHeight rows.
3564             for (uint32_t slice = 1; slice < numSlices; slice++)
3565             {
3566                 if ((sliceStartY[slice + 1] - startY) <= maxHeight)
3567                 {
3568                     sliceStartY[slice] = -1;
3569                 }
3570                 else
3571                 {
3572                     startY = sliceStartY[slice];
3573                 }
3574             }
3575 
                 // Pass 2: compact the surviving boundaries to the front of the array.
3576             for (uint32_t slice = 1; slice < numSlices; slice++)
3577             {
3578                 if (sliceStartY[slice] > 0)
3579                 {
3580                     sliceStartY[newNumSlices] = sliceStartY[slice];
3581                     newNumSlices++;
3582                 }
3583             }
3584 
3585             numSlices = newNumSlices;
3586             sliceStartY[numSlices] = frameHeightInUnits;
3587 
3588             /* very rough estimation */
3589             if (numSlices * m_numRegionsInSlice <= CODECHAL_MEDIA_WALKER_MAX_COLORS)
3590             {
3591                 sliceIsMerged = true;
3592             }
3593             else
3594             {
                     // Still too many: find the adjacent pair with the smallest combined height,
                     // make that the new maxHeight, remove the boundary between the pair and retry.
3595                 int32_t num = 1;
3596 
3597                 maxHeight = frameHeightInUnits;
3598 
3599                 for (uint32_t slice = 0; slice < numSlices - 1; slice++)
3600                 {
3601                     if ((sliceStartY[slice + 2] - sliceStartY[slice]) <= maxHeight)
3602                     {
3603                         maxHeight = sliceStartY[slice + 2] - sliceStartY[slice];
3604                         num = slice + 1;
3605                     }
3606                 }
3607 
3608                 for (uint32_t slice = num; slice < numSlices; slice++)
3609                 {
3610                     sliceStartY[slice] = sliceStartY[slice + 1];
3611                 }
3612 
3613                 numSlices--;
3614             }
3615         }
3616 
             // Same region-length formula as the arbitrary-slice path, using the tallest merged slice.
3617         uint32_t numUnitInRegion =
3618             (frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
3619 
3620         numRegions = numSlices * m_numRegionsInSlice;
3621 
3622         CODECHAL_ENCODE_ASSERT(numRegions != 0); // Making sure that the number of regions is at least 1
3623 
3624         for (uint32_t slice = 0; slice < numSlices; slice++)
3625         {
                 // The first region of every slice starts at column 0 of the slice's first row.
3626             regionsStartTable[slice * m_numRegionsInSlice]                        = 0;
3627             regionsStartTable[regionStartYOffset + (slice * m_numRegionsInSlice)] = (uint16_t)sliceStartY[slice];
3628 
3629             for (uint32_t i = 1; i < m_numRegionsInSlice; i++)
3630             {
3631                 uint32_t front = i*numUnitInRegion;
3632 
                     // Same start-X selection as in the arbitrary-slice path.
3633                 if (front < frameWidthInUnits)
3634                 {
3635                     regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)front;
3636                 }
3637                 else if (((front - frameWidthInUnits + 1) & 1) == 0)
3638                 {
3639                     regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)frameWidthInUnits - 1;
3640                 }
3641                 else
3642                 {
3643                     regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)frameWidthInUnits - 2;
3644                 }
3645 
                     // regionsStartTable[i] is the slice-0 entry. The start X does not depend on the
                     // slice (front and frameWidthInUnits are the same for every slice), so it equals
                     // the value stored just above.
3646                 regionsStartTable[regionStartYOffset + (slice * m_numRegionsInSlice + i)] = (uint16_t)sliceStartY[slice] +
3647                                                                                             ((front - regionsStartTable[i]) >> 1);
3648             }
3649         }
3650         height = maxHeight;
3651     }
3652 
3653     CODECHAL_ENCODE_ASSERT(numSlices <= CODECHAL_MEDIA_WALKER_MAX_COLORS); // The merged slices should be within the max color limit
3654 
         // datatmp holds one row per region; words 0..7 of each row are filled below.
3655     uint16_t datatmp[32][32] = { 0 };
3656     uint32_t offsetToTheRegionStart[16] = { 0 };
3657     for (uint32_t k = 0; k < numSlices; k++)
3658     {
3659         int32_t nearestReg = 0;
3660         int32_t minDelta = m_frameHeight;
3661 
3662         /* 26 degree wave front */
3663         if (m_enable26WalkingPattern)
3664         {
                 // Rows are scaled by 16 (<< 4) for the distance comparison below.
3665             int32_t curLcuPelY  = regionsStartTable[regionStartYOffset + (k * m_numRegionsInSlice)] << 4;
3666             int32_t tsWidth = m_frameWidth >> 4;
3667             int32_t tsHeight = height;
3668             int32_t offsetY = -((tsWidth + 1) >> 1);
3669             int32_t offsetDelta = ((tsWidth + ((tsHeight - 1) << 1)) + (m_numRegionsInSlice - 1)) / (m_numRegionsInSlice);
3670 
                 // Among the regions that start at column 0, pick the one whose start row is
                 // closest to, and not below, this slice's start row.
3671             for (uint32_t i = 0; i < numRegions; i++)
3672             {
3673                 if (regionsStartTable[i] == 0)
3674                 {
3675                     int32_t delta = curLcuPelY - (regionsStartTable[regionStartYOffset + i] << 4);
3676 
3677                     if (delta >= 0)
3678                     {
3679                         if (delta < minDelta)
3680                         {
3681                             minDelta = delta;
3682                             nearestReg = i;
3683                         }
3684                     }
3685                 }
3686 
3687                 offsetToTheRegionStart[k] = 2 * regionsStartTable[regionStartYOffset + nearestReg];
3688             }
                 // Record layout per region:
                 //   [0] region start X          [1] region start Y
                 //   [2] start Y of nearestReg   [3] start Y of the region m_numRegionsInSlice entries later,
                 //                                   or m_frameHeight >> 4 when that entry is 0
                 //   [4] low 16 bits of offsetToTheRegionStart[k]
                 //   [5], [6] zero               [7] offsetY + start Y of nearestReg + (i * offsetDelta) / 2
3689             for (uint32_t i = 0; i < m_numRegionsInSlice; i++)
3690             {
3691                 datatmp[k * m_numRegionsInSlice + i][0] = regionsStartTable[nearestReg + i];
3692                 datatmp[k * m_numRegionsInSlice + i][1] = regionsStartTable[regionStartYOffset + (nearestReg + i)];
3693                 datatmp[k * m_numRegionsInSlice + i][2] = regionsStartTable[regionStartYOffset + nearestReg];
3694                 int32_t tmpY                            = regionsStartTable[regionStartYOffset + (nearestReg + m_numRegionsInSlice)];
3695                 datatmp[k * m_numRegionsInSlice + i][3] = (uint16_t)((tmpY != 0) ? tmpY : (m_frameHeight) >> 4);
3696                 datatmp[k * m_numRegionsInSlice + i][4] = offsetToTheRegionStart[k] & 0x0FFFF;
3697                 datatmp[k * m_numRegionsInSlice + i][5] = 0;
3698                 datatmp[k * m_numRegionsInSlice + i][6] = 0;
3699                 datatmp[k * m_numRegionsInSlice + i][7] = (uint16_t)(offsetY + regionsStartTable[regionStartYOffset + nearestReg] + ((i * offsetDelta) >> 1));
3700             }
3701         }
3702         else /* 26z walking pattern */
3703         {
                 // Same procedure as the 26 degree branch, but rows are scaled by 32 (<< 5),
                 // the Y words are doubled, and word [7] is built from 4x multiples.
3704             int32_t curLcuPelY  = regionsStartTable[regionStartYOffset + (k * m_numRegionsInSlice)] << 5;
3705             int32_t tsWidth = (m_frameWidth + 16) >> 5;
3706             int32_t tsHeight = height;
3707             int32_t offsetY = -4 * ((tsWidth + 1) >> 1);
3708             int32_t offsetDelta = ((tsWidth + ((tsHeight - 1) << 1)) + (m_numRegionsInSlice - 1)) / (m_numRegionsInSlice);
3709 
3710             for (uint32_t i = 0; i < numRegions; i++)
3711             {
3712                 if (regionsStartTable[i] == 0)
3713                 {
3714                     int32_t delta = curLcuPelY - (regionsStartTable[regionStartYOffset + i] << 5);
3715 
3716                     if (delta >= 0)
3717                     {
3718                         if (delta < minDelta)
3719                         {
3720                             minDelta = delta;
3721                             nearestReg = i;
3722                         }
3723                     }
3724                 }
3725 
3726                 offsetToTheRegionStart[k] = 2 * regionsStartTable[regionStartYOffset + nearestReg];
3727             }
3728 
3729             for (uint32_t i = 0; i < m_numRegionsInSlice; i++)
3730             {
3731                 datatmp[k * m_numRegionsInSlice + i][0] = regionsStartTable[nearestReg + i];
3732                 datatmp[k * m_numRegionsInSlice + i][1] = 2 * regionsStartTable[regionStartYOffset + (nearestReg + i)];
3733                 datatmp[k * m_numRegionsInSlice + i][2] = 2 * regionsStartTable[regionStartYOffset + nearestReg];
3734                 int32_t tmpY                            = 2 * regionsStartTable[regionStartYOffset + (nearestReg + m_numRegionsInSlice)];
3735                 datatmp[k * m_numRegionsInSlice + i][3] = (uint16_t)((tmpY != 0) ? tmpY : (m_frameHeight) >> 4);
3736                 datatmp[k * m_numRegionsInSlice + i][4] = offsetToTheRegionStart[k] & 0x0FFFF;
3737                 datatmp[k * m_numRegionsInSlice + i][5] = 0;
3738                 datatmp[k * m_numRegionsInSlice + i][6] = 0;
3739                 datatmp[k * m_numRegionsInSlice + i][7] = (uint16_t)(offsetY + 4 * regionsStartTable[regionStartYOffset + nearestReg] + (4 * ((i * offsetDelta) >> 1)));
3740             }
3741         }
3742     }
3743 
         // Fill the walker and scoreboard part of m_walkingPatternParam for the selected pattern.
3744     if (m_enable26WalkingPattern)
3745     {
3746         InitParamForWalkerVfe26(m_numRegionsInSlice, maxHeight);
3747     }
3748     else
3749     {
3750         InitParamForWalkerVfe26z(m_numRegionsInSlice, maxHeight);
3751     }
3752 
         // Lock the current concurrent-thread surface for writing.
3753     MOS_LOCK_PARAMS lockFlags;
3754     MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
3755     lockFlags.WriteOnly = true;
3756 
3757     PCODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION region;
3758     region = (PCODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION)m_osInterface->pfnLockResource(
3759         m_osInterface,
3760         &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource,
3761         &lockFlags);
3762 
3763     if (region == nullptr)
3764     {
3765         eStatus = MOS_STATUS_NULL_POINTER;
3766         return eStatus;
3767     }
3768 
3769     MOS_ZeroMemory(region, sizeof(*region) * HEVC_CONCURRENT_SURFACE_HEIGHT);
3770 
         // Copy the first 32 bytes of datatmp rows 0..15 into the surface, one row every 64 bytes.
3771     for (auto i = 0; i < 1024; i += 64)
3772     {
3773         MOS_SecureMemcpy(((uint8_t* )region) + i, 32, (uint8_t* )datatmp[i / 64], 32);
3774     }
3775 
         // The 26z pattern reports twice the height, matching the doubling done in InitParamForWalkerVfe26z.
3776     m_walkingPatternParam.dwMaxHeightInRegion = m_enable26WalkingPattern ? maxHeight : maxHeight * 2;
         // NOTE(review): the next line is an empty statement with no effect.
3777     ;
3778     m_walkingPatternParam.dwNumRegion = numRegions;
3779     m_walkingPatternParam.dwNumUnitsInRegion =
3780         (frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
3781 
3782     m_osInterface->pfnUnlockResource(
3783         m_osInterface,
3784         &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource);
3785 
         // Debug builds dump the surface; the dump status becomes the return value.
3786     CODECHAL_DEBUG_TOOL(
3787         eStatus = m_debugInterface->DumpSurface(
3788             &m_concurrentThreadSurface[m_concurrentThreadIndex],
3789             CodechalDbgAttr::attrOutput,
3790             "HEVC_B_MBENC_Out",
3791             CODECHAL_MEDIA_STATE_HEVC_B_MBENC);
3792     )
3793 
3794     return eStatus;
3795 }
3796 
GetMaxBtCount()3797 uint32_t CodechalEncHevcStateG9::GetMaxBtCount()
3798 {
3799     auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
3800 
3801     // Init/Reset BRC kernel
3802     uint32_t btCountPhase1 = MOS_ALIGN_CEIL(
3803         m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount,
3804         btIdxAlignment);
3805 
3806     // 4x, 16x DS, 2x DS, 4x ME, 16x ME, 32x ME, and coarse intra kernel
3807     uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_COARSE_INTRA].KernelParams.iBTCount, btIdxAlignment) +  // coarse intra
3808                              2 * MOS_ALIGN_CEIL(m_scaling4xKernelStates[0].KernelParams.iBTCount, btIdxAlignment) +                     // 4x and 16x DS
3809                              MOS_ALIGN_CEIL(m_scaling2xKernelStates[0].KernelParams.iBTCount, btIdxAlignment) +                         // 2x DS
3810                              3 * MOS_ALIGN_CEIL(m_hmeKernel ? m_hmeKernel->GetBTCount() : 0, btIdxAlignment);                           // 4x, 16x, and 32x ME
3811 
3812     // BRC update kernels and 6 I kernels
3813     uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3814                              MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3815                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_2xSCALING].KernelParams.iBTCount, btIdxAlignment) +
3816                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16SAD].KernelParams.iBTCount, btIdxAlignment) +
3817                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16MD].KernelParams.iBTCount, btIdxAlignment) +
3818                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8PU].KernelParams.iBTCount, btIdxAlignment) +
3819                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8FMODE].KernelParams.iBTCount, btIdxAlignment);
3820 
3821     btCountPhase3 += MOS_MAX(
3822         MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32MD].KernelParams.iBTCount, btIdxAlignment),
3823         MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32INTRACHECK].KernelParams.iBTCount, btIdxAlignment));
3824 
3825     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
3826     {
3827         btCountPhase3 += MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_DS_COMBINED].KernelParams.iBTCount, btIdxAlignment);
3828     }
3829 
3830     // BRC update kernels and two B kernels
3831     uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3832                              MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3833                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_BENC].KernelParams.iBTCount, btIdxAlignment) +
3834                              MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_BPAK].KernelParams.iBTCount, btIdxAlignment);
3835 
3836     uint32_t maxBtCount = MOS_MAX(btCountPhase1, btCountPhase2);
3837     maxBtCount = MOS_MAX(maxBtCount, btCountPhase3);
3838     maxBtCount = MOS_MAX(maxBtCount, btCountPhase4);
3839 
3840     return maxBtCount;
3841 }
3842 
AllocateEncResources()3843 MOS_STATUS CodechalEncHevcStateG9::AllocateEncResources()
3844 {
3845     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3846 
3847     CODECHAL_ENCODE_FUNCTION_ENTER;
3848 
3849     m_sliceMap = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)MOS_AllocAndZeroMemory(
3850         m_widthAlignedMaxLcu * m_heightAlignedMaxLcu * sizeof(m_sliceMap[0]));
3851     CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceMap);
3852 
3853     uint32_t downscaling2xWidth  = m_widthAlignedMaxLcu >> 1;
3854     uint32_t downscaling2xHeight = m_heightAlignedMaxLcu >> 1;
3855     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
3856         &m_scaled2xSurface,
3857         downscaling2xWidth,
3858         downscaling2xHeight,
3859         "2x Downscaling"));
3860 
3861     uint32_t width  = m_widthAlignedMaxLcu >> 3;
3862     uint32_t height = m_heightAlignedMaxLcu >> 5;
3863     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3864         &m_sliceMapSurface,
3865         width,
3866         height,
3867         "Slice Map"));
3868 
3869     uint32_t size = 32 * (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5);
3870     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3871         &m_32x32PuOutputData,
3872         size,
3873         "32x32 PU Output Data"));
3874 
3875     size = 8 * 4 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
3876     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3877         &m_sad16x16Pu,
3878         size,
3879         "SAD 16x16 PU"));
3880 
3881     size = 64 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
3882     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3883         &m_vme8x8Mode,
3884         size,
3885         "VME 8x8 mode"));
3886 
3887     size = 32 * (m_widthAlignedMaxLcu >> 3) * (m_heightAlignedMaxLcu >> 3);
3888     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3889         &m_intraMode,
3890         size,
3891         "Intra mode"));
3892 
3893     size = 16 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
3894     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3895         &m_intraDist,
3896         size,
3897         "Intra dist"));
3898 
3899     // Change the surface size
3900     width  = m_widthAlignedMaxLcu >> 1;
3901     height = m_heightAlignedMaxLcu >> 4;
3902     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3903         &m_minDistortion,
3904         width,
3905         height,
3906         "Min distortion surface"));
3907 
3908     width = sizeof(CODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION);
3909     height = HEVC_CONCURRENT_SURFACE_HEIGHT;
3910     for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
3911     {
3912         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3913             &m_concurrentThreadSurface[i],
3914             width,
3915             height,
3916             "Concurrent Thread"));
3917     }
3918 
3919     //size = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 4);
3920     size = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 4) + GPUMMU_WA_PADDING;
3921     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3922         &m_mvIndex,
3923         size,
3924         "MV index surface"));
3925 
3926     //size = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 2);
3927     size = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 2) + GPUMMU_WA_PADDING;
3928     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3929         &m_mvpIndex,
3930         size,
3931         "MVP index surface"));
3932 
3933     size = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu;
3934     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3935         &m_vmeSavedUniSic,
3936         size,
3937         "VME Saved UniSic surface"));
3938 
3939     width  = m_widthAlignedMaxLcu >> 3;
3940     height = m_heightAlignedMaxLcu >> 5;
3941     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3942         &m_simplestIntraSurface,
3943         width,
3944         height,
3945         "Simplest Intra surface"));
3946 
3947     m_allocator->AllocateResource(m_standard, 1024, 1, brcInputForEncKernel, "brcInputForEncKernel", true);
3948 
3949     if (m_hmeKernel && m_hmeSupported)
3950     {
3951         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
3952     }
3953 
3954     // BRC Distortion Surface which will be used in ME as the output, too
3955     // In addition, this surface should also be allocated as BRC resource once ENC is enabled
3956     width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64);
3957     height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x * 4), 8);
3958     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3959         &m_brcBuffers.sMeBrcDistortionBuffer,
3960         width,
3961         height,
3962         "BRC distortion surface"));
3963 
3964     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
3965     {
3966         // adding 10 bit support for KBL : output surface for format conversion from 10bit to 8 bit
3967         for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
3968         {
3969             if (Mos_ResourceIsNull(&m_formatConvertedSurface[i].OsResource))
3970             {
3971                 width  = m_widthAlignedMaxLcu;
3972                 height = m_heightAlignedMaxLcu;
3973 
3974                 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
3975                     &m_formatConvertedSurface[i],
3976                     width,
3977                     height,
3978                     "Format Converted Surface"));
3979             }
3980         }
3981 
3982         if (Mos_ResourceIsNull(&m_resMbStatisticsSurface.sResource))
3983         {
3984             size = 52 * m_picWidthInMb * m_picHeightInMb; // 13 DWs or 52 bytes for statistics per MB
3985 
3986             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3987                 &m_resMbStatisticsSurface,
3988                 size,
3989                 "MB stats surface"));
3990         }
3991     }
3992 
3993     // ROI
3994     // ROI buffer size uses MB units for HEVC, not LCU
3995     width  = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64);
3996     height = MOS_ALIGN_CEIL(m_picHeightInMb, 8);
3997 
3998     MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface));
3999     m_roiSurface.TileType       = MOS_TILE_LINEAR;
4000     m_roiSurface.bArraySpacing  = true;
4001     m_roiSurface.Format         = Format_Buffer_2D;
4002     m_roiSurface.dwWidth        = width;
4003     m_roiSurface.dwPitch        = width;
4004     m_roiSurface.dwHeight       = height;
4005 
4006     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
4007         &m_roiSurface,
4008         width,
4009         height,
4010         "ROI Buffer"));
4011 
4012     return eStatus;
4013 }
4014 
FreeEncResources()4015 MOS_STATUS CodechalEncHevcStateG9::FreeEncResources()
4016 {
4017     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4018 
4019     CODECHAL_ENCODE_FUNCTION_ENTER;
4020 
4021     MOS_Delete(m_meKernelState);
4022     m_meKernelState = nullptr;
4023     MOS_FreeMemory(m_meKernelBindingTable);
4024     m_meKernelBindingTable = nullptr;
4025 
4026     MOS_DeleteArray(m_mbEncKernelStates);
4027     m_mbEncKernelStates = nullptr;
4028     MOS_FreeMemory(m_mbEncKernelBindingTable);
4029     m_mbEncKernelBindingTable = nullptr;
4030 
4031     MOS_DeleteArray(m_brcKernelStates);
4032     m_brcKernelStates = nullptr;
4033     MOS_FreeMemory(m_brcKernelBindingTable);
4034     m_brcKernelBindingTable = nullptr;
4035 
4036     MOS_FreeMemory(m_surfaceParams); m_surfaceParams = nullptr;
4037 
4038     for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
4039     {
4040         m_osInterface->pfnFreeResource(
4041             m_osInterface,
4042             &m_formatConvertedSurface[i].OsResource);
4043     }
4044 
4045     m_osInterface->pfnFreeResource(
4046         m_osInterface,
4047         &m_scaled2xSurface.OsResource);
4048 
4049     m_osInterface->pfnFreeResource(
4050         m_osInterface,
4051         &m_resMbStatisticsSurface.sResource);
4052 
4053     m_osInterface->pfnFreeResource(
4054         m_osInterface,
4055         &m_sliceMapSurface.OsResource);
4056 
4057     m_osInterface->pfnFreeResource(
4058         m_osInterface,
4059         &m_32x32PuOutputData.sResource);
4060 
4061     m_osInterface->pfnFreeResource(
4062         m_osInterface,
4063         &m_sad16x16Pu.sResource);
4064 
4065     m_osInterface->pfnFreeResource(
4066         m_osInterface,
4067         &m_vme8x8Mode.sResource);
4068 
4069     m_osInterface->pfnFreeResource(
4070         m_osInterface,
4071         &m_intraMode.sResource);
4072 
4073     m_osInterface->pfnFreeResource(
4074         m_osInterface,
4075         &m_intraDist.sResource);
4076 
4077     m_osInterface->pfnFreeResource(
4078         m_osInterface,
4079         &m_mvIndex.sResource);
4080 
4081     m_osInterface->pfnFreeResource(
4082         m_osInterface,
4083         &m_mvpIndex.sResource);
4084 
4085     m_osInterface->pfnFreeResource(
4086         m_osInterface,
4087         &m_vmeSavedUniSic.sResource);
4088 
4089     m_osInterface->pfnFreeResource(
4090         m_osInterface,
4091         &m_minDistortion.OsResource);
4092 
4093     for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
4094     {
4095         m_osInterface->pfnFreeResource(
4096             m_osInterface,
4097             &m_concurrentThreadSurface[i].OsResource);
4098     }
4099 
4100     m_osInterface->pfnFreeResource(
4101         m_osInterface,
4102         &m_simplestIntraSurface.OsResource);
4103 
4104     if (m_encEnabled)
4105     {
4106         m_osInterface->pfnFreeResource(
4107             m_osInterface,
4108             &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);
4109     }
4110 
4111     MOS_FreeMemory(m_sliceMap);
4112     m_sliceMap = nullptr;
4113 
4114     m_osInterface->pfnFreeResource(
4115         m_osInterface,
4116         &m_roiSurface.OsResource);
4117 
4118 #if (_DEBUG || _RELEASE_INTERNAL)
4119     if (m_swBrcMode != nullptr)
4120     {
4121         m_osInterface->pfnFreeLibrary(m_swBrcMode);
4122         m_swBrcMode = nullptr;
4123     }
4124 #endif // (_DEBUG || _RELEASE_INTERNAL)
4125 
4126     return eStatus;
4127 }
4128 
SendMeSurfaces(CodechalHwInterface * hwInterface,PMOS_COMMAND_BUFFER cmdBuffer,MeSurfaceParams * params)4129 MOS_STATUS CodechalEncHevcStateG9::SendMeSurfaces(
4130     CodechalHwInterface                 *hwInterface,
4131     PMOS_COMMAND_BUFFER                 cmdBuffer,
4132     MeSurfaceParams                     *params)
4133 {
4134     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4135 
4136     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4137     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4138     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
4139     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
4140     CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
4141     CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
4142     CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
4143     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
4144 
4145     PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
4146     if (params->b32xMeInUse)
4147     {
4148         CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
4149         currScaledSurface = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4150         meMvDataBuffer = params->ps32xMeMvDataBuffer;
4151     }
4152     else if (params->b16xMeInUse)
4153     {
4154         CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
4155         currScaledSurface = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4156         meMvDataBuffer = params->ps16xMeMvDataBuffer;
4157     }
4158     else
4159     {
4160         currScaledSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4161         meMvDataBuffer = params->ps4xMeMvDataBuffer;
4162     }
4163 
4164     // Reference height and width information should be taken from the current scaled surface rather
4165     // than from the reference scaled surface in the case of PAFF.
4166     uint32_t width = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
4167     uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
4168     // Force the values
4169     meMvDataBuffer->dwWidth = width;
4170     meMvDataBuffer->dwHeight = height;
4171     meMvDataBuffer->dwPitch = width;
4172 
4173     MeKernelBindingTable* meBindingTable = params->pMeBindingTable;
4174     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
4175     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4176     surfaceParams.bIs2DSurface = true;
4177     surfaceParams.bMediaBlockRW = true;
4178     surfaceParams.psSurface = meMvDataBuffer;
4179     surfaceParams.dwOffset = 0;
4180     surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
4181     surfaceParams.dwBindingTableOffset = meBindingTable->dwMEMVDataSurface;
4182     surfaceParams.bIsWritable = true;
4183     surfaceParams.bRenderTarget = true;
4184     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4185         hwInterface,
4186         cmdBuffer,
4187         &surfaceParams,
4188         params->pKernelState));
4189 
4190     if (params->b16xMeInUse && params->b32xMeEnabled)
4191     {
4192         // Pass 32x MV to 16x ME operation
4193         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4194         surfaceParams.bIs2DSurface = true;
4195         surfaceParams.bMediaBlockRW = true;
4196         surfaceParams.psSurface = params->ps32xMeMvDataBuffer;
4197         surfaceParams.dwOffset = 0;
4198         surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
4199         surfaceParams.dwBindingTableOffset = meBindingTable->dw32xMEMVDataSurface;
4200         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4201             hwInterface,
4202             cmdBuffer,
4203             &surfaceParams,
4204             params->pKernelState));
4205     }
4206     else if (!params->b32xMeInUse && params->b16xMeEnabled)
4207     {
4208         // Pass 16x MV to 4x ME operation
4209         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4210         surfaceParams.bIs2DSurface = true;
4211         surfaceParams.bMediaBlockRW = true;
4212         surfaceParams.psSurface = params->ps16xMeMvDataBuffer;
4213         surfaceParams.dwOffset = 0;
4214         surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
4215         surfaceParams.dwBindingTableOffset = meBindingTable->dw16xMEMVDataSurface;
4216         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4217             hwInterface,
4218             cmdBuffer,
4219             &surfaceParams,
4220             params->pKernelState));
4221     }
4222 
4223     // Insert Distortion buffers only for 4xMe case
4224     if (!params->b32xMeInUse && !params->b16xMeInUse)
4225     {
4226         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4227         surfaceParams.bIs2DSurface = true;
4228         surfaceParams.bMediaBlockRW = true;
4229         surfaceParams.psSurface = params->psMeBrcDistortionBuffer;
4230         surfaceParams.dwOffset = 0;
4231         surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBRCDist;
4232         surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
4233         surfaceParams.bIsWritable = true;
4234         surfaceParams.bRenderTarget = true;
4235         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4236             hwInterface,
4237             cmdBuffer,
4238             &surfaceParams,
4239             params->pKernelState));
4240 
4241         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4242         surfaceParams.bIs2DSurface = true;
4243         surfaceParams.bMediaBlockRW = true;
4244         surfaceParams.psSurface = params->psMeDistortionBuffer;
4245         surfaceParams.dwOffset = 0;
4246         surfaceParams.dwBindingTableOffset = meBindingTable->dwMEDist;
4247         surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
4248         surfaceParams.bIsWritable = true;
4249         surfaceParams.bRenderTarget = true;
4250         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4251             hwInterface,
4252             cmdBuffer,
4253             &surfaceParams,
4254             params->pKernelState));
4255     }
4256 
4257     // Setup references 1...n
4258     // LIST 0 references
4259     const uint8_t currVDirection = CODECHAL_VDIRECTION_FRAME;     // Interlaced not supported
4260     for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
4261     {
4262         CODEC_PICTURE refPic = params->pL0RefFrameList[refIdx];
4263         MOS_SURFACE refScaledSurface = *currScaledSurface;
4264 
4265         if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
4266         {
4267             if (refIdx == 0)
4268             {
4269                 // Current picture Y - VME
4270                 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4271                 surfaceParams.bUseAdvState = true;
4272                 surfaceParams.psSurface = currScaledSurface;
4273                 surfaceParams.dwOffset = 0;
4274                 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4275                 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForFwdRef;
4276                 surfaceParams.ucVDirection = currVDirection;
4277                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4278                     hwInterface,
4279                     cmdBuffer,
4280                     &surfaceParams,
4281                     params->pKernelState));
4282             }
4283 
4284             uint8_t refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
4285             uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
4286             if (params->b32xMeInUse)
4287             {
4288                 MOS_SURFACE* p32xSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
4289                 if (p32xSurface != nullptr)
4290                 {
4291                     refScaledSurface.OsResource = p32xSurface->OsResource;
4292                 }
4293                 else
4294                 {
4295                     CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4296                 }
4297             }
4298             else if (params->b16xMeInUse)
4299             {
4300                 MOS_SURFACE* p16xSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
4301                 if (p16xSurface != nullptr)
4302                 {
4303                     refScaledSurface.OsResource = p16xSurface->OsResource;
4304                 }
4305                 else
4306                 {
4307                     CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4308                 }
4309             }
4310             else
4311             {
4312                 MOS_SURFACE* p4xSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
4313                 if (p4xSurface != nullptr)
4314                 {
4315                     refScaledSurface.OsResource = p4xSurface->OsResource;
4316                 }
4317                 else
4318                 {
4319                     CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4320                 }
4321             }
4322             // L0 Reference picture Y - VME
4323             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4324             surfaceParams.bUseAdvState = true;
4325             surfaceParams.psSurface = &refScaledSurface;
4326             surfaceParams.dwOffset = 0;
4327             surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4328             surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx];
4329             surfaceParams.ucVDirection = CODECHAL_VDIRECTION_FRAME;
4330             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4331                 hwInterface,
4332                 cmdBuffer,
4333                 &surfaceParams,
4334                 params->pKernelState));
4335         }
4336     }
4337 
4338     // Setup references 1...n
4339     // LIST 1 references
4340     for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
4341     {
4342         CODEC_PICTURE refPic = params->pL1RefFrameList[refIdx];
4343         MOS_SURFACE refScaledSurface = *currScaledSurface;
4344 
4345         if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
4346         {
4347             if (refIdx == 0)
4348             {
4349                 // Current picture Y - VME
4350                 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4351                 surfaceParams.bUseAdvState = true;
4352                 surfaceParams.psSurface = currScaledSurface;
4353                 surfaceParams.dwOffset = 0;
4354                 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4355                 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForBwdRef;
4356                 surfaceParams.ucVDirection = currVDirection;
4357                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4358                     hwInterface,
4359                     cmdBuffer,
4360                     &surfaceParams,
4361                     params->pKernelState));
4362             }
4363 
4364             uint8_t refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
4365             uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
4366             if (params->b32xMeInUse)
4367             {
4368                 MOS_SURFACE* p32xSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
4369                 if (p32xSurface != nullptr)
4370                 {
4371                     refScaledSurface.OsResource = p32xSurface->OsResource;
4372                 }
4373                 else
4374                 {
4375                     CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4376                 }
4377             }
4378             else if (params->b16xMeInUse)
4379             {
4380                 MOS_SURFACE* p16xSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
4381                 if (p16xSurface != nullptr)
4382                 {
4383                     refScaledSurface.OsResource = p16xSurface->OsResource;
4384                 }
4385                 else
4386                 {
4387                     CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4388                 }
4389             }
4390             else
4391             {
4392                 MOS_SURFACE* p4xSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
4393                 if (p4xSurface != nullptr)
4394                 {
4395                     refScaledSurface.OsResource = p4xSurface->OsResource;
4396                 }
4397                 else
4398                 {
4399                     CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4400                 }
4401             }
4402             // L1 Reference picture Y - VME
4403             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4404             surfaceParams.bUseAdvState = true;
4405             surfaceParams.psSurface = &refScaledSurface;
4406             surfaceParams.dwOffset = 0;
4407             surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4408             surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBwdRefPicIdx[refIdx];
4409             surfaceParams.ucVDirection = CODECHAL_VDIRECTION_FRAME;
4410             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4411                 hwInterface,
4412                 cmdBuffer,
4413                 &surfaceParams,
4414                 params->pKernelState));
4415         }
4416     }
4417 
4418     return eStatus;
4419 }
4420 
4421 //------------------------------------------------------------------------------
4422 //| Purpose:    Setup curbe for HEVC ME kernels
4423 //| Return:     N/A
4424 //------------------------------------------------------------------------------
SetCurbeMe(MeCurbeParams * params)4425 MOS_STATUS CodechalEncHevcStateG9::SetCurbeMe(
4426     MeCurbeParams* params)
4427 {
4428     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4429 
4430     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4431     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
4432 
4433     CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
4434 
4435     uint8_t mvShiftFactor = 0, prevMvReadPosFactor = 0;
4436     bool useMvFromPrevStep= false, writeDistortions = false;
4437     uint32_t scaleFactor = 0;
4438     switch (params->hmeLvl)
4439     {
4440     case HME_LEVEL_32x:
4441         useMvFromPrevStep = HME_FIRST_STEP;
4442         writeDistortions = false;
4443         scaleFactor = SCALE_FACTOR_32x;
4444         mvShiftFactor = MV_SHIFT_FACTOR_32x;
4445         break;
4446     case HME_LEVEL_16x:
4447         useMvFromPrevStep   = (m_b32XMeEnabled) ? HME_FOLLOWING_STEP : HME_FIRST_STEP;
4448         writeDistortions = false;
4449         scaleFactor = SCALE_FACTOR_16x;
4450         mvShiftFactor = MV_SHIFT_FACTOR_16x;
4451         prevMvReadPosFactor = PREV_MV_READ_POSITION_16x;
4452         break;
4453     case HME_LEVEL_4x:
4454         useMvFromPrevStep   = (m_b16XMeEnabled) ? HME_FOLLOWING_STEP : HME_FIRST_STEP;
4455         writeDistortions = true;
4456         scaleFactor = SCALE_FACTOR_4x;
4457         mvShiftFactor = MV_SHIFT_FACTOR_4x;
4458         prevMvReadPosFactor = PREV_MV_READ_POSITION_4x;
4459         break;
4460     default:
4461         return MOS_STATUS_INVALID_PARAMETER;
4462         break;
4463     }
4464 
4465     CODECHAL_ENC_HEVC_ME_CURBE_G9 cmd;
4466     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4467         &cmd,
4468         sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G9),
4469         m_meCurbeInit,
4470         sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G9)));
4471 
4472     cmd.DW3.SubPelMode = 3;
4473     cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
4474     cmd.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
4475     cmd.DW5.QpPrimeY = params->pic_init_qp_minus26 + 26 + params->slice_qp_delta;
4476     cmd.DW6.WriteDistortions = writeDistortions;
4477     cmd.DW6.UseMvFromPrevStep = useMvFromPrevStep;
4478 
4479     cmd.DW6.SuperCombineDist = m_superCombineDist[params->TargetUsage];
4480     cmd.DW6.MaxVmvR = 512; // CModel always uses 512 for HME even though B_MB uses (levelIDC)*4
4481 
4482     if (m_pictureCodingType == B_TYPE)
4483     {
4484         // This field is irrelevant since we are not using the bi-direct search.
4485         // set it to 32
4486         cmd.DW1.BiWeight = 32;
4487         cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
4488         cmd.DW13.NumRefIdxL0MinusOne = params->num_ref_idx_l0_active_minus1;
4489     }
4490 
4491     cmd.DW15.MvShiftFactor = mvShiftFactor;
4492     cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
4493 
4494     // r3 & r4
4495     uint8_t meMethod = m_meMethod[params->TargetUsage];
4496 
4497     eStatus = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), CodechalEncoderState::m_encodeSearchPath[0][meMethod], 14 * sizeof(uint32_t));
4498     if (eStatus != MOS_STATUS_SUCCESS)
4499     {
4500         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
4501         return eStatus;
4502     }
4503 
4504     // r5
4505     cmd.DW32._4xMeMvOutputDataSurfIndex = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_CM_G9;
4506     cmd.DW33._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ?
4507         CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_CM_G9 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_CM_G9;
4508     cmd.DW34._4xMeOutputDistSurfIndex = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_CM_G9;
4509     cmd.DW35._4xMeOutputBrcDistSurfIndex = CODECHAL_ENCODE_ME_BRC_DISTORTION_CM_G9;
4510     cmd.DW36.VMEFwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_CM_G9;
4511     cmd.DW37.VMEBwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_CM_G9;
4512 
4513     CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
4514         &cmd,
4515         params->pKernelState->dwCurbeOffset,
4516         sizeof(cmd)));
4517 
4518     return eStatus;
4519 }
4520 
SetMbEncKernelParams(MHW_KERNEL_PARAM * kernelParams,uint32_t idx)4521 MOS_STATUS CodechalEncHevcStateG9::SetMbEncKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx)
4522 {
4523     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4524 
4525     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
4526 
4527     auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
4528 
4529     kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
4530     kernelParams->iIdCount     = 1;
4531 
4532     switch (idx)
4533     {
4534     case CODECHAL_HEVC_MBENC_2xSCALING:
4535         kernelParams->iBTCount = CODECHAL_HEVC_SCALING_FRAME_END - CODECHAL_HEVC_SCALING_FRAME_BEGIN;
4536         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9), curbeAlignment);
4537         kernelParams->iBlockWidth = 32;
4538         kernelParams->iBlockHeight = 32;
4539         break;
4540 
4541     case CODECHAL_HEVC_MBENC_32x32MD:
4542         kernelParams->iBTCount = CODECHAL_HEVC_32x32_PU_END - CODECHAL_HEVC_32x32_PU_BEGIN;
4543         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9), curbeAlignment);
4544         kernelParams->iBlockWidth = 32;
4545         kernelParams->iBlockHeight = 32;
4546         break;
4547 
4548     case CODECHAL_HEVC_MBENC_16x16SAD:
4549         kernelParams->iBTCount = CODECHAL_HEVC_16x16_PU_SAD_END - CODECHAL_HEVC_16x16_PU_SAD_BEGIN;
4550         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9), curbeAlignment);
4551         kernelParams->iBlockWidth = 16;
4552         kernelParams->iBlockHeight = 16;
4553         break;
4554 
4555     case CODECHAL_HEVC_MBENC_16x16MD:
4556         kernelParams->iBTCount = CODECHAL_HEVC_16x16_PU_MD_END - CODECHAL_HEVC_16x16_PU_MD_BEGIN;
4557         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9), curbeAlignment);
4558         kernelParams->iBlockWidth = 32;
4559         kernelParams->iBlockHeight = 32;
4560         break;
4561 
4562     case CODECHAL_HEVC_MBENC_8x8PU:
4563         kernelParams->iBTCount = CODECHAL_HEVC_8x8_PU_END - CODECHAL_HEVC_8x8_PU_BEGIN;
4564         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_8x8_PU_CURBE_G9), curbeAlignment);
4565         kernelParams->iBlockWidth = 8;
4566         kernelParams->iBlockHeight = 8;
4567         break;
4568 
4569     case CODECHAL_HEVC_MBENC_8x8FMODE:
4570         kernelParams->iBTCount = CODECHAL_HEVC_8x8_PU_FMODE_END - CODECHAL_HEVC_8x8_PU_FMODE_BEGIN;
4571         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_8x8_PU_FMODE_CURBE_G9), curbeAlignment);
4572         kernelParams->iBlockWidth = 32;
4573         kernelParams->iBlockHeight = 32;
4574         break;
4575 
4576     case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
4577         kernelParams->iBTCount = CODECHAL_HEVC_B_32x32_PU_END - CODECHAL_HEVC_B_32x32_PU_BEGIN;
4578         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9), curbeAlignment);
4579         kernelParams->iBlockWidth = 32;
4580         kernelParams->iBlockHeight = 32;
4581         break;
4582 
4583     case CODECHAL_HEVC_MBENC_BENC:
4584     case CODECHAL_HEVC_MBENC_ADV:
4585         kernelParams->iBTCount = CODECHAL_HEVC_B_MBENC_END - CODECHAL_HEVC_B_MBENC_BEGIN;
4586         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9), curbeAlignment);
4587         kernelParams->iBlockWidth = 16;
4588         kernelParams->iBlockHeight = 16;
4589         break;
4590 
4591     case CODECHAL_HEVC_MBENC_BPAK:
4592         kernelParams->iBTCount = CODECHAL_HEVC_B_PAK_END - CODECHAL_HEVC_B_PAK_BEGIN;
4593         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_PAK_CURBE_G9), curbeAlignment);
4594         kernelParams->iBlockWidth = 32;
4595         kernelParams->iBlockHeight = 32;
4596         break;
4597 
4598     case CODECHAL_HEVC_MBENC_DS_COMBINED:
4599         if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
4600         {
4601             kernelParams->iBTCount = CODECHAL_HEVC_DS_COMBINED_END - CODECHAL_HEVC_DS_COMBINED_BEGIN;
4602             uint32_t dsCombinedKernelCurbeSize = sizeof(CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9);
4603             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(dsCombinedKernelCurbeSize, curbeAlignment);
4604             kernelParams->iBlockWidth = 8;
4605             kernelParams->iBlockHeight = 8;
4606         }
4607         else
4608         {
4609             CODECHAL_ENCODE_ASSERT(false);
4610             eStatus = MOS_STATUS_INVALID_PARAMETER;
4611         }
4612         break;
4613 
4614     case CODECHAL_HEVC_MBENC_PENC:
4615     case CODECHAL_HEVC_MBENC_ADV_P:
4616         kernelParams->iBTCount = CODECHAL_HEVC_P_MBENC_END - CODECHAL_HEVC_P_MBENC_BEGIN;
4617         //P MBEnc curbe has one less DWord than B MBEnc curbe
4618         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9) - sizeof(uint32_t), (size_t)curbeAlignment);
4619         kernelParams->iBlockWidth = 16;
4620         kernelParams->iBlockHeight = 16;
4621         break;
4622 
4623     default:
4624         CODECHAL_ENCODE_ASSERT(false);
4625         eStatus = MOS_STATUS_INVALID_PARAMETER;
4626     }
4627 
4628     return eStatus;
4629 }
4630 
SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,uint32_t idx)4631 MOS_STATUS CodechalEncHevcStateG9::SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx)
4632 {
4633     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4634 
4635     CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
4636 
4637     MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
4638     bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_MBENC, idx);
4639 
4640     switch (idx)
4641     {
4642     case CODECHAL_HEVC_MBENC_2xSCALING:
4643         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_SCALING_FRAME_END - CODECHAL_HEVC_SCALING_FRAME_BEGIN;
4644         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_SCALING_FRAME_BEGIN;
4645         break;
4646 
4647     case CODECHAL_HEVC_MBENC_32x32MD:
4648         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_32x32_PU_END - CODECHAL_HEVC_32x32_PU_BEGIN;
4649         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_32x32_PU_BEGIN;
4650         break;
4651 
4652     case CODECHAL_HEVC_MBENC_16x16SAD:
4653         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_16x16_PU_SAD_END - CODECHAL_HEVC_16x16_PU_SAD_BEGIN;
4654         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_16x16_PU_SAD_BEGIN;
4655         break;
4656 
4657     case CODECHAL_HEVC_MBENC_16x16MD:
4658         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_16x16_PU_MD_END - CODECHAL_HEVC_16x16_PU_MD_BEGIN;
4659         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_16x16_PU_MD_BEGIN;
4660         break;
4661 
4662     case CODECHAL_HEVC_MBENC_8x8PU:
4663         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_8x8_PU_END - CODECHAL_HEVC_8x8_PU_BEGIN;
4664         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_8x8_PU_BEGIN;
4665         break;
4666 
4667     case CODECHAL_HEVC_MBENC_8x8FMODE:
4668         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_8x8_PU_FMODE_END - CODECHAL_HEVC_8x8_PU_FMODE_BEGIN;
4669         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_8x8_PU_FMODE_BEGIN;
4670         break;
4671 
4672     case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
4673         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_B_32x32_PU_END - CODECHAL_HEVC_B_32x32_PU_BEGIN;
4674         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_B_32x32_PU_BEGIN;
4675         break;
4676 
4677     case CODECHAL_HEVC_MBENC_BENC:
4678     case CODECHAL_HEVC_MBENC_ADV:
4679         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_B_MBENC_END - CODECHAL_HEVC_B_MBENC_BEGIN;
4680         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_B_MBENC_BEGIN;
4681         break;
4682 
4683     case CODECHAL_HEVC_MBENC_BPAK:
4684         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_B_PAK_END - CODECHAL_HEVC_B_PAK_BEGIN;
4685         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_B_PAK_BEGIN;
4686         break;
4687 
4688     case CODECHAL_HEVC_MBENC_DS_COMBINED:
4689         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_DS_COMBINED_END - CODECHAL_HEVC_DS_COMBINED_BEGIN;
4690         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_DS_COMBINED_BEGIN;
4691         break;
4692 
4693     case CODECHAL_HEVC_MBENC_PENC:
4694     case CODECHAL_HEVC_MBENC_ADV_P:
4695         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_P_MBENC_END - CODECHAL_HEVC_P_MBENC_BEGIN;
4696         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_P_MBENC_BEGIN;
4697         break;
4698 
4699     default:
4700         CODECHAL_ENCODE_ASSERT(false);
4701         eStatus = MOS_STATUS_INVALID_PARAMETER;
4702         return eStatus;
4703     }
4704 
4705     for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
4706     {
4707         bindingTable->dwBindingTableEntries[i] = i;
4708     }
4709 
4710     return eStatus;
4711 }
4712 
SetBrcKernelParams(MHW_KERNEL_PARAM * kernelParams,uint32_t idx)4713 MOS_STATUS CodechalEncHevcStateG9::SetBrcKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx)
4714 {
4715     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4716 
4717     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
4718 
4719     auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
4720 
4721     kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
4722     kernelParams->iIdCount = 1;
4723 
4724     // Only LCU-based update kernel is running at multple threads. Others run in the single thread.
4725     switch (idx)
4726     {
4727     case CODECHAL_HEVC_BRC_COARSE_INTRA:
4728         kernelParams->iBTCount     = CODECHAL_HEVC_COARSE_INTRA_END - CODECHAL_HEVC_COARSE_INTRA_BEGIN;
4729         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9), curbeAlignment);
4730         kernelParams->iBlockWidth  = 32;
4731         kernelParams->iBlockHeight = 32;
4732         break;
4733 
4734     case CODECHAL_HEVC_BRC_INIT:
4735         kernelParams->iBTCount     = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4736         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9), curbeAlignment);
4737         kernelParams->iBlockWidth  = 32;
4738         kernelParams->iBlockHeight = 32;
4739         break;
4740 
4741     case CODECHAL_HEVC_BRC_RESET:
4742         kernelParams->iBTCount     = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4743         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9), curbeAlignment);
4744         kernelParams->iBlockWidth  = 32;
4745         kernelParams->iBlockHeight = 32;
4746         break;
4747 
4748     case CODECHAL_HEVC_BRC_FRAME_UPDATE:
4749         kernelParams->iBTCount     = CODECHAL_HEVC_BRC_UPDATE_END - CODECHAL_HEVC_BRC_UPDATE_BEGIN;
4750         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9), curbeAlignment);
4751         kernelParams->iBlockWidth  = 32;
4752         kernelParams->iBlockHeight = 32;
4753         break;
4754 
4755     case CODECHAL_HEVC_BRC_LCU_UPDATE:
4756         kernelParams->iBTCount     = CODECHAL_HEVC_BRC_LCU_UPDATE_END - CODECHAL_HEVC_BRC_LCU_UPDATE_BEGIN;
4757         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9), curbeAlignment);
4758         kernelParams->iBlockWidth  = 128;
4759         kernelParams->iBlockHeight = 128;
4760         break;
4761 
4762     default:
4763         CODECHAL_ENCODE_ASSERT(false);
4764         eStatus = MOS_STATUS_INVALID_PARAMETER;
4765         return eStatus;
4766     }
4767 
4768     return eStatus;
4769 }
4770 
SetBrcBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,uint32_t idx)4771 MOS_STATUS CodechalEncHevcStateG9::SetBrcBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx)
4772 {
4773     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4774 
4775     CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
4776 
4777     MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
4778     bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_BRC, idx);
4779 
4780     switch (idx)
4781     {
4782     case CODECHAL_HEVC_BRC_COARSE_INTRA:
4783         bindingTable->dwNumBindingTableEntries  = CODECHAL_HEVC_COARSE_INTRA_END - CODECHAL_HEVC_COARSE_INTRA_BEGIN;
4784         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_COARSE_INTRA_BEGIN;
4785         break;
4786 
4787     case CODECHAL_HEVC_BRC_INIT:
4788         bindingTable->dwNumBindingTableEntries  = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4789         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4790         break;
4791 
4792     case CODECHAL_HEVC_BRC_RESET:
4793         bindingTable->dwNumBindingTableEntries  = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4794         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4795         break;
4796 
4797     case CODECHAL_HEVC_BRC_FRAME_UPDATE:
4798         bindingTable->dwNumBindingTableEntries  = CODECHAL_HEVC_BRC_UPDATE_END - CODECHAL_HEVC_BRC_UPDATE_BEGIN;
4799         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_UPDATE_BEGIN;
4800         break;
4801 
4802     case CODECHAL_HEVC_BRC_LCU_UPDATE:
4803         bindingTable->dwNumBindingTableEntries  = CODECHAL_HEVC_BRC_LCU_UPDATE_END - CODECHAL_HEVC_BRC_LCU_UPDATE_BEGIN;
4804         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_LCU_UPDATE_BEGIN;
4805         break;
4806 
4807     default:
4808         CODECHAL_ENCODE_ASSERT(false);
4809         eStatus = MOS_STATUS_INVALID_PARAMETER;
4810         return eStatus;
4811     }
4812 
4813     for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
4814     {
4815         bindingTable->dwBindingTableEntries[i] = i;
4816     }
4817 
4818     return eStatus;
4819 }
4820 
InitKernelStateBrc()4821 MOS_STATUS CodechalEncHevcStateG9::InitKernelStateBrc()
4822 {
4823     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4824 
4825     CODECHAL_ENCODE_FUNCTION_ENTER;
4826 
4827     m_numBrcKrnStates = CODECHAL_HEVC_BRC_NUM;
4828 
4829     m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
4830     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
4831 
4832     m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
4833         sizeof(GenericBindingTable) * m_numBrcKrnStates);
4834     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelBindingTable);
4835 
4836     auto kernelStatePtr = m_brcKernelStates;
4837 
4838     for (uint32_t krnStateIdx = 0; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
4839     {
4840         auto kernelSize = m_combinedKernelSize;
4841         CODECHAL_KERNEL_HEADER currKrnHeader;
4842 
4843         CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
4844             m_kernelBinary,
4845             ENC_BRC,
4846             krnStateIdx,
4847             &currKrnHeader,
4848             &kernelSize));
4849 
4850         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBrcKernelParams(
4851             &kernelStatePtr->KernelParams,
4852             krnStateIdx));
4853 
4854         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBrcBindingTable(
4855             &m_brcKernelBindingTable[krnStateIdx], krnStateIdx));
4856 
4857         kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
4858         kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
4859         kernelStatePtr->KernelParams.iSize = kernelSize;
4860 
4861         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
4862             m_stateHeapInterface,
4863             kernelStatePtr->KernelParams.iBTCount,
4864             &kernelStatePtr->dwSshSize,
4865             &kernelStatePtr->dwBindingTableSize));
4866 
4867         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
4868 
4869         kernelStatePtr++;
4870     }
4871 
4872     return eStatus;
4873 }
4874 
InitKernelStateMbEnc()4875 MOS_STATUS CodechalEncHevcStateG9::InitKernelStateMbEnc()
4876 {
4877     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4878 
4879     CODECHAL_ENCODE_FUNCTION_ENTER;
4880 
4881     if(MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrEncodeHEVC10bit) && m_is10BitHevc)
4882     {
4883         m_numMbEncEncKrnStates = CODECHAL_HEVC_MBENC_NUM_BXT_SKL;
4884     }
4885     else if (!m_noMeKernelForPFrame)
4886     {
4887         m_numMbEncEncKrnStates = CODECHAL_HEVC_MBENC_NUM_BXT_SKL;
4888     }
4889     else
4890     {
4891         m_numMbEncEncKrnStates = CODECHAL_HEVC_MBENC_NUM;
4892     }
4893 
4894     m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
4895     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
4896 
4897     m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
4898         sizeof(GenericBindingTable) * m_numMbEncEncKrnStates);
4899     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
4900 
4901     auto kernelStatePtr = m_mbEncKernelStates;
4902 
4903     for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++)
4904     {
4905         auto kernelSize = m_combinedKernelSize;
4906         CODECHAL_KERNEL_HEADER currKrnHeader;
4907 
4908         CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
4909             m_kernelBinary,
4910             ENC_MBENC,
4911             krnStateIdx,
4912             &currKrnHeader,
4913             &kernelSize));
4914 
4915         if (kernelSize == 0)  //Ignore. It isn't used on current platform.
4916         {
4917             kernelStatePtr++;
4918             continue;
4919         }
4920 
4921         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncKernelParams(
4922             &kernelStatePtr->KernelParams,
4923             krnStateIdx));
4924 
4925         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncBindingTable(
4926             &m_mbEncKernelBindingTable[krnStateIdx], krnStateIdx));
4927 
4928         kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
4929         kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
4930         kernelStatePtr->KernelParams.iSize = kernelSize;
4931 
4932         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
4933             m_stateHeapInterface,
4934             kernelStatePtr->KernelParams.iBTCount,
4935             &kernelStatePtr->dwSshSize,
4936             &kernelStatePtr->dwBindingTableSize));
4937 
4938         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
4939 
4940         kernelStatePtr++;
4941     }
4942 
4943     return eStatus;
4944 }
4945 
InitSurfaceInfoTable()4946 MOS_STATUS CodechalEncHevcStateG9::InitSurfaceInfoTable()
4947 {
4948     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4949 
4950     m_surfaceParams = (PCODECHAL_SURFACE_CODEC_PARAMS)MOS_AllocAndZeroMemory(
4951         sizeof(*m_surfaceParams) * SURFACE_NUM_TOTAL);
4952     CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfaceParams);
4953 
4954     PCODECHAL_SURFACE_CODEC_PARAMS param = &m_surfaceParams[SURFACE_RAW_Y];
4955     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4956         param,
4957         m_rawSurfaceToEnc,
4958         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4959         0,
4960         m_verticalLineStride,
4961         false));
4962 
4963     param = &m_surfaceParams[SURFACE_RAW_10bit_Y];
4964     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4965         param,
4966         m_rawSurfaceToEnc,
4967         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4968         0,
4969         m_verticalLineStride,
4970         false));
4971 
4972     // MB stats surface -- currently not used
4973     param = &m_surfaceParams[SURFACE_RAW_MBSTAT];
4974     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4975         param,
4976         &m_resMbStatisticsSurface.sResource,
4977         m_resMbStatisticsSurface.dwSize,
4978         0,
4979         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
4980         0,
4981         true));
4982     param->bRawSurface = true;
4983 
4984     param = &m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV];
4985     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4986         param,
4987         &m_formatConvertedSurface[0],
4988         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4989         0,
4990         m_verticalLineStride,
4991         true));  //this should be writable as it is output of formatconversion
4992     param->bUseUVPlane = true;
4993 
4994     param = &m_surfaceParams[SURFACE_RAW_Y_UV];
4995     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4996            param,
4997            m_rawSurfaceToEnc,
4998            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4999            0,
5000            m_verticalLineStride,
5001            false));
5002     param->bUseUVPlane    = true;
5003 
5004     param = &m_surfaceParams[SURFACE_RAW_10bit_Y_UV];
5005     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5006             param,
5007             m_rawSurfaceToEnc,
5008             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
5009             0,
5010             m_verticalLineStride,
5011             false));//this should be writable as it is output of formatconversion
5012     param->bUseUVPlane = true;
5013 
5014     param = &m_surfaceParams[SURFACE_Y_2X];
5015     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5016         param,
5017         &m_scaled2xSurface,
5018         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5019         0,
5020         m_verticalLineStride,
5021         false));
5022 
5023     param = &m_surfaceParams[SURFACE_32x32_PU_OUTPUT];
5024     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5025         param,
5026         &m_32x32PuOutputData.sResource,
5027         m_32x32PuOutputData.dwSize,
5028         0,
5029         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5030         0,
5031         false));
5032 
5033     param = &m_surfaceParams[SURFACE_SLICE_MAP];
5034     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5035         param,
5036         &m_sliceMapSurface,
5037         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5038         0,
5039         m_verticalLineStride,
5040         false));
5041 
5042     param = &m_surfaceParams[SURFACE_Y_2X_VME];
5043     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5044         param,
5045         &m_scaled2xSurface,
5046         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
5047         0));
5048 
5049     param = &m_surfaceParams[SURFACE_BRC_INPUT];
5050     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5051         param,
5052         (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
5053         m_allocator->GetResourceSize(m_standard, brcInputForEncKernel),
5054         0,
5055         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5056         0,
5057         false));
5058 
5059     param = &m_surfaceParams[SURFACE_LCU_QP];
5060     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5061         param,
5062         &m_brcBuffers.sBrcMbQpBuffer,
5063         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5064         0,
5065         m_verticalLineStride,
5066         false));
5067 
5068     param = &m_surfaceParams[SURFACE_ROI];
5069     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5070         param,
5071         &m_roiSurface,
5072         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5073         0,
5074         m_verticalLineStride,
5075         false));
5076 
5077     param = &m_surfaceParams[SURFACE_BRC_DATA];
5078     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5079         param,
5080         &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
5081         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5082         0,
5083         m_verticalLineStride,
5084         false));
5085 
5086     param = &m_surfaceParams[SURFACE_SIMPLIFIED_INTRA];
5087     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5088         param,
5089         &m_simplestIntraSurface,
5090         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5091         0,
5092         m_verticalLineStride,
5093         false));
5094 
5095     param = &m_surfaceParams[SURFACE_HME_MVP];
5096     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5097         param,
5098         m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer),
5099         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5100         0,
5101         m_verticalLineStride,
5102         false));
5103 
5104     param = &m_surfaceParams[SURFACE_HME_DIST];
5105     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5106         param,
5107         m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xDistortionBuffer),
5108         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
5109         0,
5110         m_verticalLineStride,
5111         false));
5112 
5113     param = &m_surfaceParams[SURFACE_16x16PU_SAD];
5114     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5115         param,
5116         &m_sad16x16Pu.sResource,
5117         m_sad16x16Pu.dwSize,
5118         0,
5119         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5120         0,
5121         false));
5122 
5123     param = &m_surfaceParams[SURFACE_RAW_VME];
5124     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5125         param,
5126         m_rawSurfaceToEnc,
5127         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
5128         0));
5129 
5130     param = &m_surfaceParams[SURFACE_VME_8x8];
5131     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5132         param,
5133         &m_vme8x8Mode.sResource,
5134         m_vme8x8Mode.dwSize,
5135         0,
5136         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5137         0,
5138         false));
5139 
5140     param = &m_surfaceParams[SURFACE_CU_RECORD];
5141     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5142         param,
5143         &m_resMbCodeSurface,
5144         m_mbCodeSize - m_mvOffset,
5145         m_mvOffset,
5146         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5147         0,
5148         true));
5149 
5150     param = &m_surfaceParams[SURFACE_INTRA_MODE];
5151     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5152         param,
5153         &m_intraMode.sResource,
5154         m_intraMode.dwSize,
5155         0,
5156         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5157         0,
5158         false));
5159 
5160     param = &m_surfaceParams[SURFACE_HCP_PAK];
5161     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5162         param,
5163         &m_resMbCodeSurface,
5164         m_mvOffset,
5165         0,
5166         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5167         0,
5168         true));
5169 
5170     param = &m_surfaceParams[SURFACE_INTRA_DIST];
5171     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5172         param,
5173         &m_intraDist.sResource,
5174         m_intraDist.dwSize,
5175         0,
5176         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5177         0,
5178         false));
5179 
5180     param = &m_surfaceParams[SURFACE_MIN_DIST];
5181     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5182         param,
5183         &m_minDistortion,
5184         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
5185         0,
5186         m_verticalLineStride,
5187         false));
5188 
5189     param = &m_surfaceParams[SURFACE_VME_UNI_SIC_DATA];
5190     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5191         param,
5192         &m_vmeSavedUniSic.sResource,
5193         m_vmeSavedUniSic.dwSize,
5194         0,
5195         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5196         0,
5197         false));
5198 
5199     param = &m_surfaceParams[SURFACE_COL_MB_MV];
5200     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5201         param,
5202         nullptr,
5203         m_sizeOfMvTemporalBuffer,
5204         0,
5205         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5206         0,
5207         false));
5208 
5209     m_concurrentThreadIndex = 0;
5210     for (auto i = 0; i < NUM_CONCURRENT_THREAD; i++)
5211     {
5212         param = &m_surfaceParams[SURFACE_CONCURRENT_THREAD + i];
5213         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5214             param,
5215             &m_concurrentThreadSurface[i],
5216             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
5217             0,
5218             m_verticalLineStride,
5219             false));
5220     }
5221 
5222     param = &m_surfaceParams[SURFACE_MB_MV_INDEX];
5223     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5224         param,
5225         &m_mvIndex.sResource,
5226         m_mvIndex.dwSize,
5227         0,
5228         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5229         0,
5230         false));
5231 
5232     param = &m_surfaceParams[SURFACE_MVP_INDEX];
5233     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5234         param,
5235         &m_mvpIndex.sResource,
5236         m_mvpIndex.dwSize,
5237         0,
5238         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5239         0,
5240         false));
5241 
5242     param = &m_surfaceParams[SURFACE_REF_FRAME_VME];
5243     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5244         param,
5245         0,
5246         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
5247         0));
5248 
5249     param = &m_surfaceParams[SURFACE_Y_4X];
5250     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5251         param,
5252         nullptr,
5253         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5254         0,
5255         m_verticalLineStride,
5256         false));
5257 
5258     param = &m_surfaceParams[SURFACE_Y_4X_VME];
5259     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5260         param,
5261         nullptr,
5262         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
5263         0));
5264 
5265     param = &m_surfaceParams[SURFACE_BRC_HISTORY];
5266     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5267         param,
5268         &m_brcBuffers.resBrcHistoryBuffer,
5269         m_brcHistoryBufferSize,
5270         0,
5271         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5272         0,
5273         true));
5274 
5275     param = &m_surfaceParams[SURFACE_BRC_ME_DIST];
5276     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5277         param,
5278         &m_brcBuffers.sMeBrcDistortionBuffer,
5279         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5280         0,
5281         m_verticalLineStride,
5282         true));
5283 
5284     param = &m_surfaceParams[SURFACE_BRC_PAST_PAK_INFO];
5285     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5286         param,
5287         &m_brcBuffers.resBrcPakStatisticBuffer[0],
5288         m_hevcBrcPakStatisticsSize,
5289         0,
5290         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5291         0,
5292         false));
5293 
5294     param = &m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE];
5295     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5296         param,
5297         &m_brcBuffers.resBrcImageStatesWriteBuffer[0],
5298         m_brcBuffers.dwBrcHcpPicStateSize,
5299         0,
5300         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5301         0,
5302         false));
5303 
5304     return eStatus;
5305 }
5306 
RequestSshAndVerifyCommandBufferSize(PMHW_KERNEL_STATE kernelState)5307 MOS_STATUS CodechalEncHevcStateG9::RequestSshAndVerifyCommandBufferSize(PMHW_KERNEL_STATE kernelState)
5308 {
5309     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5310 
5311     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
5312 
5313     auto maxBtCount = m_singleTaskPhaseSupported ?
5314         m_maxBtCount : kernelState->KernelParams.iBTCount;
5315 
5316     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5317         m_stateHeapInterface,
5318         maxBtCount));
5319 
5320     m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5321     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5322 
5323     return eStatus;
5324 }
5325 
SendKernelCmdsAndBindingTable(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_KERNEL_STATE kernelState,CODECHAL_MEDIA_STATE_TYPE mediaStateType,PMHW_VFE_SCOREBOARD customScoreBoard)5326 MOS_STATUS CodechalEncHevcStateG9::SendKernelCmdsAndBindingTable(
5327     PMOS_COMMAND_BUFFER                     cmdBuffer,
5328     PMHW_KERNEL_STATE                       kernelState,
5329     CODECHAL_MEDIA_STATE_TYPE               mediaStateType,
5330     PMHW_VFE_SCOREBOARD                     customScoreBoard)
5331 {
5332     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5333 
5334     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(cmdBuffer));
5335 
5336     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5337     MOS_ZeroMemory(&idParams, sizeof(idParams));
5338     idParams.pKernelState = kernelState;
5339     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5340         m_stateHeapInterface,
5341         1,
5342         &idParams));
5343 
5344     // Program render engine pipe commands
5345     SendKernelCmdsParams sendKernelCmdsParams;
5346     sendKernelCmdsParams = SendKernelCmdsParams();
5347     sendKernelCmdsParams.EncFunctionType = mediaStateType;
5348     sendKernelCmdsParams.pKernelState = kernelState;
5349     sendKernelCmdsParams.bEnableCustomScoreBoard = customScoreBoard ? true : false;
5350     sendKernelCmdsParams.pCustomScoreBoard = customScoreBoard;
5351     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(cmdBuffer, &sendKernelCmdsParams));
5352 
5353     // Add binding table
5354     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5355         m_stateHeapInterface,
5356         kernelState));
5357 
5358     return eStatus;
5359 }
5360 
EndKernelCall(CODECHAL_MEDIA_STATE_TYPE mediaStateType,PMHW_KERNEL_STATE kernelState,PMOS_COMMAND_BUFFER cmdBuffer)5361 MOS_STATUS CodechalEncHevcStateG9::EndKernelCall(
5362     CODECHAL_MEDIA_STATE_TYPE       mediaStateType,
5363     PMHW_KERNEL_STATE               kernelState,
5364     PMOS_COMMAND_BUFFER             cmdBuffer)
5365 {
5366     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5367 
5368     CODECHAL_ENCODE_FUNCTION_ENTER;
5369 
5370     MOS_UNUSED(kernelState);
5371 
5372     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(cmdBuffer, mediaStateType));
5373 
5374     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5375         m_stateHeapInterface,
5376         kernelState));
5377     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5378     {
5379         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5380             m_stateHeapInterface));
5381         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(cmdBuffer, nullptr));
5382     }
5383 
5384     CODECHAL_DEBUG_TOOL(
5385         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5386             mediaStateType,
5387             MHW_SSH_TYPE,
5388             kernelState));
5389         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5390             cmdBuffer,
5391             mediaStateType,
5392             nullptr)));
5393 
5394     )
5395 
5396     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5397 
5398     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(cmdBuffer));
5399 
5400     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5401     {
5402         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, m_renderContextUsesNullHw);
5403         m_lastTaskInPhase = false;
5404     }
5405 
5406     return eStatus;
5407 }
5408 
AddCurbeToStateHeap(PMHW_KERNEL_STATE kernelState,CODECHAL_MEDIA_STATE_TYPE mediaStateType,void * curbe,uint32_t curbeSize)5409 MOS_STATUS CodechalEncHevcStateG9::AddCurbeToStateHeap(
5410     PMHW_KERNEL_STATE               kernelState,
5411     CODECHAL_MEDIA_STATE_TYPE       mediaStateType,
5412     void*                           curbe,
5413     uint32_t                        curbeSize)
5414 {
5415     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5416 
5417     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
5418     MOS_UNUSED(mediaStateType);
5419 
5420     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
5421         curbe,
5422         kernelState->dwCurbeOffset,
5423         curbeSize));
5424 
5425     CODECHAL_DEBUG_TOOL(
5426 
5427         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5428             mediaStateType,
5429             MHW_DSH_TYPE,
5430             kernelState));
5431     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5432         mediaStateType,
5433         kernelState));
5434 
5435     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5436         mediaStateType,
5437         MHW_ISH_TYPE,
5438         kernelState));
5439     )
5440 
5441     return eStatus;
5442 }
5443 
SetSurfacesState(PMHW_KERNEL_STATE kernelState,PMOS_COMMAND_BUFFER cmdBuffer,SURFACE_ID surfaceId,uint32_t * bindingTableOffset,void * addr,uint32_t width,uint32_t height)5444 MOS_STATUS CodechalEncHevcStateG9::SetSurfacesState(
5445     PMHW_KERNEL_STATE kernelState,
5446     PMOS_COMMAND_BUFFER cmdBuffer,
5447     SURFACE_ID surfaceId,
5448     uint32_t* bindingTableOffset,
5449     void* addr,
5450     uint32_t width,
5451     uint32_t height)
5452 {
5453     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5454 
5455     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5456     CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTableOffset);
5457     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
5458 
5459     auto surfaceCodecParams = &m_surfaceParams[surfaceId];
5460     surfaceCodecParams->dwBindingTableOffset = bindingTableOffset[0];
5461 
5462     if (addr)
5463     {
5464         if (surfaceCodecParams->bIs2DSurface || surfaceCodecParams->bUseAdvState)
5465         {
5466             surfaceCodecParams->psSurface = (PMOS_SURFACE)addr;
5467         }
5468         else
5469         {
5470             surfaceCodecParams->presBuffer = (PMOS_RESOURCE)addr;
5471         }
5472     }
5473 
5474     // Some surface states do not always use fixed graphic memory address
5475     switch (surfaceId)
5476     {
5477         case SURFACE_HME_MVP:
5478             surfaceCodecParams->psSurface = m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer);
5479             break;
5480 
5481         case SURFACE_HME_DIST:
5482             surfaceCodecParams->psSurface = m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xDistortionBuffer);
5483             break;
5484 
5485         case SURFACE_BRC_DATA:
5486             surfaceCodecParams->psSurface = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
5487             break;
5488 
5489         case SURFACE_CU_RECORD:
5490         case SURFACE_HCP_PAK:
5491             surfaceCodecParams->presBuffer = &m_resMbCodeSurface;
5492             break;
5493 
5494         case SURFACE_RAW_Y:
5495         case SURFACE_RAW_Y_UV:
5496         case SURFACE_RAW_VME:
5497             if (m_hevcSeqParams->bit_depth_luma_minus8)  // use format converted surface if input is 10 bit
5498                 surfaceCodecParams->psSurface = &m_formatConvertedSurface[0];
5499             else
5500                 surfaceCodecParams->psSurface = m_rawSurfaceToEnc;
5501             break;
5502 
5503         default:
5504             break;
5505     }
5506 
5507     if (surfaceCodecParams->bIs2DSurface && surfaceCodecParams->bUseUVPlane)
5508     {
5509         surfaceCodecParams->dwUVBindingTableOffset = bindingTableOffset[1];
5510     }
5511 
5512     surfaceCodecParams->dwWidthInUse  = width;
5513     surfaceCodecParams->dwHeightInUse = height;
5514 
5515     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5516         m_hwInterface,
5517         cmdBuffer,
5518         surfaceCodecParams,
5519         kernelState));
5520 
5521     if (surfaceId != SURFACE_KERNEL_DEBUG &&
5522         surfaceId != SURFACE_HCP_PAK      &&
5523         surfaceId != SURFACE_CU_RECORD    &&
5524         surfaceId != SURFACE_BRC_HISTORY  &&
5525         surfaceId != SURFACE_BRC_ME_DIST)
5526     {
5527         if (surfaceCodecParams->bIsWritable)
5528         {
5529             surfaceCodecParams->bIsWritable = false; // reset to the default value
5530         }
5531 
5532         if (surfaceCodecParams->bRenderTarget)
5533         {
5534             surfaceCodecParams->bRenderTarget = false; // reset to the default value
5535         }
5536 
5537         if (surfaceCodecParams->bUse16UnormSurfaceFormat)
5538         {
5539             surfaceCodecParams->bUse16UnormSurfaceFormat = false; // reset to the default value
5540         }
5541     }
5542 
5543     return eStatus;
5544 }
5545 
PicCodingTypeToFrameType(uint32_t picType)5546 uint32_t CodechalEncHevcStateG9::PicCodingTypeToFrameType(uint32_t picType)
5547 {
5548     if (picType == I_TYPE)
5549     {
5550         return HEVC_BRC_FRAME_TYPE_I;
5551     }
5552     else if (picType == B_TYPE)
5553     {
5554         return (m_lowDelay) ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
5555     }
5556     else if (picType == B1_TYPE)
5557     {
5558         return HEVC_BRC_FRAME_TYPE_B1;
5559     }
5560     else if (picType == B2_TYPE)
5561     {
5562         return HEVC_BRC_FRAME_TYPE_B2;
5563     }
5564     else if (picType == P_TYPE && (!m_noMeKernelForPFrame))
5565     {
5566         m_lowDelay = true;
5567         return HEVC_BRC_FRAME_TYPE_P_OR_LB;
5568     }
5569     else
5570     {
5571         CODECHAL_ENCODE_ASSERT(false);
5572         return 0;
5573     }
5574 }
5575 
5576 /*
5577 sliceType: 0 (Intra), 1 (Inter P), 2 (inter B).
5578 intraSADTransform: 0-Regular, 1-Reserved, 2-HAAR, 3-HADAMARD
5579 */
CalcLambda(uint8_t sliceType,uint8_t intraSADTransform)5580 void CodechalEncHevcStateG9::CalcLambda(uint8_t sliceType, uint8_t intraSADTransform)
5581 {
5582     if (sliceType != CODECHAL_ENCODE_HEVC_I_SLICE)
5583     {
5584         MOS_SecureMemcpy(&m_qpLambdaMd[sliceType], sizeof(m_qpLambdaMd[sliceType]),
5585             &m_qpLambdaMdLut[sliceType], sizeof(m_qpLambdaMdLut[sliceType]));
5586 
5587         MOS_SecureMemcpy(&m_qpLambdaMe[sliceType], sizeof(m_qpLambdaMe[sliceType]),
5588             &m_qpLambdaMeLut[sliceType], sizeof(m_qpLambdaMeLut[sliceType]));
5589     }
5590     else
5591     {
5592         for (uint32_t qp = 0; qp < QP_NUM; qp++)
5593         {
5594             double qpTemp = (double)qp - 12;
5595             double lambdaMd = 0.85 * pow(2.0, qpTemp/3.0);
5596 
5597             if ((intraSADTransform != INTRA_TRANSFORM_HAAR) && (intraSADTransform != INTRA_TRANSFORM_HADAMARD))
5598             {
5599                 lambdaMd *= 0.95;
5600             }
5601 
5602             m_qpLambdaMd[sliceType][qp] =
5603             m_qpLambdaMe[sliceType][qp] = sqrt(lambdaMd);
5604         }
5605     }
5606 }
5607 
EncodeBrcInitResetKernel()5608 MOS_STATUS CodechalEncHevcStateG9::EncodeBrcInitResetKernel()
5609 {
5610     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5611 
5612     CODECHAL_ENCODE_FUNCTION_ENTER;
5613 
5614     PerfTagSetting    perfTag;
5615     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
5616 
5617     uint32_t krnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET;
5618 
5619     auto kernelState  = &m_brcKernelStates[krnIdx];
5620     auto bindingTable = &m_brcKernelBindingTable[krnIdx];
5621     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
5622     {
5623         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
5624     }
5625 
5626     //Setup DSH
5627     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5628         m_stateHeapInterface,
5629         kernelState,
5630         false,
5631         0,
5632         false,
5633         m_storeData));
5634 
5635     //Setup CURBE
5636     CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9 cmd, *curbe = &cmd;
5637     MOS_SecureMemcpy(curbe, sizeof(cmd), m_brcInitCurbeInit, sizeof(m_brcInitCurbeInit));
5638 
5639     curbe->DW0.Value = GetProfileLevelMaxFrameSize();
5640 
5641     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
5642         m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
5643         m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
5644     {
5645         if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
5646         {
5647             CODECHAL_ENCODE_ASSERT(false);
5648         }
5649 
5650         if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
5651         {
5652             CODECHAL_ENCODE_ASSERT(false);
5653         }
5654     }
5655 
5656     curbe->DW1.InitBufFull           = m_hevcSeqParams->InitVBVBufferFullnessInBit;
5657     curbe->DW2.BufSize               = m_hevcSeqParams->VBVBufferSizeInBit;
5658     curbe->DW3.TargetBitRate         = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5659     curbe->DW4.MaximumBitRate        = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
5660     curbe->DW9.FrameWidth            = m_oriFrameWidth;
5661     curbe->DW10.FrameHeight          = m_oriFrameHeight;
5662     curbe->DW12.NumberSlice          = m_numSlices;
5663 
5664     curbe->DW6.FrameRateM            = m_hevcSeqParams->FrameRate.Numerator;
5665     curbe->DW7.FrameRateD            = m_hevcSeqParams->FrameRate.Denominator;
5666     curbe->DW8.BRCFlag               = 0;
5667     curbe->DW8.BRCFlag |= (m_lcuBrcEnabled) ? 0 : CODECHAL_ENCODE_BRCINIT_DISABLE_MBBRC;
5668     // For non-ICQ, ACQP Buffer always set to 1
5669     curbe->DW25.ACQPBuffer           = 1;
5670 
5671     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
5672     {
5673         curbe->DW4.MaximumBitRate   = curbe->DW3.TargetBitRate;
5674         curbe->DW8.BRCFlag          |= curbe->DW8.BRCFlag | BRCINIT_ISCBR;
5675     }
5676     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
5677     {
5678         if (curbe->DW4.MaximumBitRate < curbe->DW3.TargetBitRate)
5679         {
5680             curbe->DW4.MaximumBitRate = 2 * curbe->DW3.TargetBitRate;
5681         }
5682         curbe->DW8.BRCFlag          |= curbe->DW8.BRCFlag | BRCINIT_ISVBR;
5683     }
5684     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
5685     {
5686         curbe->DW8.BRCFlag          |= curbe->DW8.BRCFlag | BRCINIT_ISAVBR;
5687         // For AVBR, max bitrate = target bitrate,
5688         curbe->DW3.TargetBitRate  = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5689         curbe->DW4.MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5690     }
5691     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
5692     {
5693         curbe->DW8.BRCFlag           |= curbe->DW8.BRCFlag | BRCINIT_ISICQ;
5694         curbe->DW25.ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
5695     }
5696     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
5697     {
5698         curbe->DW4.MaximumBitRate    = curbe->DW3.TargetBitRate;
5699         curbe->DW8.BRCFlag           |= curbe->DW8.BRCFlag | BRCINIT_ISVCM;
5700     }
5701 
5702     /**********************************************************************
5703     In case of non-HB/BPyramid Structure
5704     BRC_Param_A = GopP
5705     BRC_Param_B = GopB
5706     In case of HB/BPyramid GOP Structure
5707     BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
5708     BRC Parameters set as follows as per CModel equation
5709     ***********************************************************************/
5710     // BPyramid GOP
5711     if (m_hevcSeqParams->NumOfBInGop[1] != 0 || m_hevcSeqParams->NumOfBInGop[2] != 0)
5712     {
5713         curbe->DW8.BRC_Param_A   = ((m_hevcSeqParams->GopPicSize) / m_hevcSeqParams->GopRefDist);
5714         curbe->DW9.BRC_Param_B   = curbe->DW8.BRC_Param_A;
5715         curbe->DW13.BRC_Param_C  = curbe->DW8.BRC_Param_A * 2;
5716         curbe->DW14.BRC_Param_D  = ((m_hevcSeqParams->GopPicSize) - (curbe->DW8.BRC_Param_A) - (curbe->DW13.BRC_Param_C) - (curbe->DW9.BRC_Param_B));
5717         // B1 Level GOP
5718         if (m_hevcSeqParams->NumOfBInGop[2] == 0)
5719         {
5720             curbe->DW14.MaxBRCLevel = 3;
5721         }
5722         // B2 Level GOP
5723         else
5724         {
5725             curbe->DW14.MaxBRCLevel = 4;
5726         }
5727     }
5728     // For Regular GOP - No BPyramid
5729     else
5730     {
5731         curbe->DW14.MaxBRCLevel = 1;
5732         curbe->DW8.BRC_Param_A =
5733             (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
5734         curbe->DW9.BRC_Param_B = m_hevcSeqParams->GopPicSize - 1 - curbe->DW8.BRC_Param_A;
5735     }
5736 
5737     curbe->DW10.AVBRAccuracy    = m_usAvbrAccuracy;
5738     curbe->DW11.AVBRConvergence = m_usAvbrConvergence;
5739 
5740     // Set dynamic thresholds
5741     double inputBitsPerFrame =
5742         ((double)(curbe->DW4.MaximumBitRate) * (double)(curbe->DW7.FrameRateD) /
5743         (double)(curbe->DW6.FrameRateM));
5744 
5745     if (curbe->DW2.BufSize < (uint32_t)inputBitsPerFrame * 4)
5746     {
5747         curbe->DW2.BufSize = (uint32_t)inputBitsPerFrame * 4;
5748     }
5749 
5750     if (curbe->DW1.InitBufFull == 0)
5751     {
5752         curbe->DW1.InitBufFull = 7 * curbe->DW2.BufSize/8;
5753     }
5754     if (curbe->DW1.InitBufFull < (uint32_t)(inputBitsPerFrame*2))
5755     {
5756         curbe->DW1.InitBufFull = (uint32_t)(inputBitsPerFrame*2);
5757     }
5758     if (curbe->DW1.InitBufFull > curbe->DW2.BufSize)
5759     {
5760         curbe->DW1.InitBufFull = curbe->DW2.BufSize;
5761     }
5762 
5763     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
5764     {
5765         // For AVBR, Buffer size =  2*Bitrate, InitVBV = 0.75 * bufferSize
5766         curbe->DW2.BufSize     = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5767         curbe->DW1.InitBufFull = (uint32_t)(0.75 * curbe->DW2.BufSize);
5768     }
5769 
5770     double bpsRatio = inputBitsPerFrame / ((double)(curbe->DW2.BufSize)/30);
5771     bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;
5772 
5773     curbe->DW19.DeviationThreshold0_PBframe      = (uint32_t) (-50 * pow(0.90, bpsRatio));
5774     curbe->DW19.DeviationThreshold1_PBframe      = (uint32_t) (-50 * pow(0.66, bpsRatio));
5775     curbe->DW19.DeviationThreshold2_PBframe      = (uint32_t) (-50 * pow(0.46, bpsRatio));
5776     curbe->DW19.DeviationThreshold3_PBframe      = (uint32_t) (-50 * pow(0.3, bpsRatio));
5777 
5778     curbe->DW20.DeviationThreshold4_PBframe      = (uint32_t) (50 * pow(0.3, bpsRatio));
5779     curbe->DW20.DeviationThreshold5_PBframe      = (uint32_t) (50 * pow(0.46, bpsRatio));
5780     curbe->DW20.DeviationThreshold6_PBframe      = (uint32_t) (50 * pow(0.7, bpsRatio));
5781     curbe->DW20.DeviationThreshold7_PBframe      = (uint32_t) (50 * pow(0.9, bpsRatio));
5782 
5783     curbe->DW21.DeviationThreshold0_VBRcontrol   = (uint32_t) (-50 * pow(0.9, bpsRatio));
5784     curbe->DW21.DeviationThreshold1_VBRcontrol   = (uint32_t) (-50 * pow(0.7, bpsRatio));
5785     curbe->DW21.DeviationThreshold2_VBRcontrol   = (uint32_t) (-50 * pow(0.5, bpsRatio));
5786     curbe->DW21.DeviationThreshold3_VBRcontrol   = (uint32_t) (-50 * pow(0.3, bpsRatio));
5787 
5788     curbe->DW22.DeviationThreshold4_VBRcontrol   = (uint32_t) (100 * pow(0.4, bpsRatio));
5789     curbe->DW22.DeviationThreshold5_VBRcontrol   = (uint32_t) (100 * pow(0.5, bpsRatio));
5790     curbe->DW22.DeviationThreshold6_VBRcontrol   = (uint32_t) (100 * pow(0.75, bpsRatio));
5791     curbe->DW22.DeviationThreshold7_VBRcontrol   = (uint32_t) (100 * pow(0.9, bpsRatio));
5792 
5793     curbe->DW23.DeviationThreshold0_Iframe       = (uint32_t) (-50 * pow(0.8, bpsRatio));
5794     curbe->DW23.DeviationThreshold1_Iframe       = (uint32_t) (-50 * pow(0.6, bpsRatio));
5795     curbe->DW23.DeviationThreshold2_Iframe       = (uint32_t) (-50 * pow(0.34, bpsRatio));
5796     curbe->DW23.DeviationThreshold3_Iframe       = (uint32_t) (-50 * pow(0.2, bpsRatio));
5797 
5798     curbe->DW24.DeviationThreshold4_Iframe       = (uint32_t) (50 * pow(0.2, bpsRatio));
5799     curbe->DW24.DeviationThreshold5_Iframe       = (uint32_t) (50 * pow(0.4, bpsRatio));
5800     curbe->DW24.DeviationThreshold6_Iframe       = (uint32_t) (50 * pow(0.66, bpsRatio));
5801     curbe->DW24.DeviationThreshold7_Iframe       = (uint32_t) (50 * pow(0.9, bpsRatio));
5802 
5803     if (m_brcInit)
5804     {
5805         m_dBrcInitCurrentTargetBufFullInBits = curbe->DW1.InitBufFull;
5806     }
5807 
5808     m_brcInitResetBufSizeInBits      = curbe->DW2.BufSize;
5809     m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;
5810 
5811     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;
5812     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
5813 
5814 //#if (_DEBUG || _RELEASE_INTERNAL)
5815 //    if (m_swBrcMode != nullptr)
5816 //    {
5817 //        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcSwBrcImpl(
5818 //            m_debugInterface,
5819 //            encFunctionType,
5820 //            this,
5821 //            bBrcReset,
5822 //            kernelState,
5823 //            kernelState));
5824 //
5825 //        return eStatus;
5826 //    }
5827 //#endif // (_DEBUG || _RELEASE_INTERNAL)
5828 
5829     MOS_COMMAND_BUFFER cmdBuffer;
5830     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
5831         &cmdBuffer,
5832         kernelState,
5833         encFunctionType,
5834         nullptr));
5835 
5836     //Add surface states
5837     uint32_t startIndex = 0;
5838     // BRC history buffer
5839     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5840         kernelState,
5841         &cmdBuffer,
5842         SURFACE_BRC_HISTORY,
5843         &bindingTable->dwBindingTableEntries[startIndex++],
5844         &m_brcBuffers.resBrcHistoryBuffer));
5845 
5846     // Distortion data surface
5847     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5848         kernelState,
5849         &cmdBuffer,
5850         SURFACE_BRC_ME_DIST,
5851         &bindingTable->dwBindingTableEntries[startIndex++],
5852         &m_brcBuffers.sMeBrcDistortionBuffer));
5853 
5854     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
5855 
5856     MHW_MEDIA_OBJECT_PARAMS    mediaObjectParams;
5857     MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
5858 
5859     MediaObjectInlineData mediaObjectInlineData;
5860     MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
5861 
5862     mediaObjectParams.pInlineData = &mediaObjectInlineData;
5863     mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
5864     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObject(
5865         &cmdBuffer,
5866         nullptr,
5867         &mediaObjectParams));
5868 
5869     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
5870         encFunctionType,
5871         kernelState,
5872         &cmdBuffer));
5873 
5874     // debug dump
5875     CODECHAL_DEBUG_TOOL(
5876         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5877             &m_brcBuffers.resBrcHistoryBuffer,
5878             CodechalDbgAttr::attrOutput,
5879             "HistoryWrite",
5880             m_brcHistoryBufferSize,
5881             0,
5882             CODECHAL_MEDIA_STATE_BRC_INIT_RESET));
5883 
5884         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5885             &m_brcBuffers.sMeBrcDistortionBuffer,
5886             CodechalDbgAttr::attrOutput,
5887             "BrcDist",
5888             CODECHAL_MEDIA_STATE_BRC_INIT_RESET)););
5889 
5890     return eStatus;
5891 }
5892 
EncodeCoarseIntra16x16Kernel()5893 MOS_STATUS CodechalEncHevcStateG9::EncodeCoarseIntra16x16Kernel()
5894 {
5895     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5896 
5897     CODECHAL_ENCODE_FUNCTION_ENTER;
5898 
5899     PerfTagSetting perfTag;
5900     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_INTRA_DIST);
5901 
5902     uint32_t krnIdx = CODECHAL_HEVC_BRC_COARSE_INTRA;
5903 
5904     auto kernelState  = &m_brcKernelStates[krnIdx];
5905     auto bindingTable = &m_brcKernelBindingTable[krnIdx];
5906     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
5907     {
5908         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
5909     }
5910 
5911     //Setup DSH
5912     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5913         m_stateHeapInterface,
5914         kernelState,
5915         false,
5916         0,
5917         false,
5918         m_storeData));
5919 
5920     //Setup CURBE
5921     CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9 cmd, *curbe = &cmd;
5922     MOS_ZeroMemory(curbe, sizeof(*curbe));
5923 
5924     // the width and height is the resolution of 4x down-scaled surface
5925     curbe->DW0.PictureWidthInLumaSamples   = m_downscaledWidthInMb4x  << 4;
5926     curbe->DW0.PictureHeightInLumaSamples  = m_downscaledHeightInMb4x << 4;
5927 
5928     curbe->DW1.InterSAD                    = 2;
5929     curbe->DW1.IntraSAD                    = 2;
5930 
5931     uint32_t startBTI = 0;
5932     curbe->DW8.BTI_Src_Y4                  = bindingTable->dwBindingTableEntries[startBTI++];
5933     curbe->DW9.BTI_Intra_Dist              = bindingTable->dwBindingTableEntries[startBTI++];
5934     curbe->DW10.BTI_VME_Intra              = bindingTable->dwBindingTableEntries[startBTI++];
5935 
5936     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
5937 
5938     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_ENC_I_FRAME_DIST;
5939     CODECHAL_ENCODE_CHK_STATUS_RETURN(
5940         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))
5941     );
5942 
5943     MOS_COMMAND_BUFFER cmdBuffer;
5944     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
5945         &cmdBuffer,
5946         kernelState,
5947         encFunctionType,
5948         nullptr));
5949 
5950     //Add surface states
5951     startBTI = 0;
5952     //0: Source Y4
5953     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5954         kernelState,
5955         &cmdBuffer,
5956         SURFACE_Y_4X,
5957         &bindingTable->dwBindingTableEntries[startBTI++],
5958         m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER)));
5959 
5960     //1: Intra distortion surface
5961     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5962         kernelState,
5963         &cmdBuffer,
5964         SURFACE_BRC_ME_DIST,
5965         &bindingTable->dwBindingTableEntries[startBTI++],
5966         &m_brcBuffers.sBrcIntraDistortionBuffer));
5967 
5968     //2: Source Y4 for VME
5969     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5970         kernelState,
5971         &cmdBuffer,
5972         SURFACE_Y_4X_VME,
5973         &bindingTable->dwBindingTableEntries[startBTI++],
5974         m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER)));
5975 
5976     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
5977 
5978     if (!m_hwWalker)
5979     {
5980         eStatus = MOS_STATUS_UNKNOWN;
5981         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
5982         return eStatus;
5983     }
5984 
5985     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5986     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5987     walkerCodecParams.WalkerMode        = m_walkerMode;
5988     walkerCodecParams.dwResolutionX     = m_downscaledWidthInMb4x;
5989     walkerCodecParams.dwResolutionY     = m_downscaledHeightInMb4x;
5990     walkerCodecParams.bNoDependency     = true;
5991 
5992     MHW_WALKER_PARAMS walkerParams;
5993     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
5994         m_hwInterface,
5995         &walkerParams,
5996         &walkerCodecParams));
5997 
5998     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
5999         &cmdBuffer,
6000         &walkerParams));
6001 
6002     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6003         encFunctionType,
6004         kernelState,
6005         &cmdBuffer));
6006 
6007     return eStatus;
6008 }
6009 
GetDefaultCurbeEncBKernel(uint32_t & curbeSize)6010 uint32_t* CodechalEncHevcStateG9::GetDefaultCurbeEncBKernel(uint32_t& curbeSize)
6011 {
6012     CODECHAL_ENCODE_FUNCTION_ENTER;
6013 
6014     if (m_hevcSeqParams->TargetUsage == 0x07)
6015     {
6016         if(m_pictureCodingType == I_TYPE)
6017         {
6018             // When TU=7, there is no normal I kernel calls.
6019             // Instead, B kernel is used for I kernel function and a specfic CURBE setting needs to be used
6020             curbeSize = sizeof(m_encBTu7ICurbeInit);
6021             return (uint32_t*)m_encBTu7ICurbeInit;
6022         }
6023         else if (m_pictureCodingType == P_TYPE)
6024         {
6025             curbeSize = sizeof(m_encBTu7PCurbeInit);
6026             return (uint32_t*)m_encBTu7PCurbeInit;
6027         }
6028         else
6029         {
6030             curbeSize = sizeof(m_encBTu7BCurbeInit);
6031             return (uint32_t*)m_encBTu7BCurbeInit;
6032         }
6033     }
6034     else if (m_hevcSeqParams->TargetUsage == 0x04)
6035     {
6036         if (m_pictureCodingType == P_TYPE)
6037         {
6038             curbeSize = sizeof(m_encBTu4PCurbeInit);
6039             return (uint32_t*)m_encBTu4PCurbeInit;
6040         }
6041         else
6042         {
6043             curbeSize = sizeof(m_encBTu4BCurbeInit);
6044             return (uint32_t*)m_encBTu4BCurbeInit;
6045         }
6046     }
6047     else if (m_hevcSeqParams->TargetUsage == 0x01)
6048     {
6049         if (m_pictureCodingType == P_TYPE)
6050         {
6051             curbeSize = sizeof(m_encBTu1PCurbeInit);
6052             return (uint32_t*)m_encBTu1PCurbeInit;
6053         }
6054         else
6055         {
6056             curbeSize = sizeof(m_encBTu1BCurbeInit);
6057             return (uint32_t*)m_encBTu1BCurbeInit;
6058         }
6059     }
6060     else
6061     {
6062         CODECHAL_ENCODE_ASSERT(false);
6063     }
6064 
6065     return nullptr;
6066 }
6067 
SetupROISurface()6068 MOS_STATUS CodechalEncHevcStateG9::SetupROISurface()
6069 {
6070     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6071 
6072     CODECHAL_ENCODE_FUNCTION_ENTER;
6073 
6074     m_hevcPicParams->NumROI = MOS_MIN(m_hevcPicParams->NumROI, CODECHAL_ENCODE_HEVC_MAX_NUM_ROI);
6075 
6076     // Following code for configuring the ROI surface has been lifted from the CModel and
6077     // ported to work in the context of the driver instead.
6078 
6079     CODECHAL_ENC_HEVC_ROI_G9 currentROI[CODECHAL_ENCODE_HEVC_MAX_NUM_ROI] = { 0 };
6080     for (uint32_t i = 0; i < m_hevcPicParams->NumROI; ++i)
6081     {
6082         currentROI[i].Top    = m_hevcPicParams->ROI[i].Top;
6083         currentROI[i].Bottom = m_hevcPicParams->ROI[i].Bottom;
6084         currentROI[i].Left   = m_hevcPicParams->ROI[i].Left;
6085         currentROI[i].Right  = m_hevcPicParams->ROI[i].Right;
6086         if (m_brcEnabled && !m_roiValueInDeltaQp)
6087         {
6088             currentROI[i].ROI_Level = m_hevcPicParams->ROI[i].PriorityLevelOrDQp * 5;
6089         }
6090         else
6091         {
6092             currentROI[i].QPDelta = m_hevcPicParams->ROI[i].PriorityLevelOrDQp;
6093         }
6094     }
6095 
6096     MOS_LOCK_PARAMS lockParams;
6097     MOS_ZeroMemory(&lockParams, sizeof(lockParams));
6098     lockParams.ReadOnly = 1;
6099     uint32_t* data = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, &m_roiSurface.OsResource, &lockParams);
6100     if (!data)
6101     {
6102         eStatus = MOS_STATUS_INVALID_HANDLE;
6103         return eStatus;
6104     }
6105 
6106     uint32_t widthInMBsAligned = (m_picWidthInMb * 4 + 63) & ~63;
6107     uint32_t numMBs = m_picWidthInMb * m_picHeightInMb;
6108     for (uint32_t mb = 0 ; mb <= numMBs ; mb++)
6109     {
6110         int32_t curMbY = mb / m_picWidthInMb;
6111         int32_t curMbX = mb - curMbY * m_picWidthInMb;
6112 
6113         uint32_t outdata = 0;
6114         for (int32_t roi = (m_hevcPicParams->NumROI - 1); roi >= 0; roi--)
6115         {
6116             if ((currentROI[roi].ROI_Level == 0) && (currentROI[roi].QPDelta == 0))
6117             {
6118                 continue;
6119             }
6120 
6121             if ((curMbX >= (int32_t)currentROI[roi].Left) && (curMbX < (int32_t)currentROI[roi].Right) &&
6122                 (curMbY >= (int32_t)currentROI[roi].Top) && (curMbY < (int32_t)currentROI[roi].Bottom))
6123             {
6124                 outdata = 15 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6125             }
6126             else if ((curMbX >= (int32_t)currentROI[roi].Left - 1) && (curMbX < (int32_t)currentROI[roi].Right + 1) &&
6127                 (curMbY >= (int32_t)currentROI[roi].Top - 1) && (curMbY < (int32_t)currentROI[roi].Bottom + 1))
6128             {
6129                 outdata = 14 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6130             }
6131             else if ((curMbX >= (int32_t)currentROI[roi].Left - 2) && (curMbX < (int32_t)currentROI[roi].Right + 2) &&
6132                 (curMbY >= (int32_t)currentROI[roi].Top - 2) && (curMbY < (int32_t)currentROI[roi].Bottom + 2))
6133             {
6134                 outdata = 13 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6135             }
6136             else if ((curMbX >= (int32_t)currentROI[roi].Left - 3) && (curMbX < (int32_t)currentROI[roi].Right + 3) &&
6137                 (curMbY >= (int32_t)currentROI[roi].Top - 3) && (curMbY < (int32_t)currentROI[roi].Bottom + 3))
6138             {
6139                 outdata = 12 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6140             }
6141         }
6142         data[(curMbY * (widthInMBsAligned>>2)) + curMbX] = outdata;
6143     }
6144 
6145     m_osInterface->pfnUnlockResource(m_osInterface, &m_roiSurface.OsResource);
6146 
6147     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6148         &m_roiSurface,
6149         CodechalDbgAttr::attrInput,
6150         "BrcUpdate_ROI",
6151         CODECHAL_MEDIA_STATE_BRC_UPDATE)));
6152 
6153     return eStatus;
6154 }
6155 
SetupBrcConstantTable(PMOS_SURFACE brcConstantData)6156 MOS_STATUS CodechalEncHevcStateG9::SetupBrcConstantTable(PMOS_SURFACE brcConstantData)
6157 {
6158     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6159 
6160     CODECHAL_ENCODE_FUNCTION_ENTER;
6161 
6162     MOS_LOCK_PARAMS lockFlags;
6163     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6164     lockFlags.WriteOnly = true;
6165     uint8_t*  data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags);
6166     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6167 
6168     uint32_t size = brcConstantData->dwHeight * brcConstantData->dwWidth;
6169     // 576-byte of Qp adjust table
6170     MOS_SecureMemcpy(data, size, g_cInit_HEVC_BRC_QP_ADJUST, sizeof(g_cInit_HEVC_BRC_QP_ADJUST));
6171     data += sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
6172     size -= sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
6173 
6174     const uint32_t sizeSkipValTable = HEVC_BRC_SKIP_VAL_TABLE_SIZE;
6175     const uint32_t sizelambdaTable = HEVC_BRC_LAMBDA_TABLE_SIZE;
6176 
6177     // Skip thread table
6178     if(m_pictureCodingType == I_TYPE)
6179     {
6180         MOS_ZeroMemory(data, sizeSkipValTable);
6181     }
6182     else
6183     {
6184         uint32_t curbeSize = 0;
6185         void* defaultCurbe = (void*)GetDefaultCurbeEncBKernel(curbeSize);
6186         CODECHAL_ENCODE_ASSERT(defaultCurbe);
6187 
6188         CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
6189         CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
6190 
6191         if(curbe->DW3.BlockBasedSkipEnable)
6192         {
6193             MOS_SecureMemcpy(data, size, m_skipThread[1], sizeof(m_skipThread[1]));
6194         }
6195         else
6196         {
6197             MOS_SecureMemcpy(data, size, m_skipThread[0], sizeof(m_skipThread[0]));
6198         }
6199     }
6200     data += sizeSkipValTable;
6201     size -= sizeSkipValTable;
6202 
6203     //lambda value table
6204     MOS_SecureMemcpy(data, size, m_brcLambdaHaar, sizeof(m_brcLambdaHaar));
6205     data += sizelambdaTable;
6206     size -= sizelambdaTable;
6207 
6208     //Mv mode cost table
6209     if(m_pictureCodingType == I_TYPE)
6210     {
6211         MOS_SecureMemcpy(data, size, m_brcMvCostHaar[0], sizeof(m_brcMvCostHaar[0]));
6212     }
6213     else if (m_pictureCodingType == P_TYPE)
6214     {
6215         MOS_SecureMemcpy(data, size, m_brcMvCostHaar[1], sizeof(m_brcMvCostHaar[1]));
6216     }
6217     else
6218     {
6219         MOS_SecureMemcpy(data, size, m_brcMvCostHaar[2], sizeof(m_brcMvCostHaar[2]));
6220     }
6221 
6222     m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);
6223 
6224     return eStatus;
6225 }
6226 
Convert1byteTo2bytesQPperLCU(PMOS_SURFACE lcuQPIn,PMOS_SURFACE lcuQPOut)6227 MOS_STATUS CodechalEncHevcStateG9::Convert1byteTo2bytesQPperLCU(PMOS_SURFACE lcuQPIn, PMOS_SURFACE lcuQPOut)
6228 {
6229     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6230 
6231     CODECHAL_ENCODE_FUNCTION_ENTER;
6232 
6233     MOS_LOCK_PARAMS lockFlagsIn;
6234     MOS_LOCK_PARAMS lockFlagsOut;
6235     MOS_ZeroMemory(&lockFlagsIn,  sizeof(MOS_LOCK_PARAMS));
6236     MOS_ZeroMemory(&lockFlagsOut, sizeof(MOS_LOCK_PARAMS));
6237 
6238     lockFlagsIn.ReadOnly = true;
6239     uint8_t*  dataIn = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &lcuQPIn->OsResource, &lockFlagsIn);
6240     CODECHAL_ENCODE_CHK_NULL_RETURN(dataIn);
6241 
6242     lockFlagsOut.WriteOnly = true;
6243     uint8_t*  dataOut = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &lcuQPOut->OsResource, &lockFlagsOut);
6244     CODECHAL_ENCODE_CHK_NULL_RETURN(dataOut);
6245 
6246     for(uint32_t h = 0; h < lcuQPIn->dwHeight; h++)
6247     {
6248         for(uint32_t w = 0; w < lcuQPIn->dwWidth; w++)
6249         {
6250             *(dataOut + h * lcuQPOut->dwPitch + 2 * w)     = *(dataIn + h * lcuQPIn->dwPitch + w);
6251             *(dataOut + h * lcuQPOut->dwPitch + 2 * w + 1) = 0;
6252         }
6253     }
6254 
6255     m_osInterface->pfnUnlockResource(m_osInterface, &lcuQPIn->OsResource);
6256     m_osInterface->pfnUnlockResource(m_osInterface, &lcuQPOut->OsResource);
6257 
6258     return eStatus;
6259 }
6260 
SetupROICurbe(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 curbe)6261 MOS_STATUS CodechalEncHevcStateG9::SetupROICurbe(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 curbe)
6262 {
6263     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6264 
6265     curbe->DW6.CQPValue = 0;
6266     curbe->DW6.ROIFlag  = 0x1 | (m_brcEnabled << 1) | (m_hevcSeqParams->bVideoSurveillance << 2);
6267 
6268     uint32_t roiSize = 0;
6269     for (uint32_t i = 0; i < m_hevcPicParams->NumROI; ++i)
6270     {
6271         roiSize += (CODECHAL_MACROBLOCK_HEIGHT * MOS_ABS(m_hevcPicParams->ROI[i].Top - m_hevcPicParams->ROI[i].Bottom)) *
6272                    (CODECHAL_MACROBLOCK_WIDTH * MOS_ABS(m_hevcPicParams->ROI[i].Right - m_hevcPicParams->ROI[i].Left));
6273     }
6274 
6275     uint32_t roiRatio = 0;
6276     if (roiSize)
6277     {
6278         uint32_t numMBs = m_picWidthInMb * m_picHeightInMb;
6279         roiRatio = 2 * (numMBs * 256 / roiSize - 1);
6280         roiRatio = MOS_MIN(51, roiRatio); // clip QP from 0-51
6281     }
6282 
6283     curbe->DW6.ROIRatio        = roiRatio;
6284     curbe->DW7.FrameWidthInLCU = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
6285 
6286     // if the BRC update LCU kernel is being launched in CQP mode we need to add
6287     // the minimum required parameters it needs to run.  This is used in ROI CQP.
6288     // In the case of BRC the CURBE will already be set up from frame update setup.
6289     if (!m_brcEnabled)
6290     {
6291         curbe->DW1.FrameNumber     = m_storeData - 1;
6292         curbe->DW6.CQPValue        = CalSliceQp();
6293         curbe->DW5.CurrFrameType   = PicCodingTypeToFrameType(m_pictureCodingType);
6294     }
6295 
6296     return eStatus;
6297 }
6298 
EncodeBrcUpdateLCUBasedKernel(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 frameBasedBrcCurbe)6299 MOS_STATUS CodechalEncHevcStateG9::EncodeBrcUpdateLCUBasedKernel(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 frameBasedBrcCurbe)
6300 {
6301     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6302 
6303     CODECHAL_ENCODE_FUNCTION_ENTER;
6304 
6305     PerfTagSetting perfTag;
6306     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);
6307 
6308     uint32_t krnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE;
6309     auto     kernelState  = &m_brcKernelStates[krnIdx];
6310     auto     bindingTable = &m_brcKernelBindingTable[krnIdx];
6311 
6312     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
6313     {
6314         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
6315     }
6316 
6317     // Setup DSH
6318     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6319         m_stateHeapInterface,
6320         kernelState,
6321         false,
6322         0,
6323         false,
6324         m_storeData));
6325 
6326     // Setup Curbe
6327     CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 cmd, *curbe = &cmd;
6328     if (m_brcEnabled)
6329     {
6330         MOS_SecureMemcpy(curbe, sizeof(cmd), frameBasedBrcCurbe, sizeof(*frameBasedBrcCurbe));
6331     }
6332     else
6333     {
6334         //confiure LCU BRC Update CURBE for CQP (used in ROI) here
6335         MOS_SecureMemcpy(curbe, sizeof(cmd), m_brcUpdateCurbeInit, sizeof(m_brcUpdateCurbeInit));
6336     }
6337 
6338     if (m_hevcPicParams->NumROI)
6339     {
6340         SetupROICurbe(&cmd);
6341         SetupROISurface();
6342     }
6343 
6344     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_BRC_LCU_UPDATE;
6345     CODECHAL_ENCODE_CHK_STATUS_RETURN(
6346         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
6347 
6348     MOS_COMMAND_BUFFER cmdBuffer;
6349     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
6350         &cmdBuffer,
6351         kernelState,
6352         encFunctionType,
6353         nullptr));
6354 
6355     //Add surface states
6356     uint32_t startIndex = 0;
6357 
6358     //0: BRC history buffer
6359     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6360         kernelState,
6361         &cmdBuffer,
6362         SURFACE_BRC_HISTORY,
6363         &bindingTable->dwBindingTableEntries[startIndex++],
6364         &m_brcBuffers.resBrcHistoryBuffer));
6365 
6366     //1: BRC distortion data surface : when picture type is I-type, both inter and intra distortion are the same
6367     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6368         kernelState,
6369         &cmdBuffer,
6370         SURFACE_BRC_ME_DIST,
6371         &bindingTable->dwBindingTableEntries[startIndex++],
6372         m_brcDistortion));
6373 
6374     //2: Intra distortion data surface
6375     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6376         kernelState,
6377         &cmdBuffer,
6378         SURFACE_BRC_ME_DIST,
6379         &bindingTable->dwBindingTableEntries[startIndex++],
6380         &m_brcBuffers.sBrcIntraDistortionBuffer));
6381 
6382     if(m_hmeSupported)
6383     {
6384     //3: HME MV surface
6385     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6386         kernelState,
6387         &cmdBuffer,
6388         SURFACE_HME_MVP,
6389         &bindingTable->dwBindingTableEntries[startIndex++]));
6390     }
6391     else
6392     {
6393         startIndex++;
6394     }
6395 
6396     //4: LCU Qp surface
6397     m_surfaceParams[SURFACE_LCU_QP].bIsWritable =
6398     m_surfaceParams[SURFACE_LCU_QP].bRenderTarget = true;
6399     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6400         kernelState,
6401         &cmdBuffer,
6402         SURFACE_LCU_QP,
6403         &bindingTable->dwBindingTableEntries[startIndex++],
6404         &m_brcBuffers.sBrcMbQpBuffer));
6405 
6406     //5:  ROI Surface
6407     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6408         kernelState,
6409         &cmdBuffer,
6410         SURFACE_ROI,
6411         &bindingTable->dwBindingTableEntries[startIndex++],
6412         &m_roiSurface));
6413 
6414     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
6415 
6416     if (!m_hwWalker)
6417     {
6418         eStatus = MOS_STATUS_UNKNOWN;
6419         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
6420         return eStatus;
6421     }
6422 
6423     // LCU-based kernel needs to be executed in 4x4 LCU mode (128x128 per block)
6424     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6425     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6426     walkerCodecParams.WalkerMode            = m_walkerMode;
6427     walkerCodecParams.dwResolutionX         = MOS_ALIGN_CEIL(m_frameWidth, 128) >> 7;
6428     walkerCodecParams.dwResolutionY         = MOS_ALIGN_CEIL(m_frameHeight, 128) >> 7;
6429     /* Enforce no dependency dispatch order for LCU-based BRC update kernel  */
6430     walkerCodecParams.bNoDependency         = true;
6431     walkerCodecParams.wPictureCodingType    = m_pictureCodingType;
6432     walkerCodecParams.bUseScoreboard        = m_useHwScoreboard;
6433 
6434     MHW_WALKER_PARAMS walkerParams;
6435     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
6436         m_hwInterface,
6437         &walkerParams,
6438         &walkerCodecParams));
6439 
6440     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
6441         &cmdBuffer,
6442         &walkerParams));
6443 
6444     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6445         encFunctionType,
6446         kernelState,
6447         &cmdBuffer));
6448 
6449     return eStatus;
6450 }
6451 
EncodeBrcUpdateKernel()6452 MOS_STATUS CodechalEncHevcStateG9::EncodeBrcUpdateKernel()
6453 {
6454     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6455 
6456     CODECHAL_ENCODE_FUNCTION_ENTER;
6457 
6458     PerfTagSetting    perfTag;
6459     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
6460 
6461     uint32_t krnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
6462     auto     kernelState  = &m_brcKernelStates[krnIdx];
6463     auto     bindingTable = &m_brcKernelBindingTable[krnIdx];
6464     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
6465     {
6466         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
6467     }
6468 
6469     // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
6470     auto                     brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
6471     MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
6472     mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams;
6473     mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams;
6474     mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses();
6475 
6476     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
6477 
6478     auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
6479     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
6480 
6481     // debug dump
6482     CODECHAL_DEBUG_TOOL(
6483         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6484             &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
6485             CodechalDbgAttr::attrInput,
6486             "ImgStateRead",
6487             BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
6488             0,
6489             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6490 
6491         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6492             &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
6493             CodechalDbgAttr::attrInput,
6494             "ConstData",
6495             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6496 
6497         // PAK statistics buffer is only dumped for BrcUpdate kernel input
6498         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6499             &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
6500             CodechalDbgAttr::attrInput,
6501             "PakStats",
6502             HEVC_BRC_PAK_STATISTCS_SIZE,
6503             0,
6504             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6505         // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces
6506         if (m_brcDistortion) {
6507             CODECHAL_ENCODE_CHK_STATUS_RETURN(
6508                 m_debugInterface->DumpBuffer(
6509                     &m_brcDistortion->OsResource,
6510                     CodechalDbgAttr::attrInput,
6511                     "BrcDist",
6512                     m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6513                     m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6514                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
6515         } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcBuffers.resBrcHistoryBuffer,
6516             CodechalDbgAttr::attrInput,
6517             "HistoryRead",
6518             m_brcHistoryBufferSize,
6519             0,
6520             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6521         if (m_brcBuffers.pMbEncKernelStateInUse) {
6522             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6523                 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6524                 m_brcBuffers.pMbEncKernelStateInUse));
6525         } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer,
6526             CodechalDbgAttr::attrInput,
6527             "MBStatsSurf",
6528             m_hwInterface->m_avcMbStatBufferSize,
6529             0,
6530             CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6531     // Setup DSH
6532     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6533         m_stateHeapInterface,
6534         kernelState,
6535         false,
6536         0,
6537         false,
6538         m_storeData));
6539 
6540     // Setup Curbe
6541     CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 cmd, *curbe = &cmd;
6542     MOS_SecureMemcpy(curbe, sizeof(cmd), m_brcUpdateCurbeInit, sizeof(m_brcUpdateCurbeInit));
6543 
6544     curbe->DW5.TARGETSIZE_FLAG = 0;
6545 
6546     if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits)
6547     {
6548         m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits;
6549         curbe->DW5.TARGETSIZE_FLAG = 1;
6550     }
6551 
6552     if (m_numSkipFrames)
6553     {
6554         // pass num/size of skipped frames to update BRC
6555         curbe->DW6.NumSkippedFrames      = m_numSkipFrames;
6556         curbe->DW15.SizeOfSkippedFrames  = m_sizeSkipFrames;
6557 
6558         // account for skipped frame in calculating CurrentTargetBufFullInBits
6559         m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
6560     }
6561 
6562     curbe->DW0.TARGETSIZE        = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
6563     curbe->DW1.FrameNumber       = m_storeData - 1;
6564 
6565     curbe->DW2.PictureHeaderSize = GetPicHdrSize();
6566 
6567     curbe->DW5.CurrFrameType     = PicCodingTypeToFrameType(m_pictureCodingType);
6568 
6569     // Only brc init uses BRCFlag, brc update does NOT use it (it's reserved bits)
6570     curbe->DW5.BRCFlag = 0;
6571 
6572     // Notes from BRC Kernel
6573     /***********************************************************************************************************
6574     * When update kernel Curbe GRF 1.7 bit 15 is set to 1:
6575     * BRC matched with Arch CModel SVN revision 13030 with part of HRD BRC fix in svn 14029 and svn 14228 [HRD]
6576     *
6577     * When update kernel Curbe GRF 1.7 bit 15 is set to 0:
6578     * BRC matched with Arch CModel SVN revision 13419 [HRD Fix] with svn 13833, svn 13827 [Quality] and
6579     * part of BRC fix in svn 14029, svn 14228, svn 13845 [HRD]
6580     ************************************************************************************************************/
6581     curbe->DW7.KernelBuildControl = 0;
6582 
6583     curbe->DW7.ucMinQp = m_hevcPicParams->BRCMinQp;
6584     curbe->DW7.ucMaxQp = m_hevcPicParams->BRCMaxQp;
6585 
6586     if (m_hevcPicParams->NumROI)
6587     {
6588         SetupROICurbe(&cmd);
6589     }
6590     curbe->DW14.ParallelMode = m_hevcSeqParams->ParallelBRC;
6591 
6592     curbe->DW5.MaxNumPAKs = m_mfxInterface->GetBrcNumPakPasses();
6593 
6594     m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6595 
6596     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
6597     {
6598         curbe->DW3.startGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
6599         curbe->DW3.startGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
6600         curbe->DW4.startGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
6601         curbe->DW4.startGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);
6602 
6603         curbe->DW11.gRateRatioThreshold0 =
6604             (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40)));
6605         curbe->DW11.gRateRatioThreshold1 =
6606             (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75)));
6607         curbe->DW12.gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97)));
6608         curbe->DW12.gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100)));
6609         curbe->DW12.gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100)));
6610         curbe->DW12.gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100)));
6611     }
6612     else
6613     {
6614         // default CURBE setting is zero. So, driver needs to program them.
6615         curbe->DW3.startGAdjFrame0 = 10;
6616         curbe->DW3.startGAdjFrame1 = 50;
6617         curbe->DW4.startGAdjFrame2 = 100;
6618         curbe->DW4.startGAdjFrame3 = 150;
6619 
6620         curbe->DW11.gRateRatioThreshold0 = 40;
6621         curbe->DW11.gRateRatioThreshold1 = 75;
6622         curbe->DW12.gRateRatioThreshold2 = 97;
6623         curbe->DW12.gRateRatioThreshold3 = 103;
6624         curbe->DW12.gRateRatioThreshold4 = 125;
6625         curbe->DW12.gRateRatioThreshold5 = 160;
6626     }
6627 
6628     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
6629     CODECHAL_ENCODE_CHK_STATUS_RETURN(
6630         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
6631 
6632 //#if (_DEBUG || _RELEASE_INTERNAL)
6633 //    if (m_swBrcMode != nullptr)
6634 //    {
6635 //        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcSwBrcImpl(
6636 //            m_debugInterface,
6637 //            encFunctionType,
6638 //            this,
6639 //            false,
6640 //            kernelState,
6641 //            kernelState));
6642 //
6643 //        if (bLcuBrcEnabled || pHevcPicParams->NumROI)
6644 //        {
6645 //            // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
6646 //            CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(curbe));
6647 //        }
6648 //        return eStatus;
6649 //    }
6650 //#endif // (_DEBUG || _RELEASE_INTERNAL)
6651 
6652     MOS_COMMAND_BUFFER cmdBuffer;
6653     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
6654         kernelState,
6655         encFunctionType,
6656         nullptr));
6657 
6658     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase || !m_singleTaskPhaseSupportedInPak)
6659     {
6660         CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckBrcPakStasBuffer(&cmdBuffer));
6661     }
6662 
6663     //Add surface states
6664     uint32_t startIndex = 0;
6665     //0: BRC history buffer
6666     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6667         kernelState,
6668         &cmdBuffer,
6669         SURFACE_BRC_HISTORY,
6670         &bindingTable->dwBindingTableEntries[startIndex++],
6671         &m_brcBuffers.resBrcHistoryBuffer));
6672 
6673     //1: Previous PAK statistics output buffer
6674     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6675         kernelState,
6676         &cmdBuffer,
6677         SURFACE_BRC_PAST_PAK_INFO,
6678         &bindingTable->dwBindingTableEntries[startIndex++],
6679         &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead]));
6680 
6681     //2: HCP_PIC_STATE buffer for read
6682     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6683         kernelState,
6684         &cmdBuffer,
6685         SURFACE_BRC_HCP_PIC_STATE,
6686         &bindingTable->dwBindingTableEntries[startIndex++],
6687         brcHcpStateReadBuffer));
6688 
6689     //3: HCP_PIC_STATE buffer for write
6690     m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE].bIsWritable =
6691     m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE].bRenderTarget = true;
6692     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6693         kernelState,
6694         &cmdBuffer,
6695         SURFACE_BRC_HCP_PIC_STATE,
6696         &bindingTable->dwBindingTableEntries[startIndex++],
6697         &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]));
6698 
6699     //4: BRC input surface for ENC kernels (output of BRC kernel)
6700     m_surfaceParams[SURFACE_BRC_INPUT].bIsWritable =
6701     m_surfaceParams[SURFACE_BRC_INPUT].bRenderTarget = true;
6702     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6703         kernelState,
6704         &cmdBuffer,
6705         SURFACE_BRC_INPUT,
6706         &bindingTable->dwBindingTableEntries[startIndex++]));
6707 
6708     //5: Distortion data surface
6709     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6710         kernelState,
6711         &cmdBuffer,
6712         SURFACE_BRC_ME_DIST,
6713         &bindingTable->dwBindingTableEntries[startIndex++],
6714         m_brcDistortion));
6715 
6716     //6: BRC data surface
6717     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6718         kernelState,
6719         &cmdBuffer,
6720         SURFACE_BRC_DATA,
6721         &bindingTable->dwBindingTableEntries[startIndex++]));
6722 
6723     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
6724 
6725     MHW_MEDIA_OBJECT_PARAMS    mediaObjectParams;
6726     MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
6727     MediaObjectInlineData mediaObjectInlineData;
6728     MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
6729     mediaObjectParams.pInlineData = &mediaObjectInlineData;
6730     mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
6731     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObject(
6732         &cmdBuffer,
6733         nullptr,
6734         &mediaObjectParams));
6735 
6736     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6737         encFunctionType,
6738         kernelState,
6739         &cmdBuffer));
6740 
6741     if (m_lcuBrcEnabled || m_hevcPicParams->NumROI)
6742     {
6743         // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
6744         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(curbe));
6745     }
6746 
6747     CODECHAL_DEBUG_TOOL(
6748         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6749             &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
6750             CodechalDbgAttr::attrOutput,
6751             "ImgStateWrite",
6752             BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
6753             0,
6754             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6755 
6756         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6757             &m_brcBuffers.resBrcHistoryBuffer,
6758             CodechalDbgAttr::attrOutput,
6759             "HistoryWrite",
6760             m_brcHistoryBufferSize,
6761             0,
6762             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6763         if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
6764             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6765                 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
6766                 CodechalDbgAttr::attrOutput,
6767                 "MbQp",
6768                 m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
6769                 m_brcBuffers.dwBrcMbQpBottomFieldOffset,
6770                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6771         } if (m_brcBuffers.pMbEncKernelStateInUse) {
6772             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6773                 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6774                 m_brcBuffers.pMbEncKernelStateInUse));
6775         } if (m_mbencBrcBufferSize > 0) {
6776             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6777                 &m_brcBuffers.resMbEncBrcBuffer,
6778                 CodechalDbgAttr::attrOutput,
6779                 "MbEncBRCWrite",
6780                 m_mbencBrcBufferSize,
6781                 0,
6782                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6783         }
6784 
6785         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6786             (MOS_RESOURCE *)m_allocator->GetResource(m_standard, brcInputForEncKernel),
6787             CodechalDbgAttr::attrOutput,
6788             "CombinedEnc",
6789             128,
6790             0,
6791             CODECHAL_MEDIA_STATE_BRC_UPDATE));
6792 
6793         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6794             &m_brcBuffers.sBrcIntraDistortionBuffer.OsResource,
6795             CodechalDbgAttr::attrOutput,
6796             "IDistortion",
6797             m_brcBuffers.sBrcIntraDistortionBuffer.dwWidth * m_brcBuffers.sBrcIntraDistortionBuffer.dwHeight,
6798             0,
6799             CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6800 
6801     //reset info of skip frame
6802     m_numSkipFrames  = 0;
6803     m_sizeSkipFrames = 0;
6804     return eStatus;
6805 }
6806 
Encode8x8BPakKernel(PCODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 encBCurbe)6807 MOS_STATUS CodechalEncHevcStateG9::Encode8x8BPakKernel(
6808     PCODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 encBCurbe)
6809 {
6810     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6811 
6812     CODECHAL_ENCODE_FUNCTION_ENTER;
6813 
6814     CODECHAL_ENCODE_CHK_NULL_RETURN(encBCurbe);
6815 
6816     PerfTagSetting perfTag;
6817     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL);
6818 
6819     uint32_t krnIdx = CODECHAL_HEVC_MBENC_BPAK;
6820     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
6821     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
6822     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
6823     {
6824         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
6825     }
6826 
6827     //Setup DSH
6828     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6829         m_stateHeapInterface,
6830         kernelState,
6831         false,
6832         0,
6833         false,
6834         m_storeData));
6835 
6836     //Setup CURBE
6837     CODECHAL_ENC_HEVC_B_PAK_CURBE_G9  cmd, *curbe = &cmd;
6838     MOS_ZeroMemory(curbe, sizeof(*curbe));
6839     curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
6840     curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
6841 
6842     curbe->DW1.MaxVmvR                 = encBCurbe->DW44.MaxVmvR;
6843     curbe->DW1.Qp                      = encBCurbe->DW13.QpPrimeY;
6844     curbe->DW2.BrcEnable               = encBCurbe->DW36.BRCEnable;
6845     curbe->DW2.LcuBrcEnable            = encBCurbe->DW36.LCUBRCEnable;
6846     curbe->DW2.ScreenContent           = encBCurbe->DW47.ScreenContentFlag;
6847     curbe->DW2.SimplestIntraEnable     = encBCurbe->DW47.SkipIntraKrnFlag;
6848     curbe->DW2.SliceType               = encBCurbe->DW4.SliceType;
6849     curbe->DW2.ROIEnable               = (m_hevcPicParams->NumROI > 0);
6850     curbe->DW2.FASTSurveillanceFlag    = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
6851     // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
6852     curbe->DW2.KBLControlFlag          = UsePlatformControlFlag();
6853     curbe->DW2.EnableRollingIntra      = m_hevcPicParams->bEnableRollingIntraRefresh;
6854     curbe->DW3.IntraRefreshQPDelta     = m_hevcPicParams->QpDeltaForInsertedIntra;
6855     curbe->DW3.IntraRefreshMBNum       = m_hevcPicParams->IntraInsertionLocation;
6856     curbe->DW3.IntraRefreshUnitInMB    = m_hevcPicParams->IntraInsertionSize;
6857 
6858     uint32_t startBTI = 0;
6859     curbe->DW16.BTI_CU_Record          = bindingTable->dwBindingTableEntries[startBTI++];
6860     curbe->DW17.BTI_PAK_Obj            = bindingTable->dwBindingTableEntries[startBTI++];
6861     curbe->DW18.BTI_Slice_Map          = bindingTable->dwBindingTableEntries[startBTI++];
6862     curbe->DW19.BTI_Brc_Input          = bindingTable->dwBindingTableEntries[startBTI++];
6863     curbe->DW20.BTI_LCU_Qp             = bindingTable->dwBindingTableEntries[startBTI++];
6864     curbe->DW21.BTI_Brc_Data           = bindingTable->dwBindingTableEntries[startBTI++];
6865     curbe->DW22.BTI_MB_Data            = bindingTable->dwBindingTableEntries[startBTI++];
6866     curbe->DW23.BTI_MVP_Surface        = bindingTable->dwBindingTableEntries[startBTI++];
6867     curbe->DW24.BTI_WA_PAK_Data        = bindingTable->dwBindingTableEntries[startBTI++];
6868     curbe->DW25.BTI_WA_PAK_Obj         = bindingTable->dwBindingTableEntries[startBTI++];
6869     curbe->DW26.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
6870 
6871     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
6872 
6873     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK;
6874     CODECHAL_ENCODE_CHK_STATUS_RETURN(
6875         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
6876 
6877     MOS_COMMAND_BUFFER cmdBuffer;
6878     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
6879         &cmdBuffer,
6880         kernelState,
6881         encFunctionType,
6882         nullptr));
6883 
6884     //Add surface states
6885     startBTI = 0;
6886     //0: CU record
6887     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6888         kernelState,
6889         &cmdBuffer,
6890         SURFACE_CU_RECORD,
6891         &bindingTable->dwBindingTableEntries[startBTI++]));
6892 
6893     //1: PAK command
6894     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6895         kernelState,
6896         &cmdBuffer,
6897         SURFACE_HCP_PAK,
6898         &bindingTable->dwBindingTableEntries[startBTI++]));
6899 
6900     //2: slice map
6901     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6902         kernelState,
6903         &cmdBuffer,
6904         SURFACE_SLICE_MAP,
6905         &bindingTable->dwBindingTableEntries[startBTI++]));
6906 
6907     // 3: BRC Input
6908     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6909         kernelState,
6910         &cmdBuffer,
6911         SURFACE_BRC_INPUT,
6912         &bindingTable->dwBindingTableEntries[startBTI++]));
6913 
6914     // 4: LCU Qp
6915     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6916         kernelState,
6917         &cmdBuffer,
6918         SURFACE_LCU_QP,
6919         &bindingTable->dwBindingTableEntries[startBTI++]));
6920 
6921     // 5: LCU BRC constant
6922     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6923         kernelState,
6924         &cmdBuffer,
6925         SURFACE_BRC_DATA,
6926         &bindingTable->dwBindingTableEntries[startBTI++]));
6927 
6928     // 6: MV index buffer or MB data
6929     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6930         kernelState,
6931         &cmdBuffer,
6932         SURFACE_MB_MV_INDEX,
6933         &bindingTable->dwBindingTableEntries[startBTI++]));
6934 
6935     // 7: MVP index buffer
6936     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6937         kernelState,
6938         &cmdBuffer,
6939         SURFACE_MVP_INDEX,
6940         &bindingTable->dwBindingTableEntries[startBTI++]));
6941 
6942     if (!m_hwWalker)
6943     {
6944         eStatus = MOS_STATUS_UNKNOWN;
6945         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
6946         return eStatus;
6947     }
6948 
6949     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6950     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6951     walkerCodecParams.WalkerMode            = m_walkerMode;
6952     /* looping for Walker is needed at 8x8 block level */
6953     walkerCodecParams.dwResolutionX         = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
6954     walkerCodecParams.dwResolutionY         = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
6955     /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel  */
6956     walkerCodecParams.bNoDependency         = true;
6957     walkerCodecParams.wPictureCodingType    = m_pictureCodingType;
6958     walkerCodecParams.bUseScoreboard        = m_useHwScoreboard;
6959 
6960     MHW_WALKER_PARAMS walkerParams;
6961     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
6962         m_hwInterface,
6963         &walkerParams,
6964         &walkerCodecParams));
6965 
6966     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
6967         &cmdBuffer,
6968         &walkerParams));
6969 
6970     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6971         encFunctionType,
6972         kernelState,
6973         &cmdBuffer));
6974 
6975     return eStatus;
6976 }
6977 
Encode8x8PBMbEncKernel()6978 MOS_STATUS CodechalEncHevcStateG9::Encode8x8PBMbEncKernel()
6979 {
6980     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6981 
6982     CODECHAL_ENCODE_FUNCTION_ENTER;
6983 
6984     PerfTagSetting perfTag;
6985     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
6986 
6987     uint32_t krnIdx = CODECHAL_HEVC_MBENC_BENC;
6988     if (m_pictureCodingType == P_TYPE)
6989     {
6990         krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_MBENC_ADV_P : CODECHAL_HEVC_MBENC_PENC;
6991     }
6992     else if (m_pictureCodingType == B_TYPE)
6993     {
6994         // In TU7, we still need the original ENC B kernel to process the I frame
6995         krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_MBENC_ADV : CODECHAL_HEVC_MBENC_BENC;
6996     }
6997 
6998     auto kernelState  = &m_mbEncKernelStates[krnIdx];
6999     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
7000     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
7001     {
7002         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
7003     }
7004 
7005     int32_t sliceQp = CalSliceQp();
7006     uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType);
7007 
7008     uint8_t tuMode = 0;
7009     if (m_hevcSeqParams->TargetUsage == 0x07)
7010     {
7011         // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped.
7012         CalcLambda(sliceType, INTRA_TRANSFORM_HAAR);
7013         tuMode = CODECHAL_ENCODE_HEVC_TARGET_USAGE_MODE_PERFORMANCE;
7014     }
7015     else if (m_hevcSeqParams->TargetUsage == 0x04)
7016     {
7017         tuMode = CODECHAL_ENCODE_HEVC_TARGET_USAGE_MODE_NORMAL;
7018     }
7019     else if (m_hevcSeqParams->TargetUsage == 0x01)
7020     {
7021         tuMode = CODECHAL_ENCODE_HEVC_TARGET_USAGE_MODE_QUALITY;
7022     }
7023     else
7024     {
7025         CODECHAL_ENCODE_ASSERT(false);
7026         eStatus = MOS_STATUS_INVALID_PARAMETER;
7027         return eStatus;
7028     }
7029 
7030     LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR);
7031 
7032     uint8_t mbCodeIdxForTempMVP = 0xFF;
7033     if(m_pictureCodingType != I_TYPE)
7034     {
7035         if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
7036         {
7037             uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
7038 
7039             mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
7040         }
7041 
7042         if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
7043         {
7044             // Temporal reference MV index is invalid and so disable the temporal MVP
7045             CODECHAL_ENCODE_ASSERT(false);
7046             m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
7047         }
7048     }
7049 
7050     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
7051 
7052     //Setup DSH
7053     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
7054         m_stateHeapInterface,
7055         kernelState,
7056         false,
7057         0,
7058         false,
7059         m_storeData));
7060 
7061     //Setup CURBE
7062     uint8_t maxLenSP[] = { 25, 57, 57 };
7063     uint8_t forwardTransformThd[7] = { 0 };
7064     CalcForwardCoeffThd(forwardTransformThd, sliceQp);
7065 
7066     uint32_t curbeSize = 0;
7067     void* defaultCurbe = (void*)GetDefaultCurbeEncBKernel(curbeSize);
7068     CODECHAL_ENCODE_ASSERT(defaultCurbe);
7069 
7070     CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
7071     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
7072 
7073     bool transform_8x8_mode_flag = true;
7074 
7075     curbe->DW0.AdaptiveEn  = 1;
7076     curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag;
7077     curbe->DW2.PicWidth    = m_picWidthInMb;
7078     curbe->DW2.LenSP       = maxLenSP[tuMode];
7079     curbe->DW3.SrcAccess   = curbe->DW3.RefAccess = 0;
7080     curbe->DW3.FTEnable                           = (m_ftqBasedSkip[m_hevcSeqParams->TargetUsage] >> 1) & 0x01;
7081 
7082     curbe->DW4.PicHeightMinus1               = m_picHeightInMb - 1;
7083     curbe->DW4.HMEEnable                     = m_hmeEnabled;
7084     curbe->DW4.SliceType                     = sliceType;
7085     curbe->DW4.UseActualRefQPValue           = false;
7086 
7087     curbe->DW7.IntraPartMask                 = 0x3;
7088 
7089     curbe->DW6.FrameWidth                    = m_picWidthInMb  * CODECHAL_MACROBLOCK_WIDTH;
7090     curbe->DW6.FrameHeight                   = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
7091 
7092     curbe->DW8.Mode0Cost = m_modeCost[0];
7093     curbe->DW8.Mode1Cost = m_modeCost[1];
7094     curbe->DW8.Mode2Cost = m_modeCost[2];
7095     curbe->DW8.Mode3Cost = m_modeCost[3];
7096 
7097     curbe->DW9.Mode4Cost = m_modeCost[4];
7098     curbe->DW9.Mode5Cost = m_modeCost[5];
7099     curbe->DW9.Mode6Cost = m_modeCost[6];
7100     curbe->DW9.Mode7Cost = m_modeCost[7];
7101 
7102     curbe->DW10.Mode8Cost= m_modeCost[8];
7103     curbe->DW10.Mode9Cost= m_modeCost[9];
7104     curbe->DW10.RefIDCost = m_modeCost[10];
7105     curbe->DW10.ChromaIntraModeCost = m_modeCost[11];
7106 
7107     curbe->DW11.MV0Cost  = m_mvCost[0];
7108     curbe->DW11.MV1Cost  = m_mvCost[1];
7109     curbe->DW11.MV2Cost  = m_mvCost[2];
7110     curbe->DW11.MV3Cost  = m_mvCost[3];
7111 
7112     curbe->DW12.MV4Cost  = m_mvCost[4];
7113     curbe->DW12.MV5Cost  = m_mvCost[5];
7114     curbe->DW12.MV6Cost  = m_mvCost[6];
7115     curbe->DW12.MV7Cost  = m_mvCost[7];
7116 
7117     curbe->DW13.QpPrimeY = sliceQp;
7118     uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only
7119     int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8;
7120     int32_t qpi                  = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset));
7121     int32_t qpc = (qpi < 30) ? qpi : QPcTable[qpi - 30];
7122     curbe->DW13.QpPrimeCb= qpc + qpBdOffsetC;
7123     qpi                          = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset));
7124     qpc = (qpi < 30) ? qpi : QPcTable[qpi - 30];
7125     curbe->DW13.QpPrimeCr= qpc;
7126 
7127     curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0];
7128     curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1];
7129     curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2];
7130 
7131     curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3];
7132     curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4];
7133     curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5];
7134     curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6];
7135 
7136     curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp];
7137 
7138     if(m_pictureCodingType == I_TYPE)
7139     {
7140         *(float*)&(curbe->DW34.LambdaME) = 0.0;
7141     }
7142     else if (m_pictureCodingType == P_TYPE)
7143     {
7144         *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp];
7145     }
7146     else
7147     {
7148         *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
7149     }
7150 
7151     curbe->DW35.ModeCostSp                 = m_modeCostSp;
7152     curbe->DW35.SimpIntraInterThreshold    = m_simplestIntraInterThreshold;
7153 
7154     curbe->DW36.NumRefIdxL0MinusOne  = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
7155     curbe->DW36.NumRefIdxL1MinusOne  = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
7156     curbe->DW36.BRCEnable            = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
7157     curbe->DW36.LCUBRCEnable         = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
7158     curbe->DW36.PowerSaving         = m_powerSavingEnabled;
7159     curbe->DW36.ROIEnable            = (m_hevcPicParams->NumROI > 0);
7160     curbe->DW36.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
7161 
7162     if(m_pictureCodingType != I_TYPE)
7163     {
7164         curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0);
7165         curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1);
7166         curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2);
7167         curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3);
7168         curbe->DW41.TextureIntraCostThreshold = 500;
7169         if(m_pictureCodingType == B_TYPE) {
7170             curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0);
7171             curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1);
7172         }
7173     }
7174 
7175     curbe->DW44.MaxVmvR                = 511 * 4;
7176     curbe->DW44.MaxNumMergeCandidates  = m_hevcSliceParams->MaxNumMergeCand;
7177 
7178     if(m_pictureCodingType != I_TYPE)
7179     {
7180         curbe->DW44.MaxNumRefList0         = curbe->DW36.NumRefIdxL0MinusOne + 1;
7181 
7182         curbe->DW45.TemporalMvpEnableFlag  = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
7183         curbe->DW45.HMECombineLenPslice    = 8;
7184         if(m_pictureCodingType == B_TYPE)
7185         {
7186             curbe->DW44.MaxNumRefList1         = curbe->DW36.NumRefIdxL1MinusOne + 1;
7187             curbe->DW45.HMECombineLenBslice    = 8;
7188         }
7189     }
7190 
7191     curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
7192 
7193     curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
7194     curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
7195     curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
7196     curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
7197 
7198     curbe->DW47.NumRegionsInSlice       = m_numRegionsInSlice;
7199     curbe->DW47.TypeOfWalkingPattern    = m_enable26WalkingPattern;
7200     curbe->DW47.ChromaFlatnessCheckFlag = (m_hevcSeqParams->TargetUsage == 0x07) ? 0 : 1;
7201     curbe->DW47.EnableIntraEarlyExit    = (m_hevcSeqParams->TargetUsage == 0x04);
7202     curbe->DW47.SkipIntraKrnFlag        = (m_hevcSeqParams->TargetUsage == 0x07);  // When TU=7, there is no intra kernel call
7203     curbe->DW47.CollocatedFromL0Flag    = m_hevcSliceParams->collocated_from_l0_flag;
7204     curbe->DW47.IsLowDelay              = m_lowDelay;
7205     curbe->DW47.ScreenContentFlag       = m_hevcPicParams->bScreenContent;
7206     curbe->DW47.MultiSliceFlag         = (m_numSlices > 1);
7207     curbe->DW47.ArbitarySliceFlag      = m_arbitraryNumMbsInSlice;
7208     curbe->DW47.NumRegionMinus1        = m_walkingPatternParam.dwNumRegion - 1;
7209 
7210     if(m_pictureCodingType != I_TYPE)
7211     {
7212         curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]);
7213         curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]);
7214         curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]);
7215         curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]);
7216         if(m_pictureCodingType == B_TYPE) {
7217             curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]);
7218             curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]);
7219         }
7220     }
7221 
7222     curbe->DW52.NumofUnitInRegion          = m_walkingPatternParam.dwNumUnitsInRegion;
7223     curbe->DW52.MaxHeightInRegion          = m_walkingPatternParam.dwMaxHeightInRegion;
7224 
7225     uint32_t startBTI = 0;
7226     curbe->DW56.BTI_CU_Record                  = bindingTable->dwBindingTableEntries[startBTI++];
7227     curbe->DW57.BTI_PAK_Cmd                    = bindingTable->dwBindingTableEntries[startBTI++];
7228     curbe->DW58.BTI_Src_Y                      = bindingTable->dwBindingTableEntries[startBTI++];
7229     startBTI++; //skip UV index
7230     curbe->DW59.BTI_Intra_Dist                 = bindingTable->dwBindingTableEntries[startBTI++];
7231     curbe->DW60.BTI_Min_Dist                   = bindingTable->dwBindingTableEntries[startBTI++];
7232     curbe->DW61.BTI_HMEMVPredFwdBwdSurfIndex   = bindingTable->dwBindingTableEntries[startBTI++];
7233     curbe->DW62.BTI_HMEDistSurfIndex           = bindingTable->dwBindingTableEntries[startBTI++];
7234     curbe->DW63.BTI_Slice_Map                  = bindingTable->dwBindingTableEntries[startBTI++];
7235     curbe->DW64.BTI_VME_Saved_UNI_SIC          = bindingTable->dwBindingTableEntries[startBTI++];
7236     curbe->DW65.BTI_Simplest_Intra             = bindingTable->dwBindingTableEntries[startBTI++];
7237     curbe->DW66.BTI_Collocated_RefFrame        = bindingTable->dwBindingTableEntries[startBTI++];
7238     curbe->DW67.BTI_Reserved                   = bindingTable->dwBindingTableEntries[startBTI++];
7239     curbe->DW68.BTI_BRC_Input                  = bindingTable->dwBindingTableEntries[startBTI++];
7240     curbe->DW69.BTI_LCU_QP                     = bindingTable->dwBindingTableEntries[startBTI++];
7241     curbe->DW70.BTI_BRC_Data                   = bindingTable->dwBindingTableEntries[startBTI++];
7242     curbe->DW71.BTI_VMEInterPredictionSurfIndex= bindingTable->dwBindingTableEntries[startBTI++];
7243     if(m_pictureCodingType == P_TYPE)
7244     {
7245         //P MBEnc curbe 72~75 are different from B frame.
7246         startBTI += (CODECHAL_HEVC_P_MBENC_CONCURRENT_THD_MAP - CODECHAL_HEVC_P_MBENC_VME_FORWARD_0);
7247         curbe->DW72.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
7248         curbe->DW73.BTI_MB_Data_CurFrame   = bindingTable->dwBindingTableEntries[startBTI++];
7249         curbe->DW74.BTI_MVP_CurFrame       = bindingTable->dwBindingTableEntries[startBTI++];
7250         curbe->DW75.BTI_Haar_Dist16x16     = bindingTable->dwBindingTableEntries[startBTI++];
7251         curbe->DW76.BTI_Stats_Data         = bindingTable->dwBindingTableEntries[startBTI++];
7252         curbe->DW77.BTI_Frame_Stats_Data   = bindingTable->dwBindingTableEntries[startBTI++];
7253         curbe->DW78.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
7254     }
7255     else
7256     {
7257         startBTI += (CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_VME_FORWARD_0 + 1);
7258 
7259         curbe->DW72.BTI_VMEInterPredictionBSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
7260         startBTI += (CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_VME_MUL_BACKWARD_0 + 1);
7261 
7262         curbe->DW73.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
7263         curbe->DW74.BTI_MB_Data_CurFrame   = bindingTable->dwBindingTableEntries[startBTI++];
7264         curbe->DW75.BTI_MVP_CurFrame       = bindingTable->dwBindingTableEntries[startBTI++];
7265         curbe->DW76.BTI_Haar_Dist16x16     = bindingTable->dwBindingTableEntries[startBTI++];
7266         curbe->DW77.BTI_Stats_Data         = bindingTable->dwBindingTableEntries[startBTI++];
7267         curbe->DW78.BTI_Frame_Stats_Data   = bindingTable->dwBindingTableEntries[startBTI++];
7268         curbe->DW79.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
7269     }
7270 
7271     // Intra refresh is enabled. Program related CURBE fields
7272     if (m_hevcPicParams->bEnableRollingIntraRefresh)
7273     {
7274         curbe->DW35.IntraRefreshEn         = true;
7275         curbe->DW35.FirstIntraRefresh      = m_firstIntraRefresh;
7276         curbe->DW35.HalfUpdateMixedLCU     = 0;
7277         curbe->DW35.EnableRollingIntra     = true;
7278 
7279         curbe->DW38.NumFrameInGOB            = m_frameNumInGob;
7280         curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh;
7281 
7282         curbe->DW51.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
7283         curbe->DW51.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
7284         curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
7285 
7286         curbe->DW53.IntraRefreshRefHeight = 40;
7287         curbe->DW53.IntraRefreshRefWidth  = 48;
7288 
7289         m_firstIntraRefresh = false;
7290         m_frameNumWithoutIntraRefresh = 0;
7291     }
7292     else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames
7293     {
7294         m_frameNumWithoutIntraRefresh++;
7295     }
7296 
7297     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
7298 
7299     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
7300     if (m_pictureCodingType == P_TYPE)
7301     {
7302         //P frame curbe only use the DW0~DW75
7303         CODECHAL_ENCODE_CHK_STATUS_RETURN(
7304             AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd) - sizeof(uint32_t)));
7305     }
7306     else
7307     {
7308         CODECHAL_ENCODE_CHK_STATUS_RETURN(
7309             AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
7310     }
7311 
7312     MOS_COMMAND_BUFFER cmdBuffer;
7313     if(m_numMbBKernelSplit == 0)
7314     {
7315         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
7316             kernelState,
7317             encFunctionType,
7318             &m_walkingPatternParam.ScoreBoard));
7319     }
7320     else
7321     {
7322         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
7323 
7324         MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
7325         MOS_ZeroMemory(&idParams, sizeof(idParams));
7326         idParams.pKernelState = kernelState;
7327         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
7328             m_stateHeapInterface,
7329             1,
7330             &idParams));
7331 
7332         // Add binding table
7333         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
7334             m_stateHeapInterface,
7335             kernelState));
7336     }
7337 
7338     //Add surface states
7339     startBTI = 0;
7340 
7341     //0: CU record
7342     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7343         kernelState,
7344         &cmdBuffer,
7345         SURFACE_CU_RECORD,
7346         &bindingTable->dwBindingTableEntries[startBTI++]));
7347 
7348     //1: PAK command
7349     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7350         kernelState,
7351         &cmdBuffer,
7352         SURFACE_HCP_PAK,
7353         &bindingTable->dwBindingTableEntries[startBTI++]));
7354 
7355     //2 and 3 Source Y and UV
7356     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7357         kernelState,
7358         &cmdBuffer,
7359         SURFACE_RAW_Y_UV,
7360         &bindingTable->dwBindingTableEntries[startBTI++]));
7361     startBTI++;
7362 
7363     //4: Intra dist
7364     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7365         kernelState,
7366         &cmdBuffer,
7367         SURFACE_INTRA_DIST,
7368         &bindingTable->dwBindingTableEntries[startBTI++]));
7369 
7370     //5: min distortion
7371     m_surfaceParams[SURFACE_MIN_DIST].bIsWritable   =
7372     m_surfaceParams[SURFACE_MIN_DIST].bRenderTarget = true;
7373     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7374         kernelState,
7375         &cmdBuffer,
7376         SURFACE_MIN_DIST,
7377         &bindingTable->dwBindingTableEntries[startBTI++]));
7378 
7379     if(m_hmeSupported)
7380     {
7381     //6: MV predictor from HME
7382     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7383         kernelState,
7384         &cmdBuffer,
7385         SURFACE_HME_MVP,
7386         &bindingTable->dwBindingTableEntries[startBTI++]));
7387 
7388     //7: distortion from HME
7389     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7390         kernelState,
7391         &cmdBuffer,
7392         SURFACE_HME_DIST,
7393         &bindingTable->dwBindingTableEntries[startBTI++]));
7394     }
7395     else
7396     {
7397         startBTI += 2;
7398     }
7399 
7400     //8: slice map
7401     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7402         kernelState,
7403         &cmdBuffer,
7404         SURFACE_SLICE_MAP,
7405         &bindingTable->dwBindingTableEntries[startBTI++]));
7406 
7407     //9: VME UNI and SIC data
7408     m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bIsWritable   =
7409     m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bRenderTarget = true;
7410     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7411         kernelState,
7412         &cmdBuffer,
7413         SURFACE_VME_UNI_SIC_DATA,
7414         &bindingTable->dwBindingTableEntries[startBTI++]));
7415 
7416     //10: Simplest Intra
7417     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7418         kernelState,
7419         &cmdBuffer,
7420         SURFACE_SIMPLIFIED_INTRA,
7421         &bindingTable->dwBindingTableEntries[startBTI++]));
7422 
7423     // 11: Reference frame col-located data surface
7424     if(mbCodeIdxForTempMVP == 0xFF)
7425     {
7426         startBTI++;
7427     }
7428     else
7429     {
7430         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7431             kernelState,
7432             &cmdBuffer,
7433             SURFACE_COL_MB_MV,
7434             &bindingTable->dwBindingTableEntries[startBTI++],
7435             m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP)));
7436         }
7437 
7438     // 12: Current frame col-located data surface -- reserved now
7439     startBTI++;
7440 
7441     // 13: BRC Input
7442     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7443         kernelState,
7444         &cmdBuffer,
7445         SURFACE_BRC_INPUT,
7446         &bindingTable->dwBindingTableEntries[startBTI++]));
7447 
7448     // 14: LCU Qp
7449     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7450         kernelState,
7451         &cmdBuffer,
7452         SURFACE_LCU_QP,
7453         &bindingTable->dwBindingTableEntries[startBTI++]));
7454 
7455     // 15: LCU BRC constant
7456     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7457         kernelState,
7458         &cmdBuffer,
7459         SURFACE_BRC_DATA,
7460         &bindingTable->dwBindingTableEntries[startBTI++]));
7461 
7462     // 16 - 32 Current plus forward and backward surface 0-7
7463     //16: Source Y for VME
7464     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7465         kernelState,
7466         &cmdBuffer,
7467         SURFACE_RAW_VME,
7468         &bindingTable->dwBindingTableEntries[startBTI++]));
7469 
7470     for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++)
7471     {
7472         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx];
7473         if (!CodecHal_PictureIsInvalid(refPic) &&
7474             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
7475         {
7476             uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7477 
7478             // picture Y VME
7479             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7480                 kernelState,
7481                 &cmdBuffer,
7482                 SURFACE_REF_FRAME_VME,
7483                 &bindingTable->dwBindingTableEntries[startBTI++],
7484                 &m_refList[idx]->sRefBuffer,
7485                 curbe->DW6.FrameWidth,
7486                 curbe->DW6.FrameHeight));
7487         }
7488         else
7489         {
7490             // Skip the binding table index because it is not used
7491             startBTI++;
7492         }
7493 
7494         refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx];
7495         if (!CodecHal_PictureIsInvalid(refPic) &&
7496             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
7497         {
7498             uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7499 
7500             // picture Y VME
7501             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7502                 kernelState,
7503                 &cmdBuffer,
7504                 SURFACE_REF_FRAME_VME,
7505                 &bindingTable->dwBindingTableEntries[startBTI++],
7506                 &m_refList[idx]->sRefBuffer,
7507                 curbe->DW6.FrameWidth,
7508                 curbe->DW6.FrameHeight));
7509         }
7510         else
7511         {
7512             // Skip the binding table index because it is not used
7513             startBTI++;
7514         }
7515     }
7516     CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
7517 
7518     if (m_pictureCodingType != P_TYPE)
7519     {
7520         //33-41 VME multi-ref BTI -- Current plus [backward, nil][0..3]
7521         //33: Current Y VME surface
7522         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7523             kernelState,
7524             &cmdBuffer,
7525             SURFACE_RAW_VME,
7526             &bindingTable->dwBindingTableEntries[startBTI++]));
7527 
7528         for(uint32_t surfaceIdx = 0; surfaceIdx < 4; surfaceIdx++)
7529         {
7530             CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[1][surfaceIdx];
7531             if (!CodecHal_PictureIsInvalid(refPic) &&
7532                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
7533             {
7534                 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7535 
7536                 // picture Y VME
7537                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7538                     kernelState,
7539                     &cmdBuffer,
7540                     SURFACE_REF_FRAME_VME,
7541                     &bindingTable->dwBindingTableEntries[startBTI++],
7542                     &m_refList[idx]->sRefBuffer,
7543                     curbe->DW6.FrameWidth,
7544                     curbe->DW6.FrameHeight));
7545             }
7546             else
7547             {
7548                 // Skip the binding table index because it is not used
7549                 startBTI++;
7550             }
7551 
7552             // Skip the binding table index because it is not used
7553             startBTI++;
7554         }
7555         CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
7556     }
7557 
7558     // B 42 or P 33: Concurrent thread
7559     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7560         kernelState,
7561         &cmdBuffer,
7562         (SURFACE_ID)(SURFACE_CONCURRENT_THREAD + m_concurrentThreadIndex),
7563         &bindingTable->dwBindingTableEntries[startBTI++]));
7564 
7565     if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD)
7566     {
7567         m_concurrentThreadIndex = 0;
7568     }
7569 
7570     // B 43 or P 34: MV index buffer
7571     m_surfaceParams[SURFACE_MB_MV_INDEX].bIsWritable   =
7572     m_surfaceParams[SURFACE_MB_MV_INDEX].bRenderTarget = true;
7573     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7574         kernelState,
7575         &cmdBuffer,
7576         SURFACE_MB_MV_INDEX,
7577         &bindingTable->dwBindingTableEntries[startBTI++]));
7578 
7579     // B 44: or P 35: MVP index buffer
7580     m_surfaceParams[SURFACE_MVP_INDEX].bIsWritable   =
7581     m_surfaceParams[SURFACE_MVP_INDEX].bRenderTarget = true;
7582     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7583         kernelState,
7584         &cmdBuffer,
7585         SURFACE_MVP_INDEX,
7586         &bindingTable->dwBindingTableEntries[startBTI++]));
7587 
7588     if (!m_hwWalker)
7589     {
7590         eStatus = MOS_STATUS_UNKNOWN;
7591         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
7592         return eStatus;
7593     }
7594 
7595     if(m_numMbBKernelSplit == 0)
7596     {
7597         // always use customized media walker
7598         MHW_WALKER_PARAMS walkerParams;
7599         MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
7600         walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
7601 
7602         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7603             &cmdBuffer,
7604             &walkerParams));
7605     }
7606     else
7607     {
7608         int32_t localOuterLoopExecCount = m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount;
7609         int32_t localInitialStartPointY = m_walkingPatternParam.MediaWalker.LocalStart.y;
7610         int32_t phase = MOS_MIN(m_numMbBKernelSplit, MAX_NUM_KERNEL_SPLIT);
7611         int32_t totalExecCount = localOuterLoopExecCount + 1;
7612         int32_t deltaExecCount = (((totalExecCount+phase - 1) / phase) + 1) & 0xfffe;
7613         int32_t remainExecCount = totalExecCount;
7614 
7615         int32_t deltaY = 0;
7616         if (m_enable26WalkingPattern)
7617         {
7618             deltaY = deltaExecCount / 2;
7619         }
7620         else
7621         {
7622             deltaY = deltaExecCount * 2;
7623         }
7624 
7625         int32_t startPointY[MAX_NUM_KERNEL_SPLIT] = { 0 };
7626         int32_t currentExecCount[MAX_NUM_KERNEL_SPLIT] = { -1 };
7627         currentExecCount[0] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) :  (remainExecCount-1);
7628         startPointY[0] = localInitialStartPointY;
7629 
7630         for (auto i = 1; i < phase; i++)
7631         {
7632             remainExecCount -= deltaExecCount;
7633             if (remainExecCount < 1)
7634             {
7635                 remainExecCount = 1;
7636             }
7637 
7638             currentExecCount[i] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) :  (remainExecCount-1);
7639             startPointY[i] = startPointY[i-1] + deltaY;
7640         }
7641 
7642         for(auto i = 0; i < phase; i++)
7643         {
7644             if(currentExecCount[i] < 0)
7645             {
7646                 break;
7647             }
7648 
7649             // Program render engine pipe commands
7650             SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
7651             sendKernelCmdsParams.EncFunctionType        = encFunctionType;
7652             sendKernelCmdsParams.pKernelState           = kernelState;
7653             sendKernelCmdsParams.bEnableCustomScoreBoard= true;
7654             sendKernelCmdsParams.pCustomScoreBoard      = &m_walkingPatternParam.ScoreBoard;
7655             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
7656 
7657             // Change walker execution count and local start Y for different phases
7658             m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = currentExecCount[i];
7659             m_walkingPatternParam.MediaWalker.LocalStart.y = startPointY[i];
7660 
7661             // always use customized media walker
7662             MHW_WALKER_PARAMS walkerParams;
7663             MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
7664             walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
7665 
7666             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7667                 &cmdBuffer,
7668                 &walkerParams));
7669         }
7670     }
7671 
7672     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
7673         encFunctionType,
7674         kernelState,
7675         &cmdBuffer));
7676 
7677     CODECHAL_DEBUG_TOOL(
7678         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7679             &m_mvIndex.sResource,
7680             CodechalDbgAttr::attrOutput,
7681             "MbData",
7682             m_mvpIndex.dwSize,
7683             0,
7684             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
7685 
7686          CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7687             &m_mvpIndex.sResource,
7688             CodechalDbgAttr::attrOutput,
7689             "MvData",
7690             m_mvpIndex.dwSize,
7691             0,
7692             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
7693     )
7694 
7695     m_lastTaskInPhase = true;
7696     eStatus = Encode8x8BPakKernel(curbe);
7697 
7698     return eStatus;
7699 }
7700 
Encode2xScalingKernel()7701 MOS_STATUS CodechalEncHevcStateG9::Encode2xScalingKernel()
7702 {
7703     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7704 
7705     PerfTagSetting perfTag;
7706     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
7707 
7708     uint32_t krnIdx = CODECHAL_HEVC_MBENC_2xSCALING;
7709     auto     kernelState         = &m_mbEncKernelStates[krnIdx];
7710     auto     scalingBindingTable = &m_mbEncKernelBindingTable[krnIdx];
7711     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
7712     {
7713         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
7714     }
7715 
7716     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
7717         m_osInterface,
7718         &m_scaled2xSurface));
7719 
7720     // Setup DSH
7721     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
7722         m_stateHeapInterface,
7723         kernelState,
7724         false,
7725         0,
7726         false,
7727         m_storeData));
7728 
7729     //Setup CURBE
7730     MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9  cmd, *curbe = &cmd;
7731     MOS_ZeroMemory(curbe, sizeof(*curbe));
7732     curbe->DW0.PicWidth  = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
7733     curbe->DW0.PicHeight    = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
7734 
7735     uint32_t startBTI = 0;
7736     curbe->DW8.BTI_Src_Y    = scalingBindingTable->dwBindingTableEntries[startBTI++];
7737     curbe->DW9.BTI_Dst_Y    = scalingBindingTable->dwBindingTableEntries[startBTI++];
7738 
7739     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
7740     CODECHAL_ENCODE_CHK_STATUS_RETURN(
7741         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
7742 
7743     MOS_COMMAND_BUFFER cmdBuffer;
7744     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
7745         &cmdBuffer,
7746         kernelState,
7747         encFunctionType,
7748         nullptr));
7749 
7750     // Add surface states, 2X scaling uses U16Norm surface format
7751     startBTI = 0;
7752 
7753     // Source surface/s
7754     auto surfaceCodecParams = &m_surfaceParams[SURFACE_RAW_Y];
7755     surfaceCodecParams->bUse16UnormSurfaceFormat = true;
7756 
7757     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7758         kernelState,
7759         &cmdBuffer,
7760         SURFACE_RAW_Y,
7761         &scalingBindingTable->dwBindingTableEntries[startBTI++]
7762     ));
7763 
7764     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
7765     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceParams(surfaceCodecParams));
7766 
7767     // Destination surface/s
7768     m_scaled2xSurface.dwWidth  = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_WIDTH);
7769     m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_HEIGHT);
7770 
7771     m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat =
7772     m_surfaceParams[SURFACE_Y_2X].bIsWritable   =
7773     m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
7774     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7775         kernelState,
7776         &cmdBuffer,
7777         SURFACE_Y_2X,
7778         &scalingBindingTable->dwBindingTableEntries[startBTI++]
7779         ));
7780 
7781     if (!m_hwWalker)
7782     {
7783         eStatus = MOS_STATUS_UNKNOWN;
7784         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
7785         return eStatus;
7786     }
7787 
7788     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
7789     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
7790     walkerCodecParams.WalkerMode        = m_walkerMode;
7791     // check kernel of Downscaling 2x kernels for Ultra HME.
7792     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5;
7793     // The frame kernel process 32x32 input pixels and output 16x16 down sampled pixels
7794     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
7795     /* Enforce no dependency dispatch order for Scaling kernel,  */
7796     walkerCodecParams.bNoDependency     = true;
7797 
7798     MHW_WALKER_PARAMS walkerParams;
7799     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
7800         m_hwInterface,
7801         &walkerParams,
7802         &walkerCodecParams));
7803 
7804     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7805         &cmdBuffer,
7806         &walkerParams));
7807 
7808     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
7809         encFunctionType,
7810         kernelState,
7811         &cmdBuffer));
7812 
7813     return eStatus;
7814 }
7815 
Encode32x32PuModeDecisionKernel()7816 MOS_STATUS CodechalEncHevcStateG9::Encode32x32PuModeDecisionKernel()
7817 {
7818     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7819 
7820     PerfTagSetting perfTag;
7821     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD);
7822 
7823     uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32MD;
7824     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
7825     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
7826     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
7827     {
7828         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
7829     }
7830 
7831     // Setup DSH
7832     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
7833         m_stateHeapInterface,
7834         kernelState,
7835         false,
7836         0,
7837         false,
7838         m_storeData));
7839 
7840     //Setup CURBE
7841     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
7842 
7843     CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR);
7844     int32_t sliceQp = CalSliceQp();
7845 
7846     double lambdaScalingFactor = 1.0;
7847     double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
7848     double squaredQpLambda = qpLambda * qpLambda;
7849     m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
7850 
7851     CODECHAL_ENC_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd;
7852     MOS_ZeroMemory(curbe, sizeof(*curbe));
7853     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
7854     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
7855 
7856     curbe->DW1.EnableDebugDump = false;
7857     curbe->DW1.LCUType         = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
7858     curbe->DW1.PuType          = 0; // 32x32 PU
7859     curbe->DW1.BRCEnable            = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
7860     curbe->DW1.LCUBRCEnable         = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
7861     curbe->DW1.SliceType            = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
7862     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
7863     curbe->DW1.ROIEnable            = (m_hevcPicParams->NumROI > 0);
7864 
7865     curbe->DW2.Lambda          = m_fixedPointLambda;
7866 
7867     curbe->DW3.ModeCost32x32   = 0;
7868 
7869     curbe->DW4.EarlyExit       = (uint32_t)-1;
7870 
7871     uint32_t startIndex = 0;
7872     curbe->DW8.BTI_32x32PU_Output    = bindingTable->dwBindingTableEntries[startIndex++];
7873     curbe->DW9.BTI_Src_Y           = bindingTable->dwBindingTableEntries[startIndex++];
7874     startIndex++; // skip one BTI for Y and UV have the same BTI
7875     curbe->DW10.BTI_Src_Y2x        = bindingTable->dwBindingTableEntries[startIndex++];
7876     curbe->DW11.BTI_Slice_Map      = bindingTable->dwBindingTableEntries[startIndex++];
7877     curbe->DW12.BTI_Src_Y2x_VME    = bindingTable->dwBindingTableEntries[startIndex++];
7878     curbe->DW13.BTI_Brc_Input      = bindingTable->dwBindingTableEntries[startIndex++];
7879     curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startIndex++];
7880     curbe->DW15.BTI_Brc_Data       = bindingTable->dwBindingTableEntries[startIndex++];
7881     curbe->DW16.BTI_Stats_Data     = bindingTable->dwBindingTableEntries[startIndex++];
7882     curbe->DW17.BTI_Kernel_Debug   = bindingTable->dwBindingTableEntries[startIndex++];
7883 
7884     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
7885 
7886     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION;
7887     CODECHAL_ENCODE_CHK_STATUS_RETURN(
7888         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
7889 
7890     MOS_COMMAND_BUFFER cmdBuffer;
7891     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
7892         &cmdBuffer,
7893         kernelState,
7894         encFunctionType,
7895         nullptr));
7896 
7897     //Add surface states
7898     startIndex = 0;
7899 
7900     // 32x32 PU output
7901     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable   =
7902     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
7903     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7904         kernelState,
7905         &cmdBuffer,
7906         SURFACE_32x32_PU_OUTPUT,
7907         &bindingTable->dwBindingTableEntries[startIndex++]));
7908 
7909     // Source Y and UV
7910     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7911         kernelState,
7912         &cmdBuffer,
7913         SURFACE_RAW_Y_UV,
7914         &bindingTable->dwBindingTableEntries[startIndex++]));
7915     startIndex ++; // UV index
7916 
7917     // Source Y2x
7918     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7919         kernelState,
7920         &cmdBuffer,
7921         SURFACE_Y_2X,
7922         &bindingTable->dwBindingTableEntries[startIndex++]));
7923 
7924     // Slice map
7925     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7926         kernelState,
7927         &cmdBuffer,
7928         SURFACE_SLICE_MAP,
7929         &bindingTable->dwBindingTableEntries[startIndex++]));
7930 
7931     // Source Y2x for VME
7932     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7933         kernelState,
7934         &cmdBuffer,
7935         SURFACE_Y_2X_VME,
7936         &bindingTable->dwBindingTableEntries[startIndex++]));
7937 
7938     // BRC Input
7939     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7940         kernelState,
7941         &cmdBuffer,
7942         SURFACE_BRC_INPUT,
7943         &bindingTable->dwBindingTableEntries[startIndex++]));
7944 
7945     // LCU Qp surface
7946     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7947         kernelState,
7948         &cmdBuffer,
7949         SURFACE_LCU_QP,
7950         &bindingTable->dwBindingTableEntries[startIndex++]));
7951 
7952     // BRC data surface
7953     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7954         kernelState,
7955         &cmdBuffer,
7956         SURFACE_BRC_DATA,
7957         &bindingTable->dwBindingTableEntries[startIndex++]));
7958 
7959     if (!m_hwWalker)
7960     {
7961         eStatus = MOS_STATUS_UNKNOWN;
7962         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
7963         return eStatus;
7964     }
7965 
7966     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
7967     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
7968     walkerCodecParams.WalkerMode            = m_walkerMode;
7969     walkerCodecParams.dwResolutionX         = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5; /* looping for Walker is needed at 8x8 block level */
7970     walkerCodecParams.dwResolutionY         = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
7971     walkerCodecParams.bNoDependency         = true;     /* Enforce no dependency dispatch order for 32x32 MD kernel  */
7972 
7973     MHW_WALKER_PARAMS walkerParams;
7974     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
7975         m_hwInterface,
7976         &walkerParams,
7977         &walkerCodecParams));
7978 
7979     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7980         &cmdBuffer,
7981         &walkerParams));
7982 
7983     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
7984         encFunctionType,
7985         kernelState,
7986         &cmdBuffer));
7987 
7988     return eStatus;
7989 }
7990 
Encode32X32BIntraCheckKernel()7991 MOS_STATUS CodechalEncHevcStateG9::Encode32X32BIntraCheckKernel()
7992 {
7993     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7994 
7995     CODECHAL_ENCODE_FUNCTION_ENTER;
7996 
7997     PerfTagSetting perfTag;
7998     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC);
7999 
8000     uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32INTRACHECK;
8001     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
8002     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8003 
8004     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8005     {
8006         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8007     }
8008 
8009     // Setup DSH
8010     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8011         m_stateHeapInterface,
8012         kernelState,
8013         false,
8014         0,
8015         false,
8016         m_storeData));
8017 
8018     // Setup CURBE
8019     if (m_pictureCodingType == P_TYPE)
8020     {
8021         CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR);
8022     }
8023     else
8024     {
8025         CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR);
8026     }
8027     int32_t sliceQp = CalSliceQp();
8028 
8029     double lambdaScalingFactor = 1.0;
8030     double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
8031     double squaredQpLambda = qpLambda * qpLambda;
8032     m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
8033 
8034     CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd;
8035     MOS_ZeroMemory(curbe, sizeof(*curbe));
8036     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8037     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8038 
8039     curbe->DW1.EnableDebugDump = false;
8040     curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8041     curbe->DW1.Flags           = 0;
8042     curbe->DW1.Log2MinTUSize        = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
8043     curbe->DW1.SliceType            = m_hevcSliceParams->slice_type;
8044     curbe->DW1.HMEEnable            = m_hmeEnabled;
8045     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8046 
8047     curbe->DW2.QpMultiplier    = 100;
8048     curbe->DW2.QpValue         = 0;     // MBZ
8049 
8050     uint32_t startIndex = 0;
8051     curbe->DW8.BTI_Per32x32PuIntraCheck    = bindingTable->dwBindingTableEntries[startIndex++];
8052     curbe->DW9.BTI_Src_Y            = bindingTable->dwBindingTableEntries[startIndex++];
8053     startIndex++; // skip one BTI for Y and UV have the same BTI
8054     curbe->DW10.BTI_Src_Y2X         = bindingTable->dwBindingTableEntries[startIndex++];
8055     curbe->DW11.BTI_Slice_Map       = bindingTable->dwBindingTableEntries[startIndex++];
8056     curbe->DW12.BTI_VME_Y2X         = bindingTable->dwBindingTableEntries[startIndex++];
8057     curbe->DW13.BTI_Simplest_Intra  = bindingTable->dwBindingTableEntries[startIndex++];
8058     curbe->DW14.BTI_HME_MVPred      = bindingTable->dwBindingTableEntries[startIndex++];
8059     curbe->DW15.BTI_HME_Dist        = bindingTable->dwBindingTableEntries[startIndex++];
8060     curbe->DW16.BTI_LCU_Skip        = bindingTable->dwBindingTableEntries[startIndex++];
8061     curbe->DW17.BTI_Debug           = bindingTable->dwBindingTableEntries[startIndex++];
8062 
8063     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
8064 
8065     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK;
8066     CODECHAL_ENCODE_CHK_STATUS_RETURN(
8067         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8068 
8069     MOS_COMMAND_BUFFER cmdBuffer;
8070     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8071         &cmdBuffer,
8072         kernelState,
8073         encFunctionType,
8074         nullptr));
8075 
8076     //Add surface states
8077     startIndex = 0;
8078 
8079     // 32x32 PU B Intra Check Output
8080     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable   =
8081     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
8082     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8083         kernelState,
8084         &cmdBuffer,
8085         SURFACE_32x32_PU_OUTPUT,
8086         &bindingTable->dwBindingTableEntries[startIndex++]));
8087 
8088     // Source Y and UV
8089     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8090         kernelState,
8091         &cmdBuffer,
8092         SURFACE_RAW_Y_UV,
8093         &bindingTable->dwBindingTableEntries[startIndex++]));
8094     startIndex++;
8095 
8096     // Source Y2x
8097     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8098         kernelState,
8099         &cmdBuffer,
8100         SURFACE_Y_2X,
8101         &bindingTable->dwBindingTableEntries[startIndex++]));
8102 
8103     // Slice map
8104     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8105         kernelState,
8106         &cmdBuffer,
8107         SURFACE_SLICE_MAP,
8108         &bindingTable->dwBindingTableEntries[startIndex++]));
8109 
8110     // Source Y2x for VME
8111     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8112         kernelState,
8113         &cmdBuffer,
8114         SURFACE_Y_2X_VME,
8115         &bindingTable->dwBindingTableEntries[startIndex++]));
8116 
8117     // Simplest Intra
8118     m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bIsWritable   =
8119     m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bRenderTarget = true;
8120     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8121         kernelState,
8122         &cmdBuffer,
8123         SURFACE_SIMPLIFIED_INTRA,
8124         &bindingTable->dwBindingTableEntries[startIndex++]));
8125 
8126     if(m_hmeSupported)
8127     {
8128     //MV predictor from HME
8129     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8130         kernelState,
8131         &cmdBuffer,
8132         SURFACE_HME_MVP,
8133         &bindingTable->dwBindingTableEntries[startIndex++]));
8134 
8135     //distortion from HME
8136     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8137         kernelState,
8138         &cmdBuffer,
8139         SURFACE_HME_DIST,
8140         &bindingTable->dwBindingTableEntries[startIndex++]));
8141     }
8142     else
8143     {
8144         startIndex += 2;
8145     }
8146 
8147     // LCU Qp/Skip surface
8148     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8149         kernelState,
8150         &cmdBuffer,
8151         SURFACE_LCU_QP,
8152         &bindingTable->dwBindingTableEntries[startIndex++]));
8153 
8154     if (!m_hwWalker)
8155     {
8156         eStatus = MOS_STATUS_UNKNOWN;
8157         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8158         return eStatus;
8159     }
8160 
8161     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8162     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8163     walkerCodecParams.WalkerMode        = m_walkerMode;
8164     /* looping for Walker is needed at 8x8 block level */
8165     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5;
8166     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
8167     /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel  */
8168     walkerCodecParams.bNoDependency     = true;
8169 
8170     MHW_WALKER_PARAMS walkerParams;
8171     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8172         m_hwInterface,
8173         &walkerParams,
8174         &walkerCodecParams));
8175 
8176     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8177         &cmdBuffer,
8178         &walkerParams));
8179 
8180     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8181         encFunctionType,
8182         kernelState,
8183         &cmdBuffer));
8184 
8185     return eStatus;
8186 }
8187 
Encode16x16SadPuComputationKernel()8188 MOS_STATUS CodechalEncHevcStateG9::Encode16x16SadPuComputationKernel()
8189 {
8190     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8191 
8192     CODECHAL_ENCODE_FUNCTION_ENTER;
8193 
8194     PerfTagSetting perfTag;
8195     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD);
8196 
8197     uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16SAD;
8198     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
8199     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8200     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8201     {
8202         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8203     }
8204 
8205     //Setup DSH
8206     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8207         m_stateHeapInterface,
8208         kernelState,
8209         false,
8210         0,
8211         false,
8212         m_storeData));
8213 
8214     // Setup CURBE
8215     CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd;
8216 
8217     MOS_ZeroMemory(curbe, sizeof(*curbe));
8218     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8219     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8220 
8221     curbe->DW1.Log2MaxCUSize        = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8222     curbe->DW1.Log2MinCUSize        = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
8223     curbe->DW1.Log2MinTUSize        = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
8224     curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8225 
8226     curbe->DW2.SliceType       = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
8227     curbe->DW2.SimFlagForInter = false;
8228     if (m_hevcPicParams->CodingType != I_TYPE)
8229     {
8230         curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance;
8231     }
8232 
8233     uint32_t startIndex = 0;
8234     curbe->DW8.BTI_Src_Y                   = bindingTable->dwBindingTableEntries[startIndex++];
8235     startIndex++; // skip UV BTI
8236     curbe->DW9.BTI_Sad_16x16_PU_Output     = bindingTable->dwBindingTableEntries[startIndex++];
8237     curbe->DW10.BTI_32x32_Pu_ModeDecision  = bindingTable->dwBindingTableEntries[startIndex++];
8238     curbe->DW11.BTI_Slice_Map              = bindingTable->dwBindingTableEntries[startIndex++];
8239     curbe->DW12.BTI_Simplest_Intra         = bindingTable->dwBindingTableEntries[startIndex++];
8240     curbe->DW13.BTI_Debug                  = bindingTable->dwBindingTableEntries[startIndex++];
8241 
8242     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
8243 
8244     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD;
8245     CODECHAL_ENCODE_CHK_STATUS_RETURN(
8246         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8247 
8248     MOS_COMMAND_BUFFER cmdBuffer;
8249     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8250         &cmdBuffer,
8251         kernelState,
8252         encFunctionType,
8253         nullptr));
8254 
8255     //Add surface states
8256     startIndex = 0;
8257 
8258     // Source Y and UV
8259     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8260         kernelState,
8261         &cmdBuffer,
8262         SURFACE_RAW_Y_UV,
8263         &bindingTable->dwBindingTableEntries[startIndex++]));
8264     startIndex++;
8265 
8266     // 16x16 PU SAD output
8267     m_surfaceParams[SURFACE_16x16PU_SAD].bIsWritable   =
8268     m_surfaceParams[SURFACE_16x16PU_SAD].bRenderTarget = true;
8269     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8270         kernelState,
8271         &cmdBuffer,
8272         SURFACE_16x16PU_SAD,
8273         &bindingTable->dwBindingTableEntries[startIndex++]));
8274 
8275     // 32x32 PU MD data
8276     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8277         kernelState,
8278         &cmdBuffer,
8279         SURFACE_32x32_PU_OUTPUT,
8280         &bindingTable->dwBindingTableEntries[startIndex++]));
8281 
8282     // Slice map
8283     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8284         kernelState,
8285         &cmdBuffer,
8286         SURFACE_SLICE_MAP,
8287         &bindingTable->dwBindingTableEntries[startIndex++]));
8288 
8289     // Simplest Intra
8290     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8291         kernelState,
8292         &cmdBuffer,
8293         SURFACE_SIMPLIFIED_INTRA,
8294         &bindingTable->dwBindingTableEntries[startIndex++]));
8295 
8296     if (!m_hwWalker)
8297     {
8298         eStatus = MOS_STATUS_UNKNOWN;
8299         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8300         return eStatus;
8301     }
8302 
8303     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8304     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8305     walkerCodecParams.WalkerMode        = m_walkerMode;
8306     /* looping for Walker is needed at 16x16 block level */
8307     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  16) >> 4;
8308     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 16) >> 4;
8309     /* Enforce no dependency dispatch order for the 16x16 SAD kernel  */
8310     walkerCodecParams.bNoDependency     = true;
8311 
8312     MHW_WALKER_PARAMS walkerParams;
8313     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8314         m_hwInterface,
8315         &walkerParams,
8316         &walkerCodecParams));
8317 
8318     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8319         &cmdBuffer,
8320         &walkerParams));
8321 
8322     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8323         encFunctionType,
8324         kernelState,
8325         &cmdBuffer));
8326 
8327     return eStatus;
8328 }
8329 
Encode16x16PuModeDecisionKernel()8330 MOS_STATUS CodechalEncHevcStateG9::Encode16x16PuModeDecisionKernel()
8331 {
8332     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8333 
8334     CODECHAL_ENCODE_FUNCTION_ENTER;
8335 
8336     PerfTagSetting perfTag;
8337     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD);
8338 
8339     uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16MD;
8340     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
8341     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8342     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8343     {
8344         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8345     }
8346 
8347     // Setup DSH
8348     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8349         m_stateHeapInterface,
8350         kernelState,
8351         false,
8352         0,
8353         false,
8354         m_storeData));
8355 
8356     // Setup CURBE
8357     int32_t sliceQp = CalSliceQp();
8358     uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
8359 
8360     double lambdaScaleFactor = 0.46 + sliceQp - 22;
8361     if (lambdaScaleFactor < 0)
8362     {
8363         lambdaScaleFactor = 0.46;
8364     }
8365 
8366     if (lambdaScaleFactor > 15)
8367     {
8368         lambdaScaleFactor = 15;
8369     }
8370 
8371     double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6);
8372     m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10));
8373 
8374     double lambdaScalingFactor = 1.0;
8375     double qpLambda = m_qpLambdaMd[sliceType][sliceQp];
8376     double squaredQpLambda = qpLambda * qpLambda;
8377     m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
8378 
8379     LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR);
8380 
8381     CODECHAL_ENC_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd;
8382     MOS_ZeroMemory(curbe, sizeof(*curbe));
8383 
8384     uint32_t log2MaxCUSize         = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8385     curbe->DW0.FrameWidth          = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8386     curbe->DW0.FrameHeight         = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8387 
8388     curbe->DW1.Log2MaxCUSize       = log2MaxCUSize;
8389     curbe->DW1.Log2MinCUSize       = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
8390     curbe->DW1.Log2MinTUSize       = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
8391     curbe->DW1.SliceQp             = sliceQp;
8392 
8393     curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma;
8394 
8395     curbe->DW3.LambdaScalingFactor    = 1;
8396     curbe->DW3.SliceType              = sliceType;
8397     curbe->DW3.EnableIntraEarlyExit   = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8398     curbe->DW3.BRCEnable              = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
8399     curbe->DW3.LCUBRCEnable           = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
8400     curbe->DW3.ROIEnable              = (m_hevcPicParams->NumROI > 0);
8401     curbe->DW3.FASTSurveillanceFlag   = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8402     curbe->DW3.EnableRollingIntra     = m_hevcPicParams->bEnableRollingIntraRefresh;
8403     //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel
8404     curbe->DW3.IntraRefreshEn         = m_hevcPicParams->bEnableRollingIntraRefresh;
8405     curbe->DW3.HalfUpdateMixedLCU     = 0;
8406 
8407     curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0;
8408     curbe->DW4.IntraComputeType                = 1;
8409     curbe->DW4.AVCIntra8x8Mask                 = 0;
8410     curbe->DW4.IntraSadAdjust                  = 2;
8411 
8412     double lambdaMd       = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3));
8413     squredLambda          = lambdaMd * lambdaMd;
8414     uint32_t newLambda      = (uint32_t)(squredLambda*(1<<10));
8415     curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda;
8416 
8417     curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent;
8418 
8419     curbe->DW7.ModeCostIntraNonPred = m_modeCost[0];
8420     curbe->DW7.ModeCostIntra16x16   = m_modeCost[1];
8421     curbe->DW7.ModeCostIntra8x8     = m_modeCost[2];
8422     curbe->DW7.ModeCostIntra4x4     = m_modeCost[3];
8423 
8424     curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma;
8425 
8426     if (m_hevcPicParams->bEnableRollingIntraRefresh)
8427     {
8428         curbe->DW9.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
8429         curbe->DW9.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
8430         curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
8431     }
8432 
8433     curbe->DW10.SimplifiedFlagForInter = 0;
8434     curbe->DW10.HaarTransformMode      = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
8435 
8436     uint32_t startBTI = 0;
8437     curbe->DW16.BTI_Src_Y              = bindingTable->dwBindingTableEntries[startBTI++];
8438     startBTI++; // skip UV BTI
8439     curbe->DW17.BTI_Sad_16x16_PU       = bindingTable->dwBindingTableEntries[startBTI++];
8440     curbe->DW18.BTI_PAK_Object         = bindingTable->dwBindingTableEntries[startBTI++];
8441     curbe->DW19.BTI_SAD_32x32_PU_mode  = bindingTable->dwBindingTableEntries[startBTI++];
8442     curbe->DW20.BTI_VME_Mode_8x8       = bindingTable->dwBindingTableEntries[startBTI++];
8443     curbe->DW21.BTI_Slice_Map          = bindingTable->dwBindingTableEntries[startBTI++];
8444     curbe->DW22.BTI_VME_Src            = bindingTable->dwBindingTableEntries[startBTI++];
8445     curbe->DW23.BTI_BRC_Input          = bindingTable->dwBindingTableEntries[startBTI++];
8446     curbe->DW24.BTI_Simplest_Intra     = bindingTable->dwBindingTableEntries[startBTI++];
8447     curbe->DW25.BTI_LCU_Qp_Surface     = bindingTable->dwBindingTableEntries[startBTI++];
8448     curbe->DW26.BTI_BRC_Data           = bindingTable->dwBindingTableEntries[startBTI++];
8449     curbe->DW27.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
8450 
8451     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
8452 
8453     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION;
8454     CODECHAL_ENCODE_CHK_STATUS_RETURN(
8455         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8456 
8457     MOS_COMMAND_BUFFER cmdBuffer;
8458     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8459         &cmdBuffer,
8460         kernelState,
8461         encFunctionType,
8462         nullptr));
8463 
8464     //Add surface states
8465     startBTI = 0;
8466 
8467     // Source Y and UV:
8468     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8469         kernelState,
8470         &cmdBuffer,
8471         SURFACE_RAW_Y_UV,
8472         &bindingTable->dwBindingTableEntries[startBTI++]));
8473     startBTI++;
8474 
8475     // 16x16 PU SAD output
8476     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8477         kernelState,
8478         &cmdBuffer,
8479         SURFACE_16x16PU_SAD,
8480         &bindingTable->dwBindingTableEntries[startBTI++]));
8481 
8482     // PAK object output
8483     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8484         kernelState,
8485         &cmdBuffer,
8486         SURFACE_CU_RECORD,
8487         &bindingTable->dwBindingTableEntries[startBTI++]));
8488 
8489     // 32x32 PU MD data
8490     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8491         kernelState,
8492         &cmdBuffer,
8493         SURFACE_32x32_PU_OUTPUT,
8494         &bindingTable->dwBindingTableEntries[startBTI++]));
8495 
8496     // VME 8x8 mode
8497     m_surfaceParams[SURFACE_VME_8x8].bIsWritable   =
8498     m_surfaceParams[SURFACE_VME_8x8].bRenderTarget = true;
8499     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8500         kernelState,
8501         &cmdBuffer,
8502         SURFACE_VME_8x8,
8503         &bindingTable->dwBindingTableEntries[startBTI++]));
8504 
8505     // Slice map
8506     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8507         kernelState,
8508         &cmdBuffer,
8509         SURFACE_SLICE_MAP,
8510         &bindingTable->dwBindingTableEntries[startBTI++]));
8511 
8512     // Source Y for VME
8513     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8514         kernelState,
8515         &cmdBuffer,
8516         SURFACE_RAW_VME,
8517         &bindingTable->dwBindingTableEntries[startBTI++]));
8518 
8519     // BRC Input
8520     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8521         kernelState,
8522         &cmdBuffer,
8523         SURFACE_BRC_INPUT,
8524         &bindingTable->dwBindingTableEntries[startBTI++]));
8525 
8526     // Simplest Intra
8527     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8528         kernelState,
8529         &cmdBuffer,
8530         SURFACE_SIMPLIFIED_INTRA,
8531         &bindingTable->dwBindingTableEntries[startBTI++]));
8532 
8533     // LCU Qp surface
8534     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8535         kernelState,
8536         &cmdBuffer,
8537         SURFACE_LCU_QP,
8538         &bindingTable->dwBindingTableEntries[startBTI++]));
8539 
8540     // BRC data surface
8541     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8542         kernelState,
8543         &cmdBuffer,
8544         SURFACE_BRC_DATA,
8545         &bindingTable->dwBindingTableEntries[startBTI++]));
8546 
8547     if (!m_hwWalker)
8548     {
8549         eStatus = MOS_STATUS_UNKNOWN;
8550         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8551         return eStatus;
8552     }
8553 
8554     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8555     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8556     walkerCodecParams.WalkerMode        = m_walkerMode;
8557     /* looping for Walker is needed at 32x32 block level in OPT case*/
8558     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5;
8559     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
8560     walkerCodecParams.bNoDependency     = true;
8561 
8562     MHW_WALKER_PARAMS walkerParams;
8563     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8564         m_hwInterface,
8565         &walkerParams,
8566         &walkerCodecParams));
8567 
8568     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8569         &cmdBuffer,
8570         &walkerParams));
8571 
8572     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8573         encFunctionType,
8574         kernelState,
8575         &cmdBuffer));
8576 
8577     return eStatus;
8578 }
8579 
Encode8x8PUKernel()8580 MOS_STATUS CodechalEncHevcStateG9::Encode8x8PUKernel()
8581 {
8582     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8583 
8584     CODECHAL_ENCODE_FUNCTION_ENTER;
8585 
8586     PerfTagSetting perfTag;
8587     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
8588 
8589     uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8PU;
8590     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
8591     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8592     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8593     {
8594         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8595     }
8596 
8597     // Setup DSH
8598     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8599         m_stateHeapInterface,
8600         kernelState,
8601         false,
8602         0,
8603         false,
8604         m_storeData));
8605 
8606     // Setup CURBE
8607     uint32_t                            log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8608     CODECHAL_ENC_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd;
8609     MOS_ZeroMemory(curbe, sizeof(*curbe));
8610 
8611     curbe->DW0.FrameWidth          = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8612     curbe->DW0.FrameHeight         = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8613 
8614     curbe->DW1.SliceType            = (m_hevcPicParams->CodingType == I_TYPE) ? CODECHAL_ENCODE_HEVC_I_SLICE : CODECHAL_ENCODE_HEVC_B_SLICE;
8615     curbe->DW1.PuType          = 2; // 8x8
8616     curbe->DW1.DcFilterFlag    = true;
8617     curbe->DW1.AngleRefineFlag = true;
8618     curbe->DW1.LCUType         = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
8619     curbe->DW1.ScreenContentFlag    = m_hevcPicParams->bScreenContent;
8620     curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8621     curbe->DW1.EnableDebugDump = false;
8622     curbe->DW1.BRCEnable            = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
8623     curbe->DW1.LCUBRCEnable         = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
8624     curbe->DW1.ROIEnable            = (m_hevcPicParams->NumROI > 0);
8625     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8626     if (m_hevcPicParams->bEnableRollingIntraRefresh)
8627     {
8628         curbe->DW1.EnableRollingIntra   = true;
8629         curbe->DW1.IntraRefreshEn       = true;
8630         curbe->DW1.HalfUpdateMixedLCU   = 0;
8631 
8632         curbe->DW5.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
8633         curbe->DW5.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
8634         curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
8635 
8636         int32_t qp = CalSliceQp();
8637         curbe->DW1.QPValue              = (uint32_t)qp;
8638     }
8639 
8640     curbe->DW2.LumaLambda      = m_fixedPointLambdaForLuma;
8641 
8642     curbe->DW3.ChromaLambda    = m_fixedPointLambdaForChroma;
8643 
8644     curbe->DW4.HaarTransformFlag       = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
8645     curbe->DW4.SimplifiedFlagForInter  = false;
8646 
8647     uint32_t startBTI = 0;
8648     curbe->DW8.BTI_Src_Y           = bindingTable->dwBindingTableEntries[startBTI++];
8649     startBTI++; // skip one BTI for Y and UV have the same BTI
8650     curbe->DW9.BTI_Slice_Map       = bindingTable->dwBindingTableEntries[startBTI++];
8651     curbe->DW10.BTI_VME_8x8_Mode    = bindingTable->dwBindingTableEntries[startBTI++];
8652     curbe->DW11.BTI_Intra_Mode     = bindingTable->dwBindingTableEntries[startBTI++];
8653     curbe->DW12.BTI_BRC_Input      = bindingTable->dwBindingTableEntries[startBTI++];
8654     curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
8655     curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
8656     curbe->DW15.BTI_BRC_Data       = bindingTable->dwBindingTableEntries[startBTI++];
8657     curbe->DW16.BTI_Debug          = bindingTable->dwBindingTableEntries[startBTI++];
8658 
8659     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
8660 
8661     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU;
8662     CODECHAL_ENCODE_CHK_STATUS_RETURN(
8663         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8664 
8665     MOS_COMMAND_BUFFER cmdBuffer;
8666     if(m_numMb8x8IntraKernelSplit == 0)
8667     {
8668         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
8669             kernelState,
8670             encFunctionType,
8671             nullptr));
8672     }
8673     else
8674     {
8675         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8676 
8677         MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
8678         MOS_ZeroMemory(&idParams, sizeof(idParams));
8679         idParams.pKernelState = kernelState;
8680         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
8681             m_stateHeapInterface,
8682             1,
8683             &idParams));
8684 
8685         // Add binding table
8686         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
8687             m_stateHeapInterface,
8688             kernelState));
8689     }
8690 
8691     //Add surface states
8692     startBTI = 0;
8693 
8694     // Source Y and UV
8695     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8696         kernelState,
8697         &cmdBuffer,
8698         SURFACE_RAW_Y_UV,
8699         &bindingTable->dwBindingTableEntries[startBTI++]));
8700     startBTI++;
8701 
8702     // Slice Map
8703     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8704         kernelState,
8705         &cmdBuffer,
8706         SURFACE_SLICE_MAP,
8707         &bindingTable->dwBindingTableEntries[startBTI++]));
8708 
8709     // VME 8x8 mode
8710     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8711         kernelState,
8712         &cmdBuffer,
8713         SURFACE_VME_8x8,
8714         &bindingTable->dwBindingTableEntries[startBTI++]));
8715 
8716     // Intra mode
8717     m_surfaceParams[SURFACE_INTRA_MODE].bIsWritable   =
8718     m_surfaceParams[SURFACE_INTRA_MODE].bRenderTarget = true;
8719     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8720         kernelState,
8721         &cmdBuffer,
8722         SURFACE_INTRA_MODE,
8723         &bindingTable->dwBindingTableEntries[startBTI++]));
8724 
8725     // BRC Input
8726     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8727         kernelState,
8728         &cmdBuffer,
8729         SURFACE_BRC_INPUT,
8730         &bindingTable->dwBindingTableEntries[startBTI++]));
8731 
8732     // Simplest Intra
8733     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8734         kernelState,
8735         &cmdBuffer,
8736         SURFACE_SIMPLIFIED_INTRA,
8737         &bindingTable->dwBindingTableEntries[startBTI++]));
8738 
8739     // LCU Qp surface
8740     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8741         kernelState,
8742         &cmdBuffer,
8743         SURFACE_LCU_QP,
8744         &bindingTable->dwBindingTableEntries[startBTI++]));
8745 
8746     // BRC data surface
8747     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8748         kernelState,
8749         &cmdBuffer,
8750         SURFACE_BRC_DATA,
8751         &bindingTable->dwBindingTableEntries[startBTI++]));
8752 
8753     if (!m_hwWalker)
8754     {
8755         eStatus = MOS_STATUS_UNKNOWN;
8756         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8757         return eStatus;
8758     }
8759 
8760     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8761     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8762     walkerCodecParams.WalkerMode        = m_walkerMode;
8763     // each EU is based on one 8x8 block
8764     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,    CODECHAL_MACROBLOCK_WIDTH)  >> 3;
8765     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight,   CODECHAL_MACROBLOCK_HEIGHT) >> 3;
8766     /* Enforce no dependency dispatch order for 8x8 PU kernel  */
8767     walkerCodecParams.bNoDependency     = true;
8768 
8769     if(m_numMb8x8IntraKernelSplit == 0)
8770     {
8771         MHW_WALKER_PARAMS walkerParams;
8772         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8773             m_hwInterface,
8774             &walkerParams,
8775             &walkerCodecParams));
8776 
8777         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8778             &cmdBuffer,
8779             &walkerParams));
8780     }
8781     else
8782     {
8783         uint32_t numRowPerSplit = (walkerCodecParams.dwResolutionY + m_numMb8x8IntraKernelSplit - 1) / m_numMb8x8IntraKernelSplit;
8784         uint32_t currentNumRow = 0;
8785 
8786         for(uint32_t i = 0; i < m_numMb8x8IntraKernelSplit; i++)
8787         {
8788             // Program render engine pipe commands
8789             SendKernelCmdsParams sendKernelCmdsParams;
8790             sendKernelCmdsParams                        = SendKernelCmdsParams();
8791             sendKernelCmdsParams.EncFunctionType        = encFunctionType;
8792             sendKernelCmdsParams.pKernelState           = kernelState;
8793             sendKernelCmdsParams.bEnableCustomScoreBoard= true;
8794             sendKernelCmdsParams.pCustomScoreBoard      = &m_walkingPatternParam.ScoreBoard;
8795             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
8796 
8797             MHW_WALKER_PARAMS walkerParams;
8798             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8799                 m_hwInterface,
8800                 &walkerParams,
8801                 &walkerCodecParams));
8802 
8803             if(currentNumRow + numRowPerSplit >= walkerCodecParams.dwResolutionY)
8804             {
8805                 // the last split may not have the same number of rows as previous splits
8806                 numRowPerSplit = walkerCodecParams.dwResolutionY - currentNumRow;
8807             }
8808 
8809             walkerParams.LocalStart.y = currentNumRow;
8810             walkerParams.dwLocalLoopExecCount = numRowPerSplit * walkerCodecParams.dwResolutionX;
8811 
8812             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8813                 &cmdBuffer,
8814                 &walkerParams));
8815 
8816             currentNumRow += numRowPerSplit;
8817             if(currentNumRow >= walkerCodecParams.dwResolutionY)
8818             {
8819                 break;
8820             }
8821         }
8822     }
8823 
8824     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8825         encFunctionType,
8826         kernelState,
8827         &cmdBuffer));
8828 
8829     return eStatus;
8830 }
8831 
Encode8x8PUFMODEKernel()8832 MOS_STATUS CodechalEncHevcStateG9::Encode8x8PUFMODEKernel()
8833 {
8834     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8835 
8836     CODECHAL_ENCODE_FUNCTION_ENTER;
8837 
8838     PerfTagSetting perfTag;
8839     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE);
8840 
8841     uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8FMODE;
8842     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
8843     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8844     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8845     {
8846         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8847     }
8848 
8849     // Setup DSH
8850     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8851         m_stateHeapInterface,
8852         kernelState,
8853         false,
8854         0,
8855         false,
8856         m_storeData));
8857 
8858     // Setup CURBE
8859     int32_t qp = CalSliceQp();
8860     uint32_t sliceQp = (uint32_t)qp;
8861     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8862 
8863     CODECHAL_ENC_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd;
8864     MOS_ZeroMemory(curbe, sizeof(*curbe));
8865     curbe->DW0.FrameWidth                  = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8866     curbe->DW0.FrameHeight                 = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8867 
8868     curbe->DW1.SliceType                   = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
8869     curbe->DW1.PuType                      = 2;
8870     curbe->DW1.PakReordingFlag             = (m_hevcPicParams->CodingType == I_TYPE) ? true : false;
8871     curbe->DW1.LCUType                     = (log2MaxCUSize == 6)? 0 /*64x64*/: 1 /*32x32*/;
8872     curbe->DW1.ScreenContentFlag           = m_hevcPicParams->bScreenContent;
8873     curbe->DW1.EnableIntraEarlyExit        = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8874     curbe->DW1.EnableDebugDump             = false;
8875     curbe->DW1.BRCEnable                   = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
8876     curbe->DW1.LCUBRCEnable                = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
8877     curbe->DW1.ROIEnable                   = (m_hevcPicParams->NumROI > 0);
8878     curbe->DW1.FASTSurveillanceFlag        = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8879     curbe->DW1.EnableRollingIntra          = m_hevcPicParams->bEnableRollingIntraRefresh;
8880     curbe->DW1.IntraRefreshEn              = m_hevcPicParams->bEnableRollingIntraRefresh;
8881     curbe->DW1.HalfUpdateMixedLCU          = 0;
8882     curbe->DW2.LambdaForLuma               = m_fixedPointLambdaForLuma;
8883 
8884     if (m_hevcPicParams->CodingType != I_TYPE)
8885     {
8886         float hadBias = 2.0f;
8887 
8888         double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
8889         lambdaMd = lambdaMd * hadBias;
8890         curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10));
8891     }
8892     curbe->DW4.ModeCostFor8x8PU_TU8      = 0;
8893     curbe->DW5.ModeCostFor8x8PU_TU4      = 0;
8894     curbe->DW6.SATD16x16PuThreshold      = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0);
8895     curbe->DW6.BiasFactorToward8x8       = (m_hevcPicParams->bScreenContent) ? 1024 : 1126 + 102;
8896     curbe->DW7.Qp                        = sliceQp;
8897     curbe->DW7.QpForInter                = 0;
8898     curbe->DW8.SimplifiedFlagForInter    = false;
8899     // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
8900     curbe->DW8.KBLControlFlag            = UsePlatformControlFlag();
8901     curbe->DW9.IntraRefreshMBNum         = m_hevcPicParams->IntraInsertionLocation;
8902     curbe->DW9.IntraRefreshQPDelta       = m_hevcPicParams->QpDeltaForInsertedIntra;
8903     curbe->DW9.IntraRefreshUnitInMB      = m_hevcPicParams->IntraInsertionSize;
8904 
8905     uint32_t startBTI = 0;
8906     curbe->DW16.BTI_PAK_Object           = bindingTable->dwBindingTableEntries[startBTI++];
8907     curbe->DW17.BTI_VME_8x8_Mode         = bindingTable->dwBindingTableEntries[startBTI++];
8908     curbe->DW18.BTI_Intra_Mode           = bindingTable->dwBindingTableEntries[startBTI++];
8909     curbe->DW19.BTI_PAK_Command          = bindingTable->dwBindingTableEntries[startBTI++];
8910     curbe->DW20.BTI_Slice_Map            = bindingTable->dwBindingTableEntries[startBTI++];
8911     curbe->DW21.BTI_IntraDist            = bindingTable->dwBindingTableEntries[startBTI++];
8912     curbe->DW22.BTI_BRC_Input            = bindingTable->dwBindingTableEntries[startBTI++];
8913     curbe->DW23.BTI_Simplest_Intra       = bindingTable->dwBindingTableEntries[startBTI++];
8914     curbe->DW24.BTI_LCU_Qp_Surface       = bindingTable->dwBindingTableEntries[startBTI++];
8915     curbe->DW25.BTI_BRC_Data             = bindingTable->dwBindingTableEntries[startBTI++];
8916     curbe->DW26.BTI_Haar_Dist16x16       = bindingTable->dwBindingTableEntries[startBTI++];
8917     curbe->DW27.BTI_Stats_Data           = bindingTable->dwBindingTableEntries[startBTI++];
8918     curbe->DW28.BTI_Frame_Stats_Data     = bindingTable->dwBindingTableEntries[startBTI++];
8919     curbe->DW29.BTI_Debug                = bindingTable->dwBindingTableEntries[startBTI++];
8920 
8921     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
8922 
8923     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE;
8924     CODECHAL_ENCODE_CHK_STATUS_RETURN(
8925         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8926 
8927     MOS_COMMAND_BUFFER cmdBuffer;
8928     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8929         &cmdBuffer,
8930         kernelState,
8931         encFunctionType,
8932         nullptr));
8933 
8934     //Add surface states
8935     startBTI = 0;
8936 
8937     // PAK object
8938     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8939         kernelState,
8940         &cmdBuffer,
8941         SURFACE_CU_RECORD,
8942         &bindingTable->dwBindingTableEntries[startBTI++]));
8943 
8944     // VME 8x8 mode
8945     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8946         kernelState,
8947         &cmdBuffer,
8948         SURFACE_VME_8x8,
8949         &bindingTable->dwBindingTableEntries[startBTI++]));
8950 
8951     // Intra mode
8952     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8953         kernelState,
8954         &cmdBuffer,
8955         SURFACE_INTRA_MODE,
8956         &bindingTable->dwBindingTableEntries[startBTI++]));
8957 
8958     // PAK command
8959     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8960         kernelState,
8961         &cmdBuffer,
8962         SURFACE_HCP_PAK,
8963         &bindingTable->dwBindingTableEntries[startBTI++]));
8964 
8965     // Slice Map
8966     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8967         kernelState,
8968         &cmdBuffer,
8969         SURFACE_SLICE_MAP,
8970         &bindingTable->dwBindingTableEntries[startBTI++]));
8971 
8972     // Intra dist
8973     m_surfaceParams[SURFACE_INTRA_DIST].bIsWritable   =
8974     m_surfaceParams[SURFACE_INTRA_DIST].bRenderTarget = true;
8975     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8976         kernelState,
8977         &cmdBuffer,
8978         SURFACE_INTRA_DIST,
8979         &bindingTable->dwBindingTableEntries[startBTI++]));
8980 
8981     // BRC Input
8982     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8983         kernelState,
8984         &cmdBuffer,
8985         SURFACE_BRC_INPUT,
8986         &bindingTable->dwBindingTableEntries[startBTI++]));
8987 
8988     // Simplest Intra
8989     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8990         kernelState,
8991         &cmdBuffer,
8992         SURFACE_SIMPLIFIED_INTRA,
8993         &bindingTable->dwBindingTableEntries[startBTI++]));
8994 
8995     // LCU Qp surface
8996     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8997         kernelState,
8998         &cmdBuffer,
8999         SURFACE_LCU_QP,
9000         &bindingTable->dwBindingTableEntries[startBTI++]));
9001 
9002     // BRC data surface
9003     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9004         kernelState,
9005         &cmdBuffer,
9006         SURFACE_BRC_DATA,
9007         &bindingTable->dwBindingTableEntries[startBTI++]));
9008 
9009     if (!m_hwWalker)
9010     {
9011         eStatus = MOS_STATUS_UNKNOWN;
9012         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
9013         return eStatus;
9014     }
9015 
9016     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
9017     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
9018     walkerCodecParams.WalkerMode        = m_walkerMode;
9019     // each EU is based on one LCU
9020     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,    (1<<log2MaxCUSize)) >> log2MaxCUSize;
9021     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight,   (1<<log2MaxCUSize)) >> log2MaxCUSize;
9022     /* Enforce no dependency dispatch order for 8x8 PU FMODE kernel  */
9023     walkerCodecParams.bNoDependency     = true;
9024 
9025     MHW_WALKER_PARAMS walkerParams;
9026     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
9027         m_hwInterface,
9028         &walkerParams,
9029         &walkerCodecParams));
9030 
9031     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
9032         &cmdBuffer,
9033         &walkerParams));
9034 
9035     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
9036         encFunctionType,
9037         kernelState,
9038         &cmdBuffer));
9039 
9040     return eStatus;
9041 }
9042 
EncodeDSCombinedKernel(DsStage downScaleStage,uint32_t index,uint32_t refListIdx)9043 MOS_STATUS CodechalEncHevcStateG9::EncodeDSCombinedKernel(
9044     DsStage downScaleStage,
9045     uint32_t index,
9046     uint32_t refListIdx)
9047 {
9048     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9049 
9050     CODECHAL_ENCODE_FUNCTION_ENTER;
9051 
9052     if (m_scalingEnabled)
9053     {
9054         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_trackedBuf->AllocateSurfaceDS());
9055     }
9056 
9057     PerfTagSetting perfTag;
9058     perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL :
9059         CODECHAL_ENCODE_PERFTAG_CALL_DS_CONVERSION_KERNEL;
9060     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, perfTag.CallType);
9061 
9062     uint32_t krnIdx = CODECHAL_HEVC_MBENC_DS_COMBINED;
9063     auto     kernelState  = &m_mbEncKernelStates[krnIdx];
9064     auto     bindingTable = &m_mbEncKernelBindingTable[krnIdx];
9065     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
9066     {
9067         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
9068     }
9069 
9070     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
9071         m_osInterface,
9072         &m_scaled2xSurface));
9073 
9074     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
9075         m_osInterface,
9076         &m_formatConvertedSurface[index]));
9077 
9078     //Setup DSH
9079     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
9080         m_stateHeapInterface,
9081         kernelState,
9082         false,
9083         0,
9084         false,
9085         m_storeData));
9086 
9087     //Setup Scaling CURBE
9088     CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9 cmd, *curbe = &cmd;
9089 
9090     MOS_ZeroMemory(curbe, sizeof(*curbe));
9091     curbe->DW0.Pak_BitDepth_Chroma = 10;
9092     curbe->DW0.Pak_BitDepth_Luma = 10;
9093     curbe->DW0.Enc_BitDepth_Chroma = 8;
9094     curbe->DW0.Enc_BitDepth_Luma = 8;
9095     curbe->DW0.Rounding_Value = 1;
9096 
9097     curbe->DW1.PicFormat = 0;
9098     curbe->DW1.PicConvertFlag = 1;
9099     curbe->DW1.PicDownscale = downScaleStage;//Downscale stage
9100     curbe->DW1.PicMBStatOutputCntrl = 0;
9101 
9102     curbe->DW2.OrigPicWidth = m_frameWidth;
9103     curbe->DW2.OrigPicHeight = m_frameHeight;
9104 
9105     uint32_t startBTI = 0;
9106     curbe->DW3.BTI_Surface_P010 = bindingTable->dwBindingTableEntries[startBTI];
9107     startBTI += 2;   // increment by no of planes
9108     curbe->DW4.BTI_Surface_NV12 = bindingTable->dwBindingTableEntries[startBTI];
9109     startBTI += 2;  // increment by no of planes
9110     curbe->DW5.BTI_Src_Y_4xDownScaled = bindingTable->dwBindingTableEntries[startBTI++];
9111     curbe->DW6.BTI_Surf_MBState = bindingTable->dwBindingTableEntries[startBTI++];
9112     curbe->DW7.BTI_Src_Y_2xDownScaled = bindingTable->dwBindingTableEntries[startBTI++];
9113 
9114     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
9115     CODECHAL_ENCODE_CHK_STATUS_RETURN(
9116         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))
9117     );
9118 
9119     MOS_COMMAND_BUFFER cmdBuffer;
9120     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
9121         &cmdBuffer,
9122         kernelState,
9123         encFunctionType,
9124         nullptr));
9125 
9126     // Add surface states, 2X scaling uses U32Norm surface format for destination
9127     startBTI = 0;
9128 
9129     if (index == 0)
9130     {
9131         // Source surface/s  -- 10 bit YUV
9132         m_surfaceParams[SURFACE_RAW_10bit_Y_UV].bUseUVPlane = true;
9133         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9134             kernelState,
9135             &cmdBuffer,
9136             SURFACE_RAW_10bit_Y_UV,
9137             &bindingTable->dwBindingTableEntries[startBTI],
9138             m_rawSurfaceToEnc
9139         ));
9140     }
9141     else
9142     {
9143         // Source surface/s  -- 10 bit YUV
9144         m_surfaceParams[SURFACE_RAW_10bit_Y_UV].bUseUVPlane = true;
9145         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9146             kernelState,
9147             &cmdBuffer,
9148             SURFACE_RAW_10bit_Y_UV,
9149             &bindingTable->dwBindingTableEntries[startBTI],
9150             &(m_refList[refListIdx]->sRefReconBuffer)));
9151     }
9152     startBTI += 2; // advance binding table pointer to next surface setting
9153 
9154     // Destination surface/s  -- 8 bit Format converted surface
9155     m_formatConvertedSurface[index].dwWidth                            = m_frameWidth;
9156     m_formatConvertedSurface[index].dwHeight                           = m_frameHeight;
9157     m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bUse32UnormSurfaceFormat = false;
9158     m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bUse16UnormSurfaceFormat = false;
9159     m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bUseUVPlane = true;
9160     m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bIsWritable =
9161         m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bRenderTarget = true;
9162     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9163         kernelState,
9164         &cmdBuffer,
9165         SURFACE_RAW_FC_8bit_Y_UV,
9166         &bindingTable->dwBindingTableEntries[startBTI],
9167         &m_formatConvertedSurface[index]));
9168 
9169     startBTI += 2;
9170 
9171     // Destination surface/s  -- 4x downscaled luma only
9172     m_surfaceParams[SURFACE_Y_4X].bUse32UnormSurfaceFormat =
9173         m_surfaceParams[SURFACE_Y_4X].bIsWritable =
9174         m_surfaceParams[SURFACE_Y_4X].bRenderTarget = true;
9175 
9176     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9177         kernelState,
9178         &cmdBuffer,
9179         SURFACE_Y_4X,
9180         &bindingTable->dwBindingTableEntries[startBTI],
9181         m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER)));
9182 
9183     startBTI++;
9184 
9185     //Destination Surface  -- MB Stat surface 1D buffer
9186     m_surfaceParams[SURFACE_RAW_MBSTAT].bUse32UnormSurfaceFormat = false;
9187     m_surfaceParams[SURFACE_RAW_MBSTAT].bUse16UnormSurfaceFormat = false;
9188     m_surfaceParams[SURFACE_RAW_MBSTAT].bIsWritable =
9189         m_surfaceParams[SURFACE_RAW_MBSTAT].bRenderTarget = true;
9190     m_surfaceParams[SURFACE_RAW_MBSTAT].dwSize            = m_resMbStatisticsSurface.dwSize;
9191     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9192         kernelState,
9193         &cmdBuffer,
9194         SURFACE_RAW_MBSTAT,
9195         &bindingTable->dwBindingTableEntries[startBTI],
9196         &m_resMbStatisticsSurface.sResource));
9197 
9198     startBTI++;
9199 
9200     // Destination surface/s  -- 2x downscaled luma only
9201     m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_WIDTH * 2));
9202     m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_HEIGHT * 2));
9203 
9204     m_surfaceParams[SURFACE_Y_2X].bUse32UnormSurfaceFormat =
9205         m_surfaceParams[SURFACE_Y_2X].bIsWritable =
9206         m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
9207 
9208     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9209         kernelState,
9210         &cmdBuffer,
9211         SURFACE_Y_2X,
9212         &bindingTable->dwBindingTableEntries[startBTI]
9213     ));
9214 
9215     //move back to 16 aligned..
9216     m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_WIDTH));
9217     m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_HEIGHT));
9218 
9219     m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat =
9220         m_surfaceParams[SURFACE_Y_2X].bIsWritable =
9221         m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
9222 
9223     if (!m_hwWalker)
9224     {
9225         eStatus = MOS_STATUS_UNKNOWN;
9226         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
9227         return eStatus;
9228     }
9229 
9230     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
9231     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
9232 
9233     /* first stage of the downscale and convert kernel can do conversion + 4x + 2x */
9234     walkerCodecParams.WalkerMode = m_walkerMode;
9235     walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL((m_frameWidth >> 2), 32) >> 3;
9236     walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL((m_frameHeight >> 2), 32) >> 3;
9237 
9238     /* Enforce no dependency dispatch order for Scaling kernel,  */
9239     walkerCodecParams.bNoDependency = true;
9240 
9241     MHW_WALKER_PARAMS walkerParams;
9242     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
9243         m_hwInterface,
9244         &walkerParams,
9245         &walkerCodecParams));
9246 
9247     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
9248         &cmdBuffer,
9249         &walkerParams));
9250 
9251     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
9252         encFunctionType,
9253         kernelState,
9254         &cmdBuffer));
9255 
9256     return eStatus;
9257 }
9258 
EncodeDSKernel()9259 MOS_STATUS CodechalEncHevcStateG9::EncodeDSKernel()
9260 {
9261     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9262 
9263     CODECHAL_ENCODE_FUNCTION_ENTER;
9264 
9265     // Walker must be used for HME call and scaling one
9266     CODECHAL_ENCODE_ASSERT(m_hwWalker);
9267 
9268     //perform 4x down-scaling
9269     if (MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrEncodeHEVC10bit) && (m_hevcSeqParams->bit_depth_luma_minus8) && m_scalingEnabled)
9270     {
9271         m_lastTaskInPhase = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
9272         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsStage2x4x, 0, 0));
9273 
9274         //Dump format converted input surface
9275         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9276             &m_formatConvertedSurface[0],
9277             CodechalDbgAttr::attrEncodeRawInputSurface,
9278             "SrcSurf")));
9279 
9280         //Scaled surface
9281         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9282             m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
9283             CodechalDbgAttr::attrEncodeRawInputSurface,
9284             "SrcSurf")));
9285 
9286         //Scaled surface
9287         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9288             &m_scaled2xSurface,
9289             CodechalDbgAttr::attrEncodeRawInputSurface,
9290             "SrcSurf")));
9291 
9292         // call 16x/32x DS
9293         if (m_16xMeSupported)
9294         {
9295             m_lastTaskInPhase = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
9296 
9297             // 4x downscaled images used as the input for 16x downscaling
9298             CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
9299             MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
9300             cscScalingKernelParams.b16xScalingInUse = true;
9301             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->DsKernel(&cscScalingKernelParams));
9302 
9303             if (m_32xMeSupported)
9304             {
9305                 m_lastTaskInPhase = !(m_hmeEnabled || m_brcEnabled);
9306 
9307                 // 16x downscaled images used as the input for 32x downscaling
9308                 cscScalingKernelParams.b32xScalingInUse = true;
9309                 cscScalingKernelParams.b16xScalingInUse = false;
9310                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->DsKernel(&cscScalingKernelParams));
9311             }
9312         }
9313     }
9314     else
9315     {
9316         // Csc, Downscaling, and/or 10-bit to 8-bit conversion
9317         CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState);
9318 
9319         CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
9320         MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
9321         cscScalingKernelParams.bLastTaskInPhaseCSC =
9322             cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
9323         cscScalingKernelParams.bLastTaskInPhase16xDS    = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
9324         cscScalingKernelParams.bLastTaskInPhase32xDS    = !(m_hmeEnabled || m_brcEnabled);
9325 
9326         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
9327     }
9328 
9329     // wait on the current MbCode object if needed
9330     if (m_hevcPicParams->bUsedAsRef || (m_brcEnabled && !m_hevcSeqParams->ParallelBRC))
9331     {
9332         m_currRefSync = &m_refSync[m_currMbCodeIdx];
9333 
9334         // Check if the signal obj has been used before
9335         if (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed)
9336         {
9337             MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
9338             syncParams.GpuContext = m_renderContext;
9339             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
9340             syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount;
9341 
9342             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
9343             m_currRefSync->uiSemaphoreObjCount = 0;
9344             m_currRefSync->bInUsed             = false;
9345         }
9346     }
9347     else
9348     {
9349         m_currRefSync = nullptr;
9350     }
9351 
9352     return eStatus;
9353 }
9354 
EncodeKernelFunctions()9355 MOS_STATUS CodechalEncHevcStateG9::EncodeKernelFunctions()
9356 {
9357     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9358 
9359     CODECHAL_ENCODE_FUNCTION_ENTER;
9360 
9361     CODECHAL_DEBUG_TOOL(
9362         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9363             m_rawSurfaceToEnc,
9364             CodechalDbgAttr::attrEncodeRawInputSurface,
9365             "SrcSurf")));
9366     )
9367 
9368     if (m_pakOnlyTest)
9369     {
9370         // Skip all ENC kernel operations for now it is in the PAK only test mode.
9371         // PAK and CU records will be passed via the app
9372         return eStatus;
9373     }
9374 
9375     UpdateSSDSliceCount();
9376 
9377     // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
9378     if (m_brcEnabled && (m_brcInit || m_brcReset))
9379     {
9380         m_firstTaskInPhase = m_lastTaskInPhase = true;
9381         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcInitResetKernel());
9382         m_brcInit = m_brcReset = false;
9383     }
9384 
9385     // Scaled surfaces are required to run both HME and IFrameDist
9386     bool scalingEnabled = (m_hmeSupported || m_brcEnabled);
9387     if (scalingEnabled || m_cscDsState->RequireCsc())
9388     {
9389         //Use a different performance tag ID for scaling and HME
9390         m_osInterface->pfnResetPerfBufferID(m_osInterface);
9391 
9392         m_firstTaskInPhase = true;
9393         m_lastTaskInPhase  = false;
9394 
9395         if(m_hevcSeqParams->GopPicSize != 1 || m_brcEnabled || m_cscDsState->RequireCsc())
9396         {
9397             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSKernel());
9398         }
9399 
9400         if (m_brcEnabled)
9401         {
9402             // LCU-based BRC update kernel needs both intra and inter (from HME) distortion
9403             m_lastTaskInPhase = (m_pictureCodingType == I_TYPE);
9404             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeCoarseIntra16x16Kernel());
9405         }
9406 
9407         // only need to call HME kernel when HME enabled and NOT I-frame
9408         if (m_hmeEnabled)
9409         {
9410             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel());
9411         }
9412     }
9413 
9414     if(m_osInterface->bSimIsActive)
9415     {
9416         // Clean MB code buffer to ensure there is no previous CU record and PAK command
9417         MOS_LOCK_PARAMS lockFlags;
9418         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
9419         lockFlags.WriteOnly = 1;
9420 
9421         uint8_t*  data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &m_resMbCodeSurface, &lockFlags);
9422         if (data)
9423         {
9424             MOS_ZeroMemory(data, m_mbCodeSize);
9425             m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
9426         }
9427     }
9428 
9429     // Generate slice map for kernel
9430     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSliceMap());
9431 
9432     //Reset to use a different performance tag ID for I kernels. Each kernel has a different buffer ID
9433     m_osInterface->pfnResetPerfBufferID(m_osInterface);
9434 
9435     m_firstTaskInPhase = true;
9436     m_lastTaskInPhase  = false;
9437 
9438     // ROI uses the BRC LCU update kernel, even in CQP.  So we will call it
9439     // first if in CQP.  It has no other kernel execution dependencies, even
9440     // that brc is not initialized is not a dependency
9441     if (m_hevcPicParams->NumROI && !m_brcEnabled)
9442     {
9443         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(nullptr));
9444     }
9445 
9446     /* When TU=7, fast encoding mode is ON, and I kernels are not needed.
9447     Instead, MB ENC B kernel is used to replace I kernels.
9448     */
9449     bool fastEncodingFlag  = (m_hevcSeqParams->TargetUsage == 0x7);
9450     bool brcUpdateComplete = false;
9451 
9452     if(fastEncodingFlag)
9453     {
9454         if (m_hevcPicParams->CodingType == I_TYPE)
9455         {
9456             // BRC and MbEnc are included in the same task phase
9457             if (m_brcEnabled && !brcUpdateComplete)
9458             {
9459                 // BRC needs previous PAK result if not running in the parallel BRC mode
9460                 // If yes, BRC is using the PAk result of the frame before the previous one
9461                 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
9462 
9463                 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateKernel());
9464 
9465                 // Reset buffer ID used for BRC kernel performance reports
9466                 m_osInterface->pfnResetPerfBufferID(m_osInterface);
9467                 brcUpdateComplete = true;
9468             }
9469             else if (!m_brcEnabled)
9470             {
9471                 if (m_encodeParams.bMbQpDataEnabled && m_encodeParams.psMbQpDataSurface)
9472                 {
9473                     auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
9474                     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
9475                     Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_brcBuffers.sBrcMbQpBuffer);
9476                 }
9477             }
9478 
9479             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
9480         }
9481     }
9482     else
9483     {
9484         // BRC and MbEnc are included in the same task phase
9485         if (m_brcEnabled && !brcUpdateComplete)
9486         {
9487             // BRC needs previous PAK result
9488             CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
9489 
9490             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateKernel());
9491 
9492             // Reset buffer ID used for BRC kernel performance reports
9493             m_osInterface->pfnResetPerfBufferID(m_osInterface);
9494             brcUpdateComplete = true;
9495         }
9496         else if (!m_brcEnabled)
9497         {
9498             if (m_encodeParams.bMbQpDataEnabled && m_encodeParams.psMbQpDataSurface)
9499             {
9500                 auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
9501                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
9502                 Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_brcBuffers.sBrcMbQpBuffer);
9503             }
9504         }
9505 
9506         //Step 1: perform 2:1 down-scaling
9507         if (m_hevcSeqParams->bit_depth_luma_minus8 == 0)  // use this for 8 bit only case.
9508         {
9509             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode2xScalingKernel());
9510         }
9511 
9512         //Step 2: 32x32 PU Mode Decision or 32x32 PU Intra check kernel
9513         if (m_hevcPicParams->CodingType == I_TYPE)
9514         {
9515             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32x32PuModeDecisionKernel());
9516         }
9517         else
9518         {
9519             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32X32BIntraCheckKernel());
9520         }
9521 
9522         //Step 3: 16x16 SAD Computation
9523         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16SadPuComputationKernel());
9524 
9525         CODECHAL_DEBUG_TOOL(
9526             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9527                 &m_sad16x16Pu.sResource,
9528                 CodechalDbgAttr::attrOutput,
9529                 "HEVC_16x16_PU_SAD_Out",
9530                 m_sad16x16Pu.dwSize,
9531                 0,
9532                 CODECHAL_MEDIA_STATE_16x16_PU_SAD));
9533         )
9534 
9535         //Step 4: 16x16 PU Mode Decision
9536         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16PuModeDecisionKernel());
9537 
9538         CODECHAL_DEBUG_TOOL(
9539             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9540                 &m_vme8x8Mode.sResource,
9541                 CodechalDbgAttr::attrOutput,
9542                 "HEVC_16x16_PU_MD_Out",
9543                 m_vme8x8Mode.dwSize,
9544                 0,
9545                 CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION));
9546         )
9547 
9548         //Step 5: 8x8 PU
9549         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUKernel());
9550 
9551         //Step 6: 8x8 PU FMODE
9552         m_lastTaskInPhase = true;
9553         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUFMODEKernel());
9554 
9555         CODECHAL_DEBUG_TOOL(
9556             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9557                 &m_scaled2xSurface,
9558                 CodechalDbgAttr::attrReferenceSurfaces,
9559                 "2xScaledSurf"));
9560 
9561             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
9562                 &m_simplestIntraSurface,
9563                 CodechalDbgAttr::attrOutput,
9564                 "HEVC_32x32_SIF_Out",
9565                 CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK));
9566 
9567             if (m_pictureCodingType == I_TYPE)
9568             {
9569                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9570                     &m_32x32PuOutputData.sResource,
9571                     CodechalDbgAttr::attrOutput,
9572                     "HEVC_32x32_PU_MD_Out",
9573                     m_32x32PuOutputData.dwSize,
9574                     0,
9575                     CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
9576             }
9577             else
9578             {
9579                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9580                     &m_32x32PuOutputData.sResource,
9581                     CodechalDbgAttr::attrOutput,
9582                     "HEVC_32x32_B_INTRA_CHECK_Out",
9583                     m_32x32PuOutputData.dwSize,
9584                     0,
9585                     CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
9586 
9587             }
9588 
9589             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9590                 &m_intraMode.sResource,
9591                 CodechalDbgAttr::attrOutput,
9592                 "HEVC_8x8_PU_MD_Out",
9593                 m_intraMode.dwSize,
9594                 0,
9595                 CODECHAL_MEDIA_STATE_8x8_PU));
9596 
9597             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9598                 &m_intraDist.sResource,
9599                 CodechalDbgAttr::attrOutput,
9600                 "HEVC_8x8_PU_FMOD_Out",
9601                 m_intraDist.dwSize,
9602                 0,
9603                 CODECHAL_MEDIA_STATE_8x8_PU_FMODE));
9604         )
9605     }
9606 
9607     // Sync-wait can be executed after I-kernel is submitted before there is no dependency for I to wait for PAK to be ready
9608     CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
9609 
9610     //Step 7: B MB ENC kernel for B picture only
9611     if (m_hevcPicParams->CodingType != I_TYPE)
9612     {
9613         m_firstTaskInPhase = true;
9614         m_lastTaskInPhase = false;
9615 
9616         // BRC and MbEnc are included in the same task phase
9617         if (m_brcEnabled && !brcUpdateComplete)
9618         {
9619             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateKernel());
9620 
9621             // Reset buffer ID used for BRC kernel performance reports
9622             m_osInterface->pfnResetPerfBufferID(m_osInterface);
9623             brcUpdateComplete = true;
9624         }
9625         else if (!m_brcEnabled)
9626         {
9627             if (m_encodeParams.bMbQpDataEnabled && m_encodeParams.psMbQpDataSurface)
9628             {
9629                 auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
9630                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
9631                 Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_brcBuffers.sBrcMbQpBuffer);
9632             }
9633         }
9634 
9635         if ((m_hevcSeqParams->bit_depth_luma_minus8))
9636         {
9637             bool formatConversionDone[NUM_FORMAT_CONV_FRAMES] = { false };
9638             formatConversionDone[0] = true; // always true since its for the input surface.
9639 
9640             for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
9641             {
9642                 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
9643                 {
9644                     continue;
9645                 }
9646 
9647                 uint8_t picIdx = m_picIdx[i].ucPicIdx;
9648                 CODECHAL_ENCODE_ASSERT(picIdx < 127);
9649 
9650                 uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i];
9651 
9652                 if (frameStoreId >= CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC)
9653                 {
9654                     CODECHAL_ENCODE_ASSERT(false);
9655                     eStatus = MOS_STATUS_INVALID_PARAMETER;
9656                     return eStatus;
9657                 }
9658 
9659                 if (formatConversionDone[frameStoreId + 1] != true)
9660                 {
9661                     CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsDisabled, (frameStoreId + 1), picIdx));
9662                     formatConversionDone[frameStoreId + 1] = true;
9663                     m_refList[picIdx]->sRefBuffer          = m_formatConvertedSurface[frameStoreId + 1];
9664                 }
9665             }
9666         }
9667 
9668         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
9669     }
9670 
9671     // Notify PAK engine once ENC is done
9672     if (!m_pakOnlyTest && !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
9673     {
9674         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
9675         syncParams.GpuContext = m_renderContext;
9676         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
9677 
9678         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
9679     }
9680 
9681     if (m_brcEnabled && m_hevcSeqParams->ParallelBRC)
9682     {
9683         m_brcBuffers.uiCurrBrcPakStasIdxForRead = (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
9684     }
9685 
9686     return eStatus;
9687 }
9688 
CheckBrcPakStasBuffer(PMOS_COMMAND_BUFFER cmdBuffer)9689 MOS_STATUS CodechalEncHevcStateG9::CheckBrcPakStasBuffer(
9690     PMOS_COMMAND_BUFFER cmdBuffer)
9691 {
9692     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9693 
9694     CODECHAL_ENCODE_FUNCTION_ENTER;
9695 
9696     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9697 
9698     auto brcPakStas = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead];
9699 
9700     /*
9701     1. The following assembly code is used to implement the following C statements.
9702 
9703             if( ((MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL))->hcpCumulativeFrameDeltaQp <
9704                 ((MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS))->hcpCumulativeFrameDeltaQp)
9705             {
9706                 (MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL))->hcpCumulativeFrameDeltaQp =
9707                     MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS))->hcpCumulativeFrameDeltaQp;
9708             }
9709 
9710     2. The if statement can be replaced by and-or statements. That is,
9711             (a) a = (a < b) ? b : a;
9712             (b) mask = (a - b) >> 32; a = (b & mask) | (a & !mask);
9713             where (a) and (b) are identical and each variable is assumed to be a 64-bit unsigned integer
9714 
9715     3. Totally there are 71 DWs
9716     */
9717     if(cmdBuffer->iRemaining < 71 * sizeof(uint32_t))
9718     {
9719         eStatus = MOS_STATUS_NO_SPACE;
9720         return eStatus;
9721     }
9722 
9723     // reg0 = p->HCP_IMAGE_STATUS_CONTROL
9724     MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
9725     miLoadRegMemParams.presStoreBuffer = brcPakStas;
9726     miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
9727     miLoadRegMemParams.dwRegister = CS_GPR_REGISTER_INDEX(0);
9728     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
9729 
9730     MHW_MI_LOAD_REGISTER_IMM_PARAMS miLoadRegImmParams;
9731     miLoadRegImmParams.dwData = 0;
9732     miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(0) + 4);
9733     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9734 
9735     // reg1 = p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS
9736     miLoadRegMemParams.presStoreBuffer = brcPakStas;
9737     miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
9738     miLoadRegMemParams.dwRegister = CS_GPR_REGISTER_INDEX(1);
9739     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
9740     miLoadRegImmParams.dwData = 0;
9741     miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(1) + 4);
9742     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9743 
9744     // reg2 = 0xFF000000
9745     miLoadRegImmParams.dwData = 0xFF000000;
9746     miLoadRegImmParams.dwRegister = CS_GPR_REGISTER_INDEX(2);
9747     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9748     miLoadRegImmParams.dwData = 0;
9749     miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(2) + 4);
9750     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9751 
9752     // reg3 = reg0 & 0xFF000000
9753     uint32_t csALUCmdNum = 0;
9754     MHW_MI_ALU_PARAMS miAluParams[64] = { 0 };
9755 
9756     // reg3 = reg0 & 0xFF000000
9757     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 0);     // load     srcA, reg0
9758     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 2);     // load     srcB, reg2
9759     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND;            // and      srcA, srcB
9760     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(3);  // store    reg3, alu
9761 
9762     // reg4 = reg1 & 0xFF000000
9763     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 1);     // load     srcA, reg1
9764     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 2);     // load     srcB, reg2
9765     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND;            // and      srcA, srcB
9766     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(4);  // store    reg4, alu
9767 
9768     // reg5 = reg3 - reg4
9769     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 3);     // load     srcA, reg3
9770     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 4);     // load     srcB, reg4
9771     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_SUB;            // sub      srcA, srcB
9772     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(5);  // store    reg5, alu
9773 
9774     if (csALUCmdNum >= sizeof(miAluParams) / sizeof(miAluParams[0]))
9775     {
9776         eStatus = MOS_STATUS_NO_SPACE;
9777         return eStatus;
9778     }
9779 
9780     MHW_MI_MATH_PARAMS miMathParams;
9781     miMathParams.dwNumAluParams = csALUCmdNum;
9782     miMathParams.pAluPayload = miAluParams;
9783     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(cmdBuffer, &miMathParams));
9784 
9785     // reg5 = reg5 >> 32;
9786     MHW_MI_LOAD_REGISTER_REG_PARAMS miLoadRegRegParams;
9787     MOS_ZeroMemory(&miLoadRegRegParams, sizeof(miLoadRegRegParams));
9788     miLoadRegRegParams.dwSrcRegister = CS_GPR_REGISTER_INDEX(5) + 4;
9789     miLoadRegRegParams.dwDstRegister = CS_GPR_REGISTER_INDEX(5) + 0;
9790     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterRegCmd(cmdBuffer, &miLoadRegRegParams));
9791     miLoadRegImmParams.dwData = 0;
9792     miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(5) + 4);
9793     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9794 
9795     // reg6 = 0x00000000FFFFFFFF;
9796     miLoadRegImmParams.dwData = 0xFFFFFFFF;
9797     miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(6));
9798     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9799     miLoadRegImmParams.dwData = 0;
9800     miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(6) + 4);
9801     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9802 
9803     csALUCmdNum = 0;
9804     MOS_ZeroMemory(miAluParams, sizeof(miAluParams));
9805 
9806     // reg6 = reg5 ^ 0x00000000FFFFFFFF;
9807     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 5);     // load     srcA, reg5
9808     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 6);     // load     srcB, reg6
9809     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_XOR;          // xor      srcA, srcB
9810     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(6);  // store    reg6, alu
9811 
9812     // reg1 = reg1 & reg5
9813     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 1);     // load     srcA, reg1
9814     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 5);     // load     srcB, reg5
9815     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND;          // and      srcA, srcB
9816     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(1);  // store    reg1, alu
9817 
9818     // reg0 = reg0 & reg6
9819     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 0);     // load     srcA, reg0
9820     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 6);     // load     srcB, reg6
9821     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND;          // and      srcA, srcB
9822     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(0);  // store    reg0, alu
9823 
9824     // reg0 = reg0 | reg1
9825     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 0);     // load     srcA, reg0
9826     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 1);     // load     srcB, reg1
9827     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_OR;           //  or      srcA, srcB
9828     miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(0);  // store    reg0, alu
9829 
9830     if (csALUCmdNum >= sizeof(miAluParams) / sizeof(miAluParams[0]))
9831     {
9832         eStatus = MOS_STATUS_NO_SPACE;
9833         return eStatus;
9834     }
9835 
9836     miMathParams.dwNumAluParams = csALUCmdNum;
9837     miMathParams.pAluPayload = miAluParams;
9838     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(cmdBuffer, &miMathParams));
9839 
9840     // p->HCP_IMAGE_STATUS_CONTROL = reg0
9841     MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
9842     MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
9843     miStoreRegMemParams.presStoreBuffer = brcPakStas;
9844     miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
9845     miStoreRegMemParams.dwRegister = CS_GPR_REGISTER_INDEX(0);
9846     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
9847 
9848     // p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS = 0
9849     MHW_MI_STORE_DATA_PARAMS miStoreDataImmParams;
9850     miStoreDataImmParams.pOsResource = brcPakStas;
9851     miStoreDataImmParams.dwResourceOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
9852     miStoreDataImmParams.dwValue = 0;
9853     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &miStoreDataImmParams));
9854 
9855     return eStatus;
9856 }
9857 
CodechalEncHevcStateG9(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)9858 CodechalEncHevcStateG9::CodechalEncHevcStateG9(
9859     CodechalHwInterface* hwInterface,
9860     CodechalDebugInterface* debugInterface,
9861     PCODECHAL_STANDARD_INFO standardInfo)
9862     :CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
9863 {
9864     m_fieldScalingOutputInterleaved = false;
9865     m_brcHistoryBufferSize          = BRC_HISTORY_BUFFER_SIZE;
9866     m_kuid                          = IDR_CODEC_HEVC_COMBINED_KENREL_INTEL;
9867     m_kernelBase                    = (uint8_t*)IGCODECKRN_G9;
9868 
9869     MOS_ZeroMemory(&m_scaled2xSurface, sizeof(m_scaled2xSurface));
9870     MOS_ZeroMemory(&m_sliceMapSurface, sizeof(m_sliceMapSurface));
9871     MOS_ZeroMemory(&m_32x32PuOutputData, sizeof(m_32x32PuOutputData));
9872     MOS_ZeroMemory(&m_sad16x16Pu, sizeof(m_sad16x16Pu));
9873     MOS_ZeroMemory(&m_vme8x8Mode, sizeof(m_vme8x8Mode));
9874     MOS_ZeroMemory(&m_intraMode, sizeof(m_intraMode));
9875     MOS_ZeroMemory(&m_intraDist, sizeof(m_intraDist));
9876     MOS_ZeroMemory(&m_simplestIntraSurface, sizeof(m_simplestIntraSurface));
9877     MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface));
9878     MOS_ZeroMemory(&m_concurrentThreadSurface, sizeof(m_concurrentThreadSurface));
9879     MOS_ZeroMemory(&m_walkingPatternParam, sizeof(m_walkingPatternParam));
9880     MOS_ZeroMemory(&m_minDistortion, sizeof(m_minDistortion));
9881     MOS_ZeroMemory(&m_vmeSavedUniSic, sizeof(m_vmeSavedUniSic));
9882     MOS_ZeroMemory(&m_mvIndex, sizeof(m_mvIndex));
9883     MOS_ZeroMemory(&m_mvpIndex, sizeof(m_mvpIndex));
9884 
9885     m_numRegionsInSlice = 4;
9886 }
9887 
InitMhw()9888 MOS_STATUS CodechalEncHevcStateG9::InitMhw()
9889 {
9890     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9891 
9892     // MHW set-up
9893     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
9894     m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
9895 
9896     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
9897         m_kernelBase,
9898         m_kuid,
9899         &m_kernelBinary,
9900         &m_combinedKernelSize));
9901 
9902     m_hwInterface->GetStateHeapSettings()->dwIshSize +=
9903         MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
9904 
9905     return eStatus;
9906 }
9907 
UserFeatureKeyReport()9908 MOS_STATUS CodechalEncHevcStateG9::UserFeatureKeyReport()
9909 {
9910     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9911 
9912     CODECHAL_ENCODE_FUNCTION_ENTER;
9913 
9914     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport());
9915 
9916     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING, m_powerSavingEnabled, m_osInterface->pOsContext);
9917     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT, m_numMbBKernelSplit, m_osInterface->pOsContext);
9918     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT, m_numMb8x8IntraKernelSplit, m_osInterface->pOsContext);
9919 
9920     return eStatus;
9921 }
9922 
Initialize(CodechalSetting * settings)9923 MOS_STATUS CodechalEncHevcStateG9::Initialize(CodechalSetting * settings)
9924 {
9925     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9926 
9927     CODECHAL_ENCODE_FUNCTION_ENTER;
9928 
9929     // common initilization
9930     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
9931 
9932     m_bmeMethodTable = (uint8_t *)m_meMethod;
9933     m_meMethodTable  = (uint8_t *)m_meMethod;
9934 
9935     m_brcBuffers.dwBrcConstantSurfaceWidth  = BRC_CONSTANT_SURFACE_WIDTH;
9936     m_brcBuffers.dwBrcConstantSurfaceHeight = BRC_CONSTANT_SURFACE_HEIGHT;
9937 
9938     // LCU size is 32x32 in Gen9
9939     m_widthAlignedMaxLcu  = MOS_ALIGN_CEIL(m_frameWidth, 32);
9940     m_heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameHeight, 32);
9941 
9942     // user feature key setup
9943     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
9944     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9945     MOS_UserFeature_ReadValue_ID(
9946         nullptr,
9947         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
9948         &userFeatureData,
9949         m_osInterface->pOsContext);
9950     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
9951 
9952     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9953     MOS_UserFeature_ReadValue_ID(
9954         nullptr,
9955         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
9956         &userFeatureData,
9957         m_osInterface->pOsContext);
9958     m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
9959 
9960     if (m_codecFunction != CODECHAL_FUNCTION_PAK)
9961     {
9962         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9963         MOS_UserFeature_ReadValue_ID(
9964             nullptr,
9965             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
9966             &userFeatureData,
9967             m_osInterface->pOsContext);
9968         m_hmeSupported = (userFeatureData.i32Data) ? true : false;
9969 
9970         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9971         MOS_UserFeature_ReadValue_ID(
9972             nullptr,
9973             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
9974             &userFeatureData,
9975             m_osInterface->pOsContext);
9976         m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
9977 
9978         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9979         MOS_UserFeature_ReadValue_ID(
9980             nullptr,
9981             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID,
9982             &userFeatureData,
9983             m_osInterface->pOsContext);
9984 
9985         if (userFeatureData.i32Data == 0 || userFeatureData.i32Data == 1)
9986         {
9987             m_32xMeUserfeatureControl = true;
9988             m_32xMeSupported = (userFeatureData.i32Data) ? true : false;
9989         }
9990         else
9991         {
9992             m_32xMeUserfeatureControl = false;
9993             m_32xMeSupported = true;
9994         }
9995     }
9996 
9997     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9998     eStatus = MOS_UserFeature_ReadValue_ID(
9999         nullptr,
10000         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
10001         &userFeatureData,
10002         m_osInterface->pOsContext);
10003 
10004     if (eStatus == MOS_STATUS_SUCCESS)
10005     {
10006         // Region number must be greater than 1
10007         m_numRegionsInSlice = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
10008     }
10009     else
10010     {
10011         // Reset the status to success if user feature key is not set
10012         eStatus = MOS_STATUS_SUCCESS;
10013     }
10014 
10015     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10016     MOS_UserFeature_ReadValue_ID(
10017         nullptr,
10018         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT,
10019         &userFeatureData,
10020         m_osInterface->pOsContext);
10021     m_numMb8x8IntraKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
10022 
10023     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10024     MOS_UserFeature_ReadValue_ID(
10025         nullptr,
10026         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT,
10027         &userFeatureData,
10028         m_osInterface->pOsContext);
10029     m_numMbBKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
10030 
10031     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10032     MOS_UserFeature_ReadValue_ID(
10033         nullptr,
10034         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING,
10035         &userFeatureData,
10036         m_osInterface->pOsContext);
10037     m_powerSavingEnabled = (userFeatureData.i32Data) ? true : false;
10038 
10039     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
10040     {
10041         /* Make the width aligned to a multiple of 32 and then get the no of macroblocks.*/
10042         /* This is done to facilitate the use of format conversion kernel for downscaling to 4x and 2x along with formatconversion of 10 bit data to 8 bit data.
10043         Refer format conversion kernel for further details .
10044         We will use only 4x downscale for HME, Super and ultra HME use the traditional scaling kernels.
10045         */
10046         uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2));
10047         m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x);
10048 
10049     }
10050 
10051     return eStatus;
10052 }
10053 
InitKernelState()10054 MOS_STATUS CodechalEncHevcStateG9::InitKernelState()
10055 {
10056     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10057 
10058     CODECHAL_ENCODE_FUNCTION_ENTER;
10059 
10060     // Init kernel state
10061     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
10062     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
10063 
10064     // Create Hme kernel
10065     m_hmeKernel = MOS_New(CodechalKernelHmeG9, this);
10066     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
10067     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
10068         pfnGetKernelHeaderAndSize,
10069         m_kernelBase,
10070         m_kuid));
10071 
10072     return eStatus;
10073 }
10074