1 /*
2 * Copyright (c) 2017-2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_hevc_g9.cpp
24 //! \brief HEVC dual-pipe encoder for GEN9.
25 //!
26
27 #include "codechal_encode_hevc_g9.h"
28 #include "codechal_kernel_hme_g9.h"
29 #include "igcodeckrn_g9.h"
30 #include "codeckrnheader.h"
31 #include "mhw_mmio_g9.h"
32
//! Builds the dword for an ALU LOAD instruction.
//! Layout as encoded here: opcode 0x80 at bit 20, operand selector at bit 10
//! (0x20 when bSrcRegA is true, 0x21 otherwise) and the GPR index in the low 4 bits.
//! NOTE(review): 0x20/0x21 presumably select the SRCA/SRCB ALU operands - confirm against the hardware spec.
#define CS_ALU_COMMAND_LOAD(bSrcRegA, GprReg) \
    (((GprReg) & 0x0F) | (((bSrcRegA) ? 0x20 : 0x21) << 10) | (0x80 << 20))

//! Builds the dword for an ALU STORE instruction.
//! Layout as encoded here: opcode 0x180 at bit 20, destination GPR index (4 bits) at bit 10
//! and the constant source code 0x31 (the accumulator, going by the macro name) in the low bits.
#define CS_ALU_COMMAND_STORE_ACCU(GprReg) \
    (0x31 | (((GprReg) & 0x0F) << 10) | (0x180 << 20))

//! Padding size in bytes (64 KB). The name suggests a GPU MMU workaround; its use is outside this view.
#define GPUMMU_WA_PADDING (64 * 1024)
38
//! HME step selector: the first step of the sequence versus any later one.
enum
{
    HME_FIRST_STEP,     //!< 0
    HME_FOLLOWING_STEP  //!< 1
};
45
//! Motion vector shift factor, one constant per HME downscaling level.
enum
{
    MV_SHIFT_FACTOR_32x = 1,
    MV_SHIFT_FACTOR_16x = 2,
    MV_SHIFT_FACTOR_4x  = MV_SHIFT_FACTOR_16x  //!< same numeric value (2) as the 16x level
};
53
//! Previous motion vector read position, one constant per HME level.
enum
{
    PREV_MV_READ_POSITION_4x  = 0,
    PREV_MV_READ_POSITION_16x = 1
};
60
//! ALU opcodes, already shifted into the opcode field that starts at bit 20
//! (the same position used by CS_ALU_COMMAND_LOAD / CS_ALU_COMMAND_STORE_ACCU).
enum
{
    CS_ALU_COMMAND_ADD = 0x100 << 20,
    CS_ALU_COMMAND_SUB = 0x101 << 20,
    CS_ALU_COMMAND_AND = 0x102 << 20,
    CS_ALU_COMMAND_OR  = 0x103 << 20,
    CS_ALU_COMMAND_XOR = 0x104 << 20
};
70
// Kernel-side ROI record used for ROI surface calculations.
// It is close to, but not the same as, the ENCODE_ROI structure used by the
// DDI/App, so it is meant only for loading the ROI surface.
// NOTE(review): the units of the rectangle edges are not visible here - confirm against the kernel interface.
struct CODECHAL_ENC_HEVC_ROI_G9
{
    uint32_t Top;        //!< top edge of the region
    uint32_t Left;       //!< left edge of the region
    uint32_t Bottom;     //!< bottom edge of the region
    uint32_t Right;      //!< right edge of the region
    int32_t  QPDelta;    //!< signed QP delta associated with the region
    int32_t  ROI_Level;  //!< signed level associated with the region
};
83
84 //! HEVC encoder ME kernel curbe for GEN9
85 struct CODECHAL_ENC_HEVC_ME_CURBE_G9
86 {
87 // DW0
88 union
89 {
90 struct
91 {
92 uint32_t SkipModeEn : MOS_BITFIELD_BIT(0);
93 uint32_t AdaptiveEn : MOS_BITFIELD_BIT(1);
94 uint32_t BiMixDis : MOS_BITFIELD_BIT(2);
95 uint32_t : MOS_BITFIELD_RANGE(3, 4);
96 uint32_t EarlyImeSuccessEn : MOS_BITFIELD_BIT(5);
97 uint32_t : MOS_BITFIELD_BIT(6);
98 uint32_t T8x8FlagForInterEn : MOS_BITFIELD_BIT(7);
99 uint32_t : MOS_BITFIELD_RANGE(8, 23);
100 uint32_t EarlyImeStop : MOS_BITFIELD_RANGE(24, 31);
101 };
102 struct
103 {
104 uint32_t Value;
105 };
106 } DW0;
107
108 // DW1
109 union
110 {
111 struct
112 {
113 uint32_t MaxNumMVs : MOS_BITFIELD_RANGE(0, 5);
114 uint32_t : MOS_BITFIELD_RANGE(6, 15);
115 uint32_t BiWeight : MOS_BITFIELD_RANGE(16, 21);
116 uint32_t : MOS_BITFIELD_RANGE(22, 27);
117 uint32_t UniMixDisable : MOS_BITFIELD_BIT(28);
118 uint32_t : MOS_BITFIELD_RANGE(29, 31);
119 };
120 struct
121 {
122 uint32_t Value;
123 };
124 } DW1;
125
126 // DW2
127 union
128 {
129 struct
130 {
131 uint32_t MaxLenSP : MOS_BITFIELD_RANGE(0, 7);
132 uint32_t MaxNumSU : MOS_BITFIELD_RANGE(8, 15);
133 uint32_t : MOS_BITFIELD_RANGE(16, 31);
134 };
135 struct
136 {
137 uint32_t Value;
138 };
139 } DW2;
140
141 // DW3
142 union
143 {
144 struct
145 {
146 uint32_t SrcSize : MOS_BITFIELD_RANGE(0, 1);
147 uint32_t : MOS_BITFIELD_RANGE(2, 3);
148 uint32_t MbTypeRemap : MOS_BITFIELD_RANGE(4, 5);
149 uint32_t SrcAccess : MOS_BITFIELD_BIT(6);
150 uint32_t RefAccess : MOS_BITFIELD_BIT(7);
151 uint32_t SearchCtrl : MOS_BITFIELD_RANGE(8, 10);
152 uint32_t DualSearchPathOption : MOS_BITFIELD_BIT(11);
153 uint32_t SubPelMode : MOS_BITFIELD_RANGE(12, 13);
154 uint32_t SkipType : MOS_BITFIELD_BIT(14);
155 uint32_t DisableFieldCacheAlloc : MOS_BITFIELD_BIT(15);
156 uint32_t InterChromaMode : MOS_BITFIELD_BIT(16);
157 uint32_t FTEnable : MOS_BITFIELD_BIT(17);
158 uint32_t BMEDisableFBR : MOS_BITFIELD_BIT(18);
159 uint32_t BlockBasedSkipEnable : MOS_BITFIELD_BIT(19);
160 uint32_t InterSAD : MOS_BITFIELD_RANGE(20, 21);
161 uint32_t IntraSAD : MOS_BITFIELD_RANGE(22, 23);
162 uint32_t SubMbPartMask : MOS_BITFIELD_RANGE(24, 30);
163 uint32_t : MOS_BITFIELD_BIT(31);
164 };
165 struct
166 {
167 uint32_t Value;
168 };
169 } DW3;
170
171 // DW4
172 union
173 {
174 struct
175 {
176 uint32_t : MOS_BITFIELD_RANGE(0, 7);
177 uint32_t PictureHeightMinus1 : MOS_BITFIELD_RANGE(8, 15);
178 uint32_t PictureWidth : MOS_BITFIELD_RANGE(16, 23);
179 uint32_t : MOS_BITFIELD_RANGE(24, 31);
180 };
181 struct
182 {
183 uint32_t Value;
184 };
185 } DW4;
186
187 // DW5
188 union
189 {
190 struct
191 {
192 uint32_t : MOS_BITFIELD_RANGE(0, 7);
193 uint32_t QpPrimeY : MOS_BITFIELD_RANGE(8, 15);
194 uint32_t RefWidth : MOS_BITFIELD_RANGE(16, 23);
195 uint32_t RefHeight : MOS_BITFIELD_RANGE(24, 31);
196 };
197 struct
198 {
199 uint32_t Value;
200 };
201 } DW5;
202
203 // DW6
204 union
205 {
206 struct
207 {
208 uint32_t : MOS_BITFIELD_RANGE(0, 2);
209 uint32_t WriteDistortions : MOS_BITFIELD_BIT(3);
210 uint32_t UseMvFromPrevStep : MOS_BITFIELD_BIT(4);
211 uint32_t : MOS_BITFIELD_RANGE(5, 7);
212 uint32_t SuperCombineDist : MOS_BITFIELD_RANGE(8, 15);
213 uint32_t MaxVmvR : MOS_BITFIELD_RANGE(16, 31);
214 };
215 struct
216 {
217 uint32_t Value;
218 };
219 } DW6;
220
221 // DW7
222 union
223 {
224 struct
225 {
226 uint32_t : MOS_BITFIELD_RANGE(0, 15);
227 uint32_t MVCostScaleFactor : MOS_BITFIELD_RANGE(16, 17);
228 uint32_t BilinearEnable : MOS_BITFIELD_BIT(18);
229 uint32_t SrcFieldPolarity : MOS_BITFIELD_BIT(19);
230 uint32_t WeightedSADHAAR : MOS_BITFIELD_BIT(20);
231 uint32_t AConlyHAAR : MOS_BITFIELD_BIT(21);
232 uint32_t RefIDCostMode : MOS_BITFIELD_BIT(22);
233 uint32_t : MOS_BITFIELD_BIT(23);
234 uint32_t SkipCenterMask : MOS_BITFIELD_RANGE(24, 31);
235 };
236 struct
237 {
238 uint32_t Value;
239 };
240 } DW7;
241
242 // DW8
243 union
244 {
245 struct
246 {
247 uint32_t Mode0Cost : MOS_BITFIELD_RANGE(0, 7);
248 uint32_t Mode1Cost : MOS_BITFIELD_RANGE(8, 15);
249 uint32_t Mode2Cost : MOS_BITFIELD_RANGE(16, 23);
250 uint32_t Mode3Cost : MOS_BITFIELD_RANGE(24, 31);
251 };
252 struct
253 {
254 uint32_t Value;
255 };
256 } DW8;
257
258 // DW9
259 union
260 {
261 struct
262 {
263 uint32_t Mode4Cost : MOS_BITFIELD_RANGE(0, 7);
264 uint32_t Mode5Cost : MOS_BITFIELD_RANGE(8, 15);
265 uint32_t Mode6Cost : MOS_BITFIELD_RANGE(16, 23);
266 uint32_t Mode7Cost : MOS_BITFIELD_RANGE(24, 31);
267 };
268 struct
269 {
270 uint32_t Value;
271 };
272 } DW9;
273
274 // DW10
275 union
276 {
277 struct
278 {
279 uint32_t Mode8Cost : MOS_BITFIELD_RANGE(0, 7);
280 uint32_t Mode9Cost : MOS_BITFIELD_RANGE(8, 15);
281 uint32_t RefIDCost : MOS_BITFIELD_RANGE(16, 23);
282 uint32_t ChromaIntraModeCost : MOS_BITFIELD_RANGE(24, 31);
283 };
284 struct
285 {
286 uint32_t Value;
287 };
288 } DW10;
289
290 // DW11
291 union
292 {
293 struct
294 {
295 uint32_t MV0Cost : MOS_BITFIELD_RANGE(0, 7);
296 uint32_t MV1Cost : MOS_BITFIELD_RANGE(8, 15);
297 uint32_t MV2Cost : MOS_BITFIELD_RANGE(16, 23);
298 uint32_t MV3Cost : MOS_BITFIELD_RANGE(24, 31);
299 };
300 struct
301 {
302 uint32_t Value;
303 };
304 } DW11;
305
306 // DW12
307 union
308 {
309 struct
310 {
311 uint32_t MV4Cost : MOS_BITFIELD_RANGE(0, 7);
312 uint32_t MV5Cost : MOS_BITFIELD_RANGE(8, 15);
313 uint32_t MV6Cost : MOS_BITFIELD_RANGE(16, 23);
314 uint32_t MV7Cost : MOS_BITFIELD_RANGE(24, 31);
315 };
316 struct
317 {
318 uint32_t Value;
319 };
320 } DW12;
321
322 // DW13
323 union
324 {
325 struct
326 {
327 uint32_t NumRefIdxL0MinusOne : MOS_BITFIELD_RANGE(0, 7);
328 uint32_t NumRefIdxL1MinusOne : MOS_BITFIELD_RANGE(8, 15);
329 uint32_t RefStreaminCost : MOS_BITFIELD_RANGE(16, 23);
330 uint32_t ROIEnable : MOS_BITFIELD_RANGE(24, 26);
331 uint32_t : MOS_BITFIELD_RANGE(27, 31);
332 };
333 struct
334 {
335 uint32_t Value;
336 };
337 } DW13;
338
339 // DW14
340 union
341 {
342 struct
343 {
344 uint32_t List0RefID0FieldParity : MOS_BITFIELD_BIT(0);
345 uint32_t List0RefID1FieldParity : MOS_BITFIELD_BIT(1);
346 uint32_t List0RefID2FieldParity : MOS_BITFIELD_BIT(2);
347 uint32_t List0RefID3FieldParity : MOS_BITFIELD_BIT(3);
348 uint32_t List0RefID4FieldParity : MOS_BITFIELD_BIT(4);
349 uint32_t List0RefID5FieldParity : MOS_BITFIELD_BIT(5);
350 uint32_t List0RefID6FieldParity : MOS_BITFIELD_BIT(6);
351 uint32_t List0RefID7FieldParity : MOS_BITFIELD_BIT(7);
352 uint32_t List1RefID0FieldParity : MOS_BITFIELD_BIT(8);
353 uint32_t List1RefID1FieldParity : MOS_BITFIELD_BIT(9);
354 uint32_t : MOS_BITFIELD_RANGE(10, 31);
355 };
356 struct
357 {
358 uint32_t Value;
359 };
360 } DW14;
361
362 // DW15
363 union
364 {
365 struct
366 {
367 uint32_t PrevMvReadPosFactor : MOS_BITFIELD_RANGE(0, 7);
368 uint32_t MvShiftFactor : MOS_BITFIELD_RANGE(8, 15);
369 uint32_t Reserved : MOS_BITFIELD_RANGE(16, 31);
370 };
371 struct
372 {
373 uint32_t Value;
374 };
375 } DW15;
376
377 struct
378 {
379 // DW16
380 union
381 {
382 struct
383 {
384 SearchPathDelta SPDelta_0;
385 SearchPathDelta SPDelta_1;
386 SearchPathDelta SPDelta_2;
387 SearchPathDelta SPDelta_3;
388 };
389 struct
390 {
391 uint32_t Value;
392 };
393 } DW16;
394
395 // DW17
396 union
397 {
398 struct
399 {
400 SearchPathDelta SPDelta_4;
401 SearchPathDelta SPDelta_5;
402 SearchPathDelta SPDelta_6;
403 SearchPathDelta SPDelta_7;
404 };
405 struct
406 {
407 uint32_t Value;
408 };
409 } DW17;
410
411 // DW18
412 union
413 {
414 struct
415 {
416 SearchPathDelta SPDelta_8;
417 SearchPathDelta SPDelta_9;
418 SearchPathDelta SPDelta_10;
419 SearchPathDelta SPDelta_11;
420 };
421 struct
422 {
423 uint32_t Value;
424 };
425 } DW18;
426
427 // DW19
428 union
429 {
430 struct
431 {
432 SearchPathDelta SPDelta_12;
433 SearchPathDelta SPDelta_13;
434 SearchPathDelta SPDelta_14;
435 SearchPathDelta SPDelta_15;
436 };
437 struct
438 {
439 uint32_t Value;
440 };
441 } DW19;
442
443 // DW20
444 union
445 {
446 struct
447 {
448 SearchPathDelta SPDelta_16;
449 SearchPathDelta SPDelta_17;
450 SearchPathDelta SPDelta_18;
451 SearchPathDelta SPDelta_19;
452 };
453 struct
454 {
455 uint32_t Value;
456 };
457 } DW20;
458
459 // DW21
460 union
461 {
462 struct
463 {
464 SearchPathDelta SPDelta_20;
465 SearchPathDelta SPDelta_21;
466 SearchPathDelta SPDelta_22;
467 SearchPathDelta SPDelta_23;
468 };
469 struct
470 {
471 uint32_t Value;
472 };
473 } DW21;
474
475 // DW22
476 union
477 {
478 struct
479 {
480 SearchPathDelta SPDelta_24;
481 SearchPathDelta SPDelta_25;
482 SearchPathDelta SPDelta_26;
483 SearchPathDelta SPDelta_27;
484 };
485 struct
486 {
487 uint32_t Value;
488 };
489 } DW22;
490
491 // DW23
492 union
493 {
494 struct
495 {
496 SearchPathDelta SPDelta_28;
497 SearchPathDelta SPDelta_29;
498 SearchPathDelta SPDelta_30;
499 SearchPathDelta SPDelta_31;
500 };
501 struct
502 {
503 uint32_t Value;
504 };
505 } DW23;
506
507 // DW24
508 union
509 {
510 struct
511 {
512 SearchPathDelta SPDelta_32;
513 SearchPathDelta SPDelta_33;
514 SearchPathDelta SPDelta_34;
515 SearchPathDelta SPDelta_35;
516 };
517 struct
518 {
519 uint32_t Value;
520 };
521 } DW24;
522
523 // DW25
524 union
525 {
526 struct
527 {
528 SearchPathDelta SPDelta_36;
529 SearchPathDelta SPDelta_37;
530 SearchPathDelta SPDelta_38;
531 SearchPathDelta SPDelta_39;
532 };
533 struct
534 {
535 uint32_t Value;
536 };
537 } DW25;
538
539 // DW26
540 union
541 {
542 struct
543 {
544 SearchPathDelta SPDelta_40;
545 SearchPathDelta SPDelta_41;
546 SearchPathDelta SPDelta_42;
547 SearchPathDelta SPDelta_43;
548 };
549 struct
550 {
551 uint32_t Value;
552 };
553 } DW26;
554
555 // DW27
556 union
557 {
558 struct
559 {
560 SearchPathDelta SPDelta_44;
561 SearchPathDelta SPDelta_45;
562 SearchPathDelta SPDelta_46;
563 SearchPathDelta SPDelta_47;
564 };
565 struct
566 {
567 uint32_t Value;
568 };
569 } DW27;
570
571 // DW28
572 union
573 {
574 struct
575 {
576 SearchPathDelta SPDelta_48;
577 SearchPathDelta SPDelta_49;
578 SearchPathDelta SPDelta_50;
579 SearchPathDelta SPDelta_51;
580 };
581 struct
582 {
583 uint32_t Value;
584 };
585 } DW28;
586
587 // DW29
588 union
589 {
590 struct
591 {
592 SearchPathDelta SPDelta_52;
593 SearchPathDelta SPDelta_53;
594 SearchPathDelta SPDelta_54;
595 SearchPathDelta SPDelta_55;
596 };
597 struct
598 {
599 uint32_t Value;
600 };
601 } DW29;
602 } SPDelta;
603
604 // DW30
605 union
606 {
607 struct
608 {
609 uint32_t ActualMBWidth : MOS_BITFIELD_RANGE(0, 15);
610 uint32_t ActualMBHeight : MOS_BITFIELD_RANGE(16, 31);
611 };
612 struct
613 {
614 uint32_t Value;
615 };
616 } DW30;
617
618 // DW31
619 union
620 {
621 struct
622 {
623 uint32_t Reserved;
624 };
625 struct
626 {
627 uint32_t Value;
628 };
629 } DW31;
630
631 // DW32
632 union
633 {
634 struct
635 {
636 uint32_t _4xMeMvOutputDataSurfIndex;
637 };
638 struct
639 {
640 uint32_t Value;
641 };
642 } DW32;
643
644 // DW33
645 union
646 {
647 struct
648 {
649 uint32_t _16xOr32xMeMvInputDataSurfIndex;
650 };
651 struct
652 {
653 uint32_t Value;
654 };
655 } DW33;
656
657 // DW34
658 union
659 {
660 struct
661 {
662 uint32_t _4xMeOutputDistSurfIndex;
663 };
664 struct
665 {
666 uint32_t Value;
667 };
668 } DW34;
669
670 // DW35
671 union
672 {
673 struct
674 {
675 uint32_t _4xMeOutputBrcDistSurfIndex;
676 };
677 struct
678 {
679 uint32_t Value;
680 };
681 } DW35;
682
683 // DW36
684 union
685 {
686 struct
687 {
688 uint32_t VMEFwdInterPredictionSurfIndex;
689 };
690 struct
691 {
692 uint32_t Value;
693 };
694 } DW36;
695
696 // DW37
697 union
698 {
699 struct
700 {
701 uint32_t VMEBwdInterPredictionSurfIndex;
702 };
703 struct
704 {
705 uint32_t Value;
706 };
707 } DW37;
708
709 // DW38
710 union
711 {
712 struct
713 {
714 uint32_t VDEncStreamInSurfIndex;
715 };
716 struct
717 {
718 uint32_t Value;
719 };
720 } DW38;
721 };
722
723 using PCODECHAL_ENC_HEVC_ME_CURBE_G9 = struct CODECHAL_ENC_HEVC_ME_CURBE_G9*;
724 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G9)) == 39);
725
726 //! HEVC encoder B MBEnc kernel curbe for GEN9
727 struct CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9
728 {
729 // DW0
730 union
731 {
732 struct
733 {
734 uint32_t SkipModeEn : MOS_BITFIELD_BIT(0);
735 uint32_t AdaptiveEn : MOS_BITFIELD_BIT(1);
736 uint32_t BiMixDis : MOS_BITFIELD_BIT(2);
737 uint32_t : MOS_BITFIELD_RANGE(3, 4);
738 uint32_t EarlyImeSuccessEn : MOS_BITFIELD_BIT(5);
739 uint32_t : MOS_BITFIELD_BIT(6);
740 uint32_t T8x8FlagForInterEn : MOS_BITFIELD_BIT(7);
741 uint32_t : MOS_BITFIELD_RANGE(8, 23);
742 uint32_t EarlyImeStop : MOS_BITFIELD_RANGE(24, 31);
743 };
744 struct
745 {
746 uint32_t Value;
747 };
748 } DW0;
749
750 // DW1
751 union
752 {
753 struct
754 {
755 uint32_t MaxNumMVs : MOS_BITFIELD_RANGE(0, 5);
756 uint32_t : MOS_BITFIELD_RANGE(6, 15);
757 uint32_t BiWeight : MOS_BITFIELD_RANGE(16, 21);
758 uint32_t : MOS_BITFIELD_RANGE(22, 27);
759 uint32_t UniMixDisable : MOS_BITFIELD_BIT(28);
760 uint32_t : MOS_BITFIELD_RANGE(29, 31);
761 };
762 struct
763 {
764 uint32_t Value;
765 };
766 } DW1;
767
768 // DW2
769 union
770 {
771 struct
772 {
773 uint32_t LenSP : MOS_BITFIELD_RANGE(0, 7);
774 uint32_t MaxNumSU : MOS_BITFIELD_RANGE(8, 15);
775 uint32_t PicWidth : MOS_BITFIELD_RANGE(16, 31);
776 };
777 struct
778 {
779 uint32_t Value;
780 };
781 } DW2;
782
783 // DW3
784 union
785 {
786 struct
787 {
788 uint32_t SrcSize : MOS_BITFIELD_RANGE(0, 1);
789 uint32_t : MOS_BITFIELD_RANGE(2, 3);
790 uint32_t MbTypeRemap : MOS_BITFIELD_RANGE(4, 5);
791 uint32_t SrcAccess : MOS_BITFIELD_BIT(6);
792 uint32_t RefAccess : MOS_BITFIELD_BIT(7);
793 uint32_t SearchCtrl : MOS_BITFIELD_RANGE(8, 10);
794 uint32_t DualSearchPathOption : MOS_BITFIELD_BIT(11);
795 uint32_t SubPelMode : MOS_BITFIELD_RANGE(12, 13);
796 uint32_t SkipType : MOS_BITFIELD_BIT(14);
797 uint32_t DisableFieldCacheAlloc : MOS_BITFIELD_BIT(15);
798 uint32_t InterChromaMode : MOS_BITFIELD_BIT(16);
799 uint32_t FTEnable : MOS_BITFIELD_BIT(17);
800 uint32_t BMEDisableFBR : MOS_BITFIELD_BIT(18);
801 uint32_t BlockBasedSkipEnable : MOS_BITFIELD_BIT(19);
802 uint32_t InterSAD : MOS_BITFIELD_RANGE(20, 21);
803 uint32_t IntraSAD : MOS_BITFIELD_RANGE(22, 23);
804 uint32_t SubMbPartMask : MOS_BITFIELD_RANGE(24, 30);
805 uint32_t : MOS_BITFIELD_BIT(31);
806 };
807 struct
808 {
809 uint32_t Value;
810 };
811 } DW3;
812
813 union
814 {
815 struct
816 {
817 uint32_t PicHeightMinus1 : MOS_BITFIELD_RANGE(0, 15);
818 uint32_t Res_16_22 : MOS_BITFIELD_RANGE(16, 22);
819 uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(23);
820 uint32_t EnableDebug : MOS_BITFIELD_BIT(24);
821 uint32_t EnableFlexibleParam : MOS_BITFIELD_BIT(25);
822 uint32_t EnableStatsDataDump : MOS_BITFIELD_BIT(26);
823 uint32_t Res_27 : MOS_BITFIELD_BIT(27);
824 uint32_t HMEEnable : MOS_BITFIELD_BIT(28);
825 uint32_t SliceType : MOS_BITFIELD_RANGE(29, 30);
826 uint32_t UseActualRefQPValue : MOS_BITFIELD_BIT(31);
827 };
828 struct
829 {
830 uint32_t Value;
831 };
832 } DW4;
833
834 // DW5
835 union
836 {
837 struct
838 {
839 uint32_t Res_0_15 : MOS_BITFIELD_RANGE(0, 15);
840 uint32_t RefWidth : MOS_BITFIELD_RANGE(16, 23);
841 uint32_t RefHeight : MOS_BITFIELD_RANGE(24, 31);
842 };
843 struct
844 {
845 uint32_t Value;
846 };
847 } DW5;
848
849 union
850 {
851 struct
852 {
853 uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15);
854 uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31);
855 };
856 struct
857 {
858 uint32_t Value;
859 };
860 } DW6;
861
862 // DW7
863 union
864 {
865 struct
866 {
867 uint32_t IntraPartMask : MOS_BITFIELD_RANGE(0, 4);
868 uint32_t NonSkipZMvAdded : MOS_BITFIELD_BIT(5);
869 uint32_t NonSkipModeAdded : MOS_BITFIELD_BIT(6);
870 uint32_t LumaIntraSrcCornerSwap : MOS_BITFIELD_BIT(7);
871 uint32_t : MOS_BITFIELD_RANGE(8, 15);
872 uint32_t MVCostScaleFactor : MOS_BITFIELD_RANGE(16, 17);
873 uint32_t BilinearEnable : MOS_BITFIELD_BIT(18);
874 uint32_t Res_19 : MOS_BITFIELD_BIT(19);
875 uint32_t WeightedSADHAAR : MOS_BITFIELD_BIT(20);
876 uint32_t AConlyHAAR : MOS_BITFIELD_BIT(21);
877 uint32_t RefIDCostMode : MOS_BITFIELD_BIT(22);
878 uint32_t : MOS_BITFIELD_BIT(23);
879 uint32_t SkipCenterMask : MOS_BITFIELD_RANGE(24, 31);
880 };
881 struct
882 {
883 uint32_t Value;
884 };
885 } DW7;
886
887 // DW8
888 union
889 {
890 struct
891 {
892 uint32_t Mode0Cost : MOS_BITFIELD_RANGE(0, 7);
893 uint32_t Mode1Cost : MOS_BITFIELD_RANGE(8, 15);
894 uint32_t Mode2Cost : MOS_BITFIELD_RANGE(16, 23);
895 uint32_t Mode3Cost : MOS_BITFIELD_RANGE(24, 31);
896 };
897 struct
898 {
899 uint32_t Value;
900 };
901 } DW8;
902
903 // DW9
904 union
905 {
906 struct
907 {
908 uint32_t Mode4Cost : MOS_BITFIELD_RANGE(0, 7);
909 uint32_t Mode5Cost : MOS_BITFIELD_RANGE(8, 15);
910 uint32_t Mode6Cost : MOS_BITFIELD_RANGE(16, 23);
911 uint32_t Mode7Cost : MOS_BITFIELD_RANGE(24, 31);
912 };
913 struct
914 {
915 uint32_t Value;
916 };
917 } DW9;
918
919 // DW10
920 union
921 {
922 struct
923 {
924 uint32_t Mode8Cost : MOS_BITFIELD_RANGE(0, 7);
925 uint32_t Mode9Cost : MOS_BITFIELD_RANGE(8, 15);
926 uint32_t RefIDCost : MOS_BITFIELD_RANGE(16, 23);
927 uint32_t ChromaIntraModeCost : MOS_BITFIELD_RANGE(24, 31);
928 };
929 struct
930 {
931 uint32_t Value;
932 };
933 } DW10;
934
935 // DW11
936 union
937 {
938 struct
939 {
940 uint32_t MV0Cost : MOS_BITFIELD_RANGE(0, 7);
941 uint32_t MV1Cost : MOS_BITFIELD_RANGE(8, 15);
942 uint32_t MV2Cost : MOS_BITFIELD_RANGE(16, 23);
943 uint32_t MV3Cost : MOS_BITFIELD_RANGE(24, 31);
944 };
945 struct
946 {
947 uint32_t Value;
948 };
949 } DW11;
950
951 // DW12
952 union
953 {
954 struct
955 {
956 uint32_t MV4Cost : MOS_BITFIELD_RANGE(0, 7);
957 uint32_t MV5Cost : MOS_BITFIELD_RANGE(8, 15);
958 uint32_t MV6Cost : MOS_BITFIELD_RANGE(16, 23);
959 uint32_t MV7Cost : MOS_BITFIELD_RANGE(24, 31);
960 };
961 struct
962 {
963 uint32_t Value;
964 };
965 } DW12;
966
967 // DW13
968 union
969 {
970 struct
971 {
972 uint32_t QpPrimeY : MOS_BITFIELD_RANGE(0, 7);
973 uint32_t QpPrimeCb : MOS_BITFIELD_RANGE(8, 15);
974 uint32_t QpPrimeCr : MOS_BITFIELD_RANGE(16, 23);
975 uint32_t TargetSizeInWord : MOS_BITFIELD_RANGE(24, 31);
976 };
977 struct
978 {
979 uint32_t Value;
980 };
981 } DW13;
982
983 // DW14
984 union
985 {
986 struct
987 {
988 uint32_t SICFwdTransCoeffThreshold_0 : MOS_BITFIELD_RANGE(0, 15);
989 uint32_t SICFwdTransCoeffThreshold_1 : MOS_BITFIELD_RANGE(16, 23);
990 uint32_t SICFwdTransCoeffThreshold_2 : MOS_BITFIELD_RANGE(24, 31);
991 };
992 struct
993 {
994 uint32_t Value;
995 };
996 } DW14;
997
998 // DW15
999 union
1000 {
1001 struct
1002 {
1003 uint32_t SICFwdTransCoeffThreshold_3 : MOS_BITFIELD_RANGE(0, 7);
1004 uint32_t SICFwdTransCoeffThreshold_4 : MOS_BITFIELD_RANGE(8, 15);
1005 uint32_t SICFwdTransCoeffThreshold_5 : MOS_BITFIELD_RANGE(16, 23);
1006 uint32_t SICFwdTransCoeffThreshold_6 : MOS_BITFIELD_RANGE(24, 31); // Highest Freq
1007 };
1008 struct
1009 {
1010 uint32_t Value;
1011 };
1012 } DW15;
1013
1014 // DW16
1015 union
1016 {
1017 struct
1018 {
1019 SearchPathDelta SPDelta_0;
1020 SearchPathDelta SPDelta_1;
1021 SearchPathDelta SPDelta_2;
1022 SearchPathDelta SPDelta_3;
1023 };
1024 struct
1025 {
1026 uint32_t Value;
1027 };
1028 } DW16;
1029
1030 // DW17
1031 union
1032 {
1033 struct
1034 {
1035 SearchPathDelta SPDelta_4;
1036 SearchPathDelta SPDelta_5;
1037 SearchPathDelta SPDelta_6;
1038 SearchPathDelta SPDelta_7;
1039 };
1040 struct
1041 {
1042 uint32_t Value;
1043 };
1044 } DW17;
1045
1046 // DW18
1047 union
1048 {
1049 struct
1050 {
1051 SearchPathDelta SPDelta_8;
1052 SearchPathDelta SPDelta_9;
1053 SearchPathDelta SPDelta_10;
1054 SearchPathDelta SPDelta_11;
1055 };
1056 struct
1057 {
1058 uint32_t Value;
1059 };
1060 } DW18;
1061
1062 // DW19
1063 union
1064 {
1065 struct
1066 {
1067 SearchPathDelta SPDelta_12;
1068 SearchPathDelta SPDelta_13;
1069 SearchPathDelta SPDelta_14;
1070 SearchPathDelta SPDelta_15;
1071 };
1072 struct
1073 {
1074 uint32_t Value;
1075 };
1076 } DW19;
1077
1078 // DW20
1079 union
1080 {
1081 struct
1082 {
1083 SearchPathDelta SPDelta_16;
1084 SearchPathDelta SPDelta_17;
1085 SearchPathDelta SPDelta_18;
1086 SearchPathDelta SPDelta_19;
1087 };
1088 struct
1089 {
1090 uint32_t Value;
1091 };
1092 } DW20;
1093
1094 // DW21
1095 union
1096 {
1097 struct
1098 {
1099 SearchPathDelta SPDelta_20;
1100 SearchPathDelta SPDelta_21;
1101 SearchPathDelta SPDelta_22;
1102 SearchPathDelta SPDelta_23;
1103 };
1104 struct
1105 {
1106 uint32_t Value;
1107 };
1108 } DW21;
1109
1110 // DW22
1111 union
1112 {
1113 struct
1114 {
1115 SearchPathDelta SPDelta_24;
1116 SearchPathDelta SPDelta_25;
1117 SearchPathDelta SPDelta_26;
1118 SearchPathDelta SPDelta_27;
1119 };
1120 struct
1121 {
1122 uint32_t Value;
1123 };
1124 } DW22;
1125
1126 // DW23
1127 union
1128 {
1129 struct
1130 {
1131 SearchPathDelta SPDelta_28;
1132 SearchPathDelta SPDelta_29;
1133 SearchPathDelta SPDelta_30;
1134 SearchPathDelta SPDelta_31;
1135 };
1136 struct
1137 {
1138 uint32_t Value;
1139 };
1140 } DW23;
1141
1142 // DW24
1143 union
1144 {
1145 struct
1146 {
1147 SearchPathDelta SPDelta_32;
1148 SearchPathDelta SPDelta_33;
1149 SearchPathDelta SPDelta_34;
1150 SearchPathDelta SPDelta_35;
1151 };
1152 struct
1153 {
1154 uint32_t Value;
1155 };
1156 } DW24;
1157
1158 // DW25
1159 union
1160 {
1161 struct
1162 {
1163 SearchPathDelta SPDelta_36;
1164 SearchPathDelta SPDelta_37;
1165 SearchPathDelta SPDelta_38;
1166 SearchPathDelta SPDelta_39;
1167 };
1168 struct
1169 {
1170 uint32_t Value;
1171 };
1172 } DW25;
1173
1174 // DW26
1175 union
1176 {
1177 struct
1178 {
1179 SearchPathDelta SPDelta_40;
1180 SearchPathDelta SPDelta_41;
1181 SearchPathDelta SPDelta_42;
1182 SearchPathDelta SPDelta_43;
1183 };
1184 struct
1185 {
1186 uint32_t Value;
1187 };
1188 } DW26;
1189
1190 // DW27
1191 union
1192 {
1193 struct
1194 {
1195 SearchPathDelta SPDelta_44;
1196 SearchPathDelta SPDelta_45;
1197 SearchPathDelta SPDelta_46;
1198 SearchPathDelta SPDelta_47;
1199 };
1200 struct
1201 {
1202 uint32_t Value;
1203 };
1204 } DW27;
1205
1206 // DW28
1207 union
1208 {
1209 struct
1210 {
1211 SearchPathDelta SPDelta_48;
1212 SearchPathDelta SPDelta_49;
1213 SearchPathDelta SPDelta_50;
1214 SearchPathDelta SPDelta_51;
1215 };
1216 struct
1217 {
1218 uint32_t Value;
1219 };
1220 } DW28;
1221
1222 // DW29
1223 union
1224 {
1225 struct
1226 {
1227 SearchPathDelta SPDelta_52;
1228 SearchPathDelta SPDelta_53;
1229 SearchPathDelta SPDelta_54;
1230 SearchPathDelta SPDelta_55;
1231 };
1232 struct
1233 {
1234 uint32_t Value;
1235 };
1236 } DW29;
1237
1238 // DW30
1239 union
1240 {
1241 struct
1242 {
1243 uint32_t Intra4x4ModeMask : MOS_BITFIELD_RANGE(0, 8);
1244 uint32_t : MOS_BITFIELD_RANGE(9, 15);
1245 uint32_t Intra8x8ModeMask : MOS_BITFIELD_RANGE(16, 24);
1246 uint32_t : MOS_BITFIELD_RANGE(25, 31);
1247 };
1248 struct
1249 {
1250 uint32_t Value;
1251 };
1252 } DW30;
1253
1254 // DW31
1255 union
1256 {
1257 struct
1258 {
1259 uint32_t Intra16x16ModeMask : MOS_BITFIELD_RANGE(0, 3);
1260 uint32_t IntraChromaModeMask : MOS_BITFIELD_RANGE(4, 7);
1261 uint32_t IntraComputeType : MOS_BITFIELD_RANGE(8, 9);
1262 uint32_t : MOS_BITFIELD_RANGE(10, 31);
1263 };
1264 struct
1265 {
1266 uint32_t Value;
1267 };
1268 } DW31;
1269
1270 // DW32
1271 union
1272 {
1273 struct
1274 {
1275 uint32_t SkipVal : MOS_BITFIELD_RANGE(0, 15);
1276 uint32_t MultiPredL0Disable : MOS_BITFIELD_RANGE(16, 23);
1277 uint32_t MultiPredL1Disable : MOS_BITFIELD_RANGE(24, 31);
1278 };
1279 struct
1280 {
1281 uint32_t Value;
1282 };
1283 } DW32;
1284
1285 // DW33
1286 union
1287 {
1288 struct
1289 {
1290 uint32_t Intra16x16NonDCPredPenalty : MOS_BITFIELD_RANGE(0, 7);
1291 uint32_t Intra8x8NonDCPredPenalty : MOS_BITFIELD_RANGE(8, 15);
1292 uint32_t Intra4x4NonDCPredPenalty : MOS_BITFIELD_RANGE(16, 23);
1293 uint32_t : MOS_BITFIELD_RANGE(24, 31);
1294 };
1295 struct
1296 {
1297 uint32_t Value;
1298 };
1299 } DW33;
1300
1301 union {
1302 struct {
1303 uint32_t LambdaME;
1304 };
1305 uint32_t Value;
1306 } DW34;
1307
1308 union {
1309 struct {
1310 uint32_t SimpIntraInterThreshold : MOS_BITFIELD_RANGE(0, 15);
1311 uint32_t ModeCostSp : MOS_BITFIELD_RANGE(16, 23);
1312 uint32_t IntraRefreshEn : MOS_BITFIELD_RANGE(24, 25);
1313 uint32_t FirstIntraRefresh : MOS_BITFIELD_BIT(26);
1314 uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(27);
1315 uint32_t HalfUpdateMixedLCU : MOS_BITFIELD_BIT(28);
1316 uint32_t Res_29_31 : MOS_BITFIELD_RANGE(29, 31);
1317 };
1318 uint32_t Value;
1319 } DW35;
1320
1321 union {
1322 struct {
1323 uint32_t NumRefIdxL0MinusOne : MOS_BITFIELD_RANGE(0, 7);
1324 uint32_t HMECombinedExtraSUs : MOS_BITFIELD_RANGE(8, 15);
1325 uint32_t NumRefIdxL1MinusOne : MOS_BITFIELD_RANGE(16, 23);
1326 uint32_t PowerSaving : MOS_BITFIELD_BIT(24);
1327 uint32_t BRCEnable : MOS_BITFIELD_BIT(25);
1328 uint32_t LCUBRCEnable : MOS_BITFIELD_BIT(26);
1329 uint32_t ROIEnable : MOS_BITFIELD_BIT(27);
1330 uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(28);
1331 uint32_t CheckAllFractionalEnable : MOS_BITFIELD_BIT(29);
1332 uint32_t HMECombinedOverlap : MOS_BITFIELD_RANGE(30, 31);
1333 };
1334 uint32_t Value;
1335 } DW36;
1336
1337 union {
1338 struct {
1339 uint32_t ActualQpRefID0List0 : MOS_BITFIELD_RANGE(0, 7);
1340 uint32_t ActualQpRefID1List0 : MOS_BITFIELD_RANGE(8, 15);
1341 uint32_t ActualQpRefID2List0 : MOS_BITFIELD_RANGE(16, 23);
1342 uint32_t ActualQpRefID3List0 : MOS_BITFIELD_RANGE(24, 31);
1343 };
1344 uint32_t Value;
1345 } DW37;
1346
1347 union {
1348 struct {
1349 uint32_t NumIntraRefreshOffFrames : MOS_BITFIELD_RANGE(0, 15);
1350 uint32_t NumFrameInGOB : MOS_BITFIELD_RANGE(16, 31);
1351 };
1352 uint32_t Value;
1353 } DW38;
1354
1355 union {
1356 struct {
1357 uint32_t ActualQpRefID0List1 : MOS_BITFIELD_RANGE(0, 7);
1358 uint32_t ActualQpRefID1List1 : MOS_BITFIELD_RANGE(8, 15);
1359 uint32_t RefCost : MOS_BITFIELD_RANGE(16, 31);
1360 };
1361 uint32_t Value;
1362 } DW39;
1363
1364 union {
1365 struct {
1366 uint32_t Reserved;
1367 };
1368 uint32_t Value;
1369 } DW40;
1370
1371 union {
1372 struct {
1373 uint32_t TransformThreshold2 : MOS_BITFIELD_RANGE(0, 15);
1374 uint32_t TextureIntraCostThreshold : MOS_BITFIELD_RANGE(16, 31);
1375 };
1376 uint32_t Value;
1377 } DW41;
1378
1379 union {
1380 struct {
1381 uint32_t Reserved;
1382 };
1383 uint32_t Value;
1384 } DW42;
1385
1386 union {
1387 struct {
1388 uint32_t Reserved;
1389 };
1390 uint32_t Value;
1391 } DW43;
1392
1393 union {
1394 struct {
1395 uint32_t MaxNumMergeCandidates : MOS_BITFIELD_RANGE(0, 3);
1396 uint32_t MaxNumRefList0 : MOS_BITFIELD_RANGE(4, 7);
1397 uint32_t MaxNumRefList1 : MOS_BITFIELD_RANGE(8, 11);
1398 uint32_t Res_12_15 : MOS_BITFIELD_RANGE(12, 15);
1399 uint32_t MaxVmvR : MOS_BITFIELD_RANGE(16, 31);
1400 };
1401 uint32_t Value;
1402 } DW44;
1403
1404 union {
1405 struct {
1406 uint32_t TemporalMvpEnableFlag : MOS_BITFIELD_BIT(0);
1407 uint32_t Res_1_7 : MOS_BITFIELD_RANGE(1, 7);
1408 uint32_t Log2ParallelMergeLevel : MOS_BITFIELD_RANGE(8, 15);
1409 uint32_t HMECombineLenPslice : MOS_BITFIELD_RANGE(16, 23);
1410 uint32_t HMECombineLenBslice : MOS_BITFIELD_RANGE(24, 31);
1411 };
1412 uint32_t Value;
1413 } DW45;
1414
1415 union {
1416 struct {
1417 uint32_t Log2MinTUSize : MOS_BITFIELD_RANGE(0, 7);
1418 uint32_t Log2MaxTUSize : MOS_BITFIELD_RANGE(8, 15);
1419 uint32_t Log2MinCUSize : MOS_BITFIELD_RANGE(16, 23);
1420 uint32_t Log2MaxCUSize : MOS_BITFIELD_RANGE(24, 31);
1421 };
1422 uint32_t Value;
1423 } DW46;
1424
1425 union {
1426 struct {
1427 uint32_t NumRegionsInSlice : MOS_BITFIELD_RANGE(0, 7);
1428 uint32_t TypeOfWalkingPattern : MOS_BITFIELD_RANGE(8, 11);
1429 uint32_t ChromaFlatnessCheckFlag : MOS_BITFIELD_BIT(12);
1430 uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(13);
1431 uint32_t SkipIntraKrnFlag : MOS_BITFIELD_BIT(14);
1432 uint32_t ScreenContentFlag : MOS_BITFIELD_BIT(15);
1433 uint32_t IsLowDelay : MOS_BITFIELD_BIT(16);
1434 uint32_t CollocatedFromL0Flag : MOS_BITFIELD_BIT(17);
1435 uint32_t ArbitarySliceFlag : MOS_BITFIELD_BIT(18);
1436 uint32_t MultiSliceFlag : MOS_BITFIELD_BIT(19);
1437 uint32_t Res_20_23 : MOS_BITFIELD_RANGE(20, 23);
1438 uint32_t isCurrRefL0LongTerm : MOS_BITFIELD_BIT(24);
1439 uint32_t isCurrRefL1LongTerm : MOS_BITFIELD_BIT(25);
1440 uint32_t NumRegionMinus1 : MOS_BITFIELD_RANGE(26, 31);
1441 };
1442 uint32_t Value;
1443 } DW47;
1444
1445 union {
1446 struct {
1447 uint32_t CurrentTdL0_0 : MOS_BITFIELD_RANGE(0, 15);
1448 uint32_t CurrentTdL0_1 : MOS_BITFIELD_RANGE(16, 31);
1449 };
1450 uint32_t Value;
1451 } DW48;
1452
1453 union {
1454 struct {
1455 uint32_t CurrentTdL0_2 : MOS_BITFIELD_RANGE(0, 15);
1456 uint32_t CurrentTdL0_3 : MOS_BITFIELD_RANGE(16, 31);
1457 };
1458 uint32_t Value;
1459 } DW49;
1460
1461 union {
1462 struct {
1463 uint32_t CurrentTdL1_0 : MOS_BITFIELD_RANGE(0, 15);
1464 uint32_t CurrentTdL1_1 : MOS_BITFIELD_RANGE(16, 31);
1465 };
1466 uint32_t Value;
1467 } DW50;
1468
1469 union {
1470 struct {
1471 uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15);
1472 uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23);
1473 uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31);
1474 };
1475 uint32_t Value;
1476 } DW51;
1477
1478 union {
1479 struct {
1480 uint32_t NumofUnitInRegion : MOS_BITFIELD_RANGE(0, 15);
1481 uint32_t MaxHeightInRegion : MOS_BITFIELD_RANGE(16, 31);
1482 };
1483 uint32_t Value;
1484 } DW52;
1485
1486 union {
1487 struct {
1488 uint32_t IntraRefreshRefWidth : MOS_BITFIELD_RANGE(0, 7);
1489 uint32_t IntraRefreshRefHeight : MOS_BITFIELD_RANGE(8, 15);
1490 uint32_t Res_16_31 : MOS_BITFIELD_RANGE(16, 31);
1491 };
1492 uint32_t Value;
1493 } DW53;
1494
1495 union {
1496 struct {
1497 uint32_t Reserved;
1498 };
1499 uint32_t Value;
1500 } DW54;
1501
1502 union {
1503 struct {
1504 uint32_t Reserved;
1505 };
1506 uint32_t Value;
1507 } DW55;
1508
1509 union {
1510 struct {
1511 uint32_t BTI_CU_Record;
1512 };
1513 uint32_t Value;
1514 } DW56;
1515
1516 union {
1517 struct {
1518 uint32_t BTI_PAK_Cmd;
1519 };
1520 uint32_t Value;
1521 } DW57;
1522
1523 union {
1524 struct {
1525 uint32_t BTI_Src_Y;
1526 };
1527 uint32_t Value;
1528 } DW58;
1529
1530 union {
1531 struct {
1532 uint32_t BTI_Intra_Dist;
1533 };
1534 uint32_t Value;
1535 } DW59;
1536
1537 union {
1538 struct {
1539 uint32_t BTI_Min_Dist;
1540 };
1541 uint32_t Value;
1542 } DW60;
1543
1544 union {
1545 struct {
1546 uint32_t BTI_HMEMVPredFwdBwdSurfIndex;
1547 };
1548 uint32_t Value;
1549 } DW61;
1550
1551 union {
1552 struct {
1553 uint32_t BTI_HMEDistSurfIndex;
1554 };
1555 uint32_t Value;
1556 } DW62;
1557
1558 union {
1559 struct {
1560 uint32_t BTI_Slice_Map;
1561 };
1562 uint32_t Value;
1563 } DW63;
1564
1565 union {
1566 struct {
1567 uint32_t BTI_VME_Saved_UNI_SIC;
1568 };
1569 uint32_t Value;
1570 } DW64;
1571
1572 union {
1573 struct {
1574 uint32_t BTI_Simplest_Intra;
1575 };
1576 uint32_t Value;
1577 } DW65;
1578
1579 union {
1580 struct {
1581 uint32_t BTI_Collocated_RefFrame;
1582 };
1583 uint32_t Value;
1584 } DW66;
1585
1586 union {
1587 struct {
1588 uint32_t BTI_Reserved;
1589 };
1590 uint32_t Value;
1591 } DW67;
1592
1593 union {
1594 struct {
1595 uint32_t BTI_BRC_Input;
1596 };
1597 uint32_t Value;
1598 } DW68;
1599
1600 union {
1601 struct {
1602 uint32_t BTI_LCU_QP;
1603 };
1604 uint32_t Value;
1605 } DW69;
1606
1607 union {
1608 struct {
1609 uint32_t BTI_BRC_Data;
1610 };
1611 uint32_t Value;
1612 } DW70;
1613
1614 union {
1615 struct {
1616 uint32_t BTI_VMEInterPredictionSurfIndex;
1617 };
1618 uint32_t Value;
1619 } DW71;
1620
1621 union {
1622 //For B frame
1623 struct {
1624 uint32_t BTI_VMEInterPredictionBSurfIndex;
1625 };
1626 //For P frame
1627 struct {
1628 uint32_t BTI_ConcurrentThreadMap;
1629 };
1630 uint32_t Value;
1631 } DW72;
1632
1633 union {
1634 //For B frame
1635 struct {
1636 uint32_t BTI_ConcurrentThreadMap;
1637 };
1638 //For P frame
1639 struct {
1640 uint32_t BTI_MB_Data_CurFrame;
1641 };
1642 uint32_t Value;
1643 } DW73;
1644
1645 union {
1646 //For B frame
1647 struct {
1648 uint32_t BTI_MB_Data_CurFrame;
1649 };
1650 //For P frame
1651 struct {
1652 uint32_t BTI_MVP_CurFrame;
1653 };
1654 uint32_t Value;
1655 } DW74;
1656
1657 union {
1658 //For B frame
1659 struct {
1660 uint32_t BTI_MVP_CurFrame;
1661 };
1662 //For P frame
1663 struct {
1664 uint32_t BTI_Haar_Dist16x16;
1665 };
1666 uint32_t Value;
1667 } DW75;
1668
1669 union {
1670 //For B frame
1671 struct {
1672 uint32_t BTI_Haar_Dist16x16;
1673 };
1674 //For P frame
1675 struct {
1676 uint32_t BTI_Stats_Data;
1677 };
1678 uint32_t Value;
1679 } DW76;
1680
1681 union {
1682 //For B frame
1683 struct {
1684 uint32_t BTI_Stats_Data;
1685 };
1686 //For P frame
1687 struct {
1688 uint32_t BTI_Frame_Stats_Data;
1689 };
1690 uint32_t Value;
1691 } DW77;
1692
1693 union {
1694 //For B frame
1695 struct {
1696 uint32_t BTI_Frame_Stats_Data;
1697 };
1698 //For P frame
1699 struct {
1700 uint32_t BTI_Debug;
1701 };
1702 uint32_t Value;
1703 } DW78;
1704
1705 union {
1706 struct {
1707 uint32_t BTI_Debug;
1708 };
1709 uint32_t Value;
1710 } DW79;
1711 };
1712
1713 using PCODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 = struct CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9*;
1714 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9)) == 80);
1715
1716 //! HEVC encoder BRC init/reset curbe for GEN9
1717 struct CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9
1718 {
1719 union
1720 {
1721 struct
1722 {
1723 uint32_t ProfileLevelMaxFrame;
1724 };
1725 struct
1726 {
1727 uint32_t Value;
1728 };
1729 } DW0;
1730
1731 union
1732 {
1733 struct
1734 {
1735 uint32_t InitBufFull;
1736 };
1737 struct
1738 {
1739 uint32_t Value;
1740 };
1741 } DW1;
1742
1743 union
1744 {
1745 struct
1746 {
1747 uint32_t BufSize;
1748 };
1749 struct
1750 {
1751 uint32_t Value;
1752 };
1753 } DW2;
1754
1755 union
1756 {
1757 struct
1758 {
1759 uint32_t TargetBitRate;
1760 };
1761 struct
1762 {
1763 uint32_t Value;
1764 };
1765 } DW3;
1766
1767 union
1768 {
1769 struct
1770 {
1771 uint32_t MaximumBitRate;
1772 };
1773 struct
1774 {
1775 uint32_t Value;
1776 };
1777 } DW4;
1778
1779 union
1780 {
1781 struct
1782 {
1783 uint32_t MinimumBitRate;
1784 };
1785 struct
1786 {
1787 uint32_t Value;
1788 };
1789 } DW5;
1790
1791 union
1792 {
1793 struct
1794 {
1795 uint32_t FrameRateM;
1796 };
1797 struct
1798 {
1799 uint32_t Value;
1800 };
1801 } DW6;
1802
1803 union
1804 {
1805 struct
1806 {
1807 uint32_t FrameRateD;
1808 };
1809 struct
1810 {
1811 uint32_t Value;
1812 };
1813 } DW7;
1814
1815 union
1816 {
1817 struct
1818 {
1819 uint32_t BRCFlag : MOS_BITFIELD_RANGE(0, 15);
1820 uint32_t BRC_Param_A : MOS_BITFIELD_RANGE(16, 31);
1821 };
1822 struct
1823 {
1824 uint32_t Value;
1825 };
1826 } DW8;
1827
1828 union
1829 {
1830 struct
1831 {
1832 uint32_t BRC_Param_B : MOS_BITFIELD_RANGE(0, 15);
1833 uint32_t FrameWidth : MOS_BITFIELD_RANGE(16, 31);
1834 };
1835 struct
1836 {
1837 uint32_t Value;
1838 };
1839 } DW9;
1840
1841 union
1842 {
1843 struct
1844 {
1845 uint32_t FrameHeight : MOS_BITFIELD_RANGE(0, 15);
1846 uint32_t AVBRAccuracy : MOS_BITFIELD_RANGE(16, 31);
1847 };
1848 struct
1849 {
1850 uint32_t Value;
1851 };
1852 } DW10;
1853
1854 union
1855 {
1856 struct
1857 {
1858 uint32_t AVBRConvergence : MOS_BITFIELD_RANGE(0, 15);
1859 uint32_t MinimumQP : MOS_BITFIELD_RANGE(16, 31);
1860 };
1861 struct
1862 {
1863 uint32_t Value;
1864 };
1865 } DW11;
1866
1867 union
1868 {
1869 struct
1870 {
1871 uint32_t MaximumQP : MOS_BITFIELD_RANGE(0, 15);
1872 uint32_t NumberSlice : MOS_BITFIELD_RANGE(16, 31);
1873 };
1874 struct
1875 {
1876 uint32_t Value;
1877 };
1878 } DW12;
1879
1880 union
1881 {
1882 struct
1883 {
1884 uint32_t reserved : MOS_BITFIELD_RANGE(0, 15);
1885 uint32_t BRC_Param_C : MOS_BITFIELD_RANGE(16, 31);
1886 };
1887 struct
1888 {
1889 uint32_t Value;
1890 };
1891 } DW13;
1892
1893 union
1894 {
1895 struct
1896 {
1897 uint32_t BRC_Param_D : MOS_BITFIELD_RANGE(0, 15);
1898 uint32_t MaxBRCLevel : MOS_BITFIELD_RANGE(16, 31);
1899 };
1900 struct
1901 {
1902 uint32_t Value;
1903 };
1904 } DW14;
1905
1906 union
1907 {
1908 struct
1909 {
1910 uint32_t reserved;
1911 };
1912 struct
1913 {
1914 uint32_t Value;
1915 };
1916 } DW15;
1917
1918 union
1919 {
1920 struct
1921 {
1922 uint32_t InstantRateThreshold0_Pframe : MOS_BITFIELD_RANGE(0, 7);
1923 uint32_t InstantRateThreshold1_Pframe : MOS_BITFIELD_RANGE(8, 15);
1924 uint32_t InstantRateThreshold2_Pframe : MOS_BITFIELD_RANGE(16, 23);
1925 uint32_t InstantRateThreshold3_Pframe : MOS_BITFIELD_RANGE(24, 31);
1926 };
1927 struct
1928 {
1929 uint32_t Value;
1930 };
1931 } DW16;
1932
1933 union
1934 {
1935 struct
1936 {
1937 uint32_t InstantRateThreshold0_Bframe : MOS_BITFIELD_RANGE(0, 7);
1938 uint32_t InstantRateThreshold1_Bframe : MOS_BITFIELD_RANGE(8, 15);
1939 uint32_t InstantRateThreshold2_Bframe : MOS_BITFIELD_RANGE(16, 23);
1940 uint32_t InstantRateThreshold3_Bframe : MOS_BITFIELD_RANGE(24, 31);
1941 };
1942 struct
1943 {
1944 uint32_t Value;
1945 };
1946 } DW17;
1947
1948 union
1949 {
1950 struct
1951 {
1952 uint32_t InstantRateThreshold0_Iframe : MOS_BITFIELD_RANGE(0, 7);
1953 uint32_t InstantRateThreshold1_Iframe : MOS_BITFIELD_RANGE(8, 15);
1954 uint32_t InstantRateThreshold2_Iframe : MOS_BITFIELD_RANGE(16, 23);
1955 uint32_t InstantRateThreshold3_Iframe : MOS_BITFIELD_RANGE(24, 31);
1956 };
1957 struct
1958 {
1959 uint32_t Value;
1960 };
1961 } DW18;
1962
1963 union
1964 {
1965 struct
1966 {
1967 uint32_t DeviationThreshold0_PBframe : MOS_BITFIELD_RANGE(0, 7);
1968 uint32_t DeviationThreshold1_PBframe : MOS_BITFIELD_RANGE(8, 15);
1969 uint32_t DeviationThreshold2_PBframe : MOS_BITFIELD_RANGE(16, 23);
1970 uint32_t DeviationThreshold3_PBframe : MOS_BITFIELD_RANGE(24, 31);
1971 };
1972 struct
1973 {
1974 uint32_t Value;
1975 };
1976 } DW19;
1977
1978 union
1979 {
1980 struct
1981 {
1982 uint32_t DeviationThreshold4_PBframe : MOS_BITFIELD_RANGE(0, 7);
1983 uint32_t DeviationThreshold5_PBframe : MOS_BITFIELD_RANGE(8, 15);
1984 uint32_t DeviationThreshold6_PBframe : MOS_BITFIELD_RANGE(16, 23);
1985 uint32_t DeviationThreshold7_PBframe : MOS_BITFIELD_RANGE(24, 31);
1986 };
1987 struct
1988 {
1989 uint32_t Value;
1990 };
1991 } DW20;
1992
1993 union
1994 {
1995 struct
1996 {
1997 uint32_t DeviationThreshold0_VBRcontrol : MOS_BITFIELD_RANGE(0, 7);
1998 uint32_t DeviationThreshold1_VBRcontrol : MOS_BITFIELD_RANGE(8, 15);
1999 uint32_t DeviationThreshold2_VBRcontrol : MOS_BITFIELD_RANGE(16, 23);
2000 uint32_t DeviationThreshold3_VBRcontrol : MOS_BITFIELD_RANGE(24, 31);
2001 };
2002 struct
2003 {
2004 uint32_t Value;
2005 };
2006 } DW21;
2007
2008 union
2009 {
2010 struct
2011 {
2012 uint32_t DeviationThreshold4_VBRcontrol : MOS_BITFIELD_RANGE(0, 7);
2013 uint32_t DeviationThreshold5_VBRcontrol : MOS_BITFIELD_RANGE(8, 15);
2014 uint32_t DeviationThreshold6_VBRcontrol : MOS_BITFIELD_RANGE(16, 23);
2015 uint32_t DeviationThreshold7_VBRcontrol : MOS_BITFIELD_RANGE(24, 31);
2016 };
2017 struct
2018 {
2019 uint32_t Value;
2020 };
2021 } DW22;
2022
2023 union
2024 {
2025 struct
2026 {
2027 uint32_t DeviationThreshold0_Iframe : MOS_BITFIELD_RANGE(0, 7);
2028 uint32_t DeviationThreshold1_Iframe : MOS_BITFIELD_RANGE(8, 15);
2029 uint32_t DeviationThreshold2_Iframe : MOS_BITFIELD_RANGE(16, 23);
2030 uint32_t DeviationThreshold3_Iframe : MOS_BITFIELD_RANGE(24, 31);
2031 };
2032 struct
2033 {
2034 uint32_t Value;
2035 };
2036 } DW23;
2037
2038 union
2039 {
2040 struct
2041 {
2042 uint32_t DeviationThreshold4_Iframe : MOS_BITFIELD_RANGE(0, 7);
2043 uint32_t DeviationThreshold5_Iframe : MOS_BITFIELD_RANGE(8, 15);
2044 uint32_t DeviationThreshold6_Iframe : MOS_BITFIELD_RANGE(16, 23);
2045 uint32_t DeviationThreshold7_Iframe : MOS_BITFIELD_RANGE(24, 31);
2046 };
2047 struct
2048 {
2049 uint32_t Value;
2050 };
2051 } DW24;
2052
2053 union
2054 {
2055 struct
2056 {
2057 uint32_t ACQPBuffer : MOS_BITFIELD_RANGE(0, 7);
2058 uint32_t IntraSADTransform : MOS_BITFIELD_RANGE(8, 15);
2059 uint32_t Reserved0 : MOS_BITFIELD_RANGE(16, 23);
2060 uint32_t Reserved1 : MOS_BITFIELD_RANGE(24, 31);
2061 };
2062 struct
2063 {
2064 uint32_t Value;
2065 };
2066 } DW25;
2067
2068 union
2069 {
2070 struct
2071 {
2072 uint32_t reserved;
2073 };
2074 struct
2075 {
2076 uint32_t Value;
2077 };
2078 } DW26;
2079
2080 union
2081 {
2082 struct
2083 {
2084 uint32_t reserved;
2085 };
2086 struct
2087 {
2088 uint32_t Value;
2089 };
2090 } DW27;
2091
2092 union
2093 {
2094 struct
2095 {
2096 uint32_t reserved;
2097 };
2098 struct
2099 {
2100 uint32_t Value;
2101 };
2102 } DW28;
2103
2104 union
2105 {
2106 struct
2107 {
2108 uint32_t reserved;
2109 };
2110 struct
2111 {
2112 uint32_t Value;
2113 };
2114 } DW29;
2115
2116 union
2117 {
2118 struct
2119 {
2120 uint32_t reserved;
2121 };
2122 struct
2123 {
2124 uint32_t Value;
2125 };
2126 } DW30;
2127
2128 union
2129 {
2130 struct
2131 {
2132 uint32_t reserved;
2133 };
2134 struct
2135 {
2136 uint32_t Value;
2137 };
2138 } DW31;
2139 };
2140
2141 using PCODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9 = struct CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9*;
2142 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9)) == 32 );
2143
2144 //! HEVC encoder BRC update kernel curbe for GEN9
2145 struct CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9
2146 {
2147 union
2148 {
2149 struct
2150 {
2151 uint32_t TARGETSIZE;
2152 };
2153 struct
2154 {
2155 uint32_t Value;
2156 };
2157 }DW0;
2158
2159 union
2160 {
2161 struct
2162 {
2163 uint32_t FrameNumber;
2164 };
2165 struct
2166 {
2167 uint32_t Value;
2168 };
2169 }DW1;
2170
2171 union
2172 {
2173 struct
2174 {
2175 uint32_t PictureHeaderSize;
2176 };
2177 struct
2178 {
2179 uint32_t Value;
2180 };
2181 }DW2;
2182
2183 union
2184 {
2185 struct
2186 {
2187 uint32_t startGAdjFrame0 : MOS_BITFIELD_RANGE(0, 15);
2188 uint32_t startGAdjFrame1 : MOS_BITFIELD_RANGE(16, 31);
2189 };
2190 struct
2191 {
2192 uint32_t Value;
2193 };
2194 }DW3;
2195
2196 union
2197 {
2198 struct
2199 {
2200 uint32_t startGAdjFrame2 : MOS_BITFIELD_RANGE(0, 15);
2201 uint32_t startGAdjFrame3 : MOS_BITFIELD_RANGE(16, 31);
2202 };
2203 struct
2204 {
2205 uint32_t Value;
2206 };
2207 }DW4;
2208
2209 union
2210 {
2211 struct
2212 {
2213 uint32_t TARGETSIZE_FLAG : MOS_BITFIELD_RANGE(0, 7);
2214 uint32_t BRCFlag : MOS_BITFIELD_RANGE(8, 15);
2215 uint32_t MaxNumPAKs : MOS_BITFIELD_RANGE(16, 23);
2216 uint32_t CurrFrameType : MOS_BITFIELD_RANGE(24, 31);
2217 };
2218 struct
2219 {
2220 uint32_t Value;
2221 };
2222 }DW5;
2223
2224 union
2225 {
2226 struct
2227 {
2228 uint32_t NumSkippedFrames : MOS_BITFIELD_RANGE(0, 7);
2229 uint32_t CQPValue : MOS_BITFIELD_RANGE(8, 15);
2230 uint32_t ROIFlag : MOS_BITFIELD_RANGE(16, 23);
2231 uint32_t ROIRatio : MOS_BITFIELD_RANGE(24, 31);
2232 };
2233 struct
2234 {
2235 uint32_t Value;
2236 };
2237 }DW6;
2238
2239 union
2240 {
2241 struct
2242 {
2243 uint32_t FrameWidthInLCU : MOS_BITFIELD_RANGE(0, 7);
2244 uint32_t Res_8_14 : MOS_BITFIELD_RANGE(8, 14);
2245 uint32_t KernelBuildControl : MOS_BITFIELD_BIT( 15);
2246 uint32_t ucMinQp : MOS_BITFIELD_RANGE(16, 23);
2247 uint32_t ucMaxQp : MOS_BITFIELD_RANGE(24, 31);
2248 };
2249 struct
2250 {
2251 uint32_t Value;
2252 };
2253 }DW7;
2254
2255 union
2256 {
2257 struct
2258 {
2259 uint32_t StartGlobalAdjustMult0 : MOS_BITFIELD_RANGE(0, 7);
2260 uint32_t StartGlobalAdjustMult1 : MOS_BITFIELD_RANGE(8, 15);
2261 uint32_t StartGlobalAdjustMult2 : MOS_BITFIELD_RANGE(16, 23);
2262 uint32_t StartGlobalAdjustMult3 : MOS_BITFIELD_RANGE(24, 31);
2263 };
2264 struct
2265 {
2266 uint32_t Value;
2267 };
2268 }DW8;
2269
2270 union
2271 {
2272 struct
2273 {
2274 uint32_t StartGlobalAdjustMult4 : MOS_BITFIELD_RANGE(0, 7);
2275 uint32_t StartGlobalAdjustDivd0 : MOS_BITFIELD_RANGE(8, 15);
2276 uint32_t StartGlobalAdjustDivd1 : MOS_BITFIELD_RANGE(16, 23);
2277 uint32_t StartGlobalAdjustDivd2 : MOS_BITFIELD_RANGE(24, 31);
2278 };
2279 struct
2280 {
2281 uint32_t Value;
2282 };
2283 }DW9;
2284
2285 union
2286 {
2287 struct
2288 {
2289 uint32_t StartGlobalAdjustDivd3 : MOS_BITFIELD_RANGE(0, 7);
2290 uint32_t StartGlobalAdjustDivd4 : MOS_BITFIELD_RANGE(8, 15);
2291 uint32_t QPThreshold0 : MOS_BITFIELD_RANGE(16, 23);
2292 uint32_t QPThreshold1 : MOS_BITFIELD_RANGE(24, 31);
2293 };
2294 struct
2295 {
2296 uint32_t Value;
2297 };
2298 }DW10;
2299
2300 union
2301 {
2302 struct
2303 {
2304 uint32_t QPThreshold2 : MOS_BITFIELD_RANGE(0, 7);
2305 uint32_t QPThreshold3 : MOS_BITFIELD_RANGE(8, 15);
2306 uint32_t gRateRatioThreshold0 : MOS_BITFIELD_RANGE(16, 23);
2307 uint32_t gRateRatioThreshold1 : MOS_BITFIELD_RANGE(24, 31);
2308 };
2309 struct
2310 {
2311 uint32_t Value;
2312 };
2313 }DW11;
2314
2315 union
2316 {
2317 struct
2318 {
2319 uint32_t gRateRatioThreshold2 : MOS_BITFIELD_RANGE(0, 7);
2320 uint32_t gRateRatioThreshold3 : MOS_BITFIELD_RANGE(8, 15);
2321 uint32_t gRateRatioThreshold4 : MOS_BITFIELD_RANGE(16, 23);
2322 uint32_t gRateRatioThreshold5 : MOS_BITFIELD_RANGE(24, 31);
2323 };
2324 struct
2325 {
2326 uint32_t Value;
2327 };
2328 }DW12;
2329
2330 union
2331 {
2332 struct
2333 {
2334 uint32_t gRateRatioThreshold6 : MOS_BITFIELD_RANGE(0, 7);
2335 uint32_t gRateRatioThreshold7 : MOS_BITFIELD_RANGE(8, 15);
2336 uint32_t gRateRatioThreshold8 : MOS_BITFIELD_RANGE(16, 23);
2337 uint32_t gRateRatioThreshold9 : MOS_BITFIELD_RANGE(24, 31);
2338 };
2339 struct
2340 {
2341 uint32_t Value;
2342 };
2343 }DW13;
2344
2345 union
2346 {
2347 struct
2348 {
2349 uint32_t gRateRatioThreshold10 : MOS_BITFIELD_RANGE(0, 7);
2350 uint32_t gRateRatioThreshold11 : MOS_BITFIELD_RANGE(8, 15);
2351 uint32_t gRateRatioThreshold12 : MOS_BITFIELD_RANGE(16, 23);
2352 uint32_t ParallelMode : MOS_BITFIELD_RANGE(24, 31);
2353 };
2354 struct
2355 {
2356 uint32_t Value;
2357 };
2358 }DW14;
2359
2360 union
2361 {
2362 struct
2363 {
2364 uint32_t SizeOfSkippedFrames;
2365 };
2366 struct
2367 {
2368 uint32_t Value;
2369 };
2370 }DW15;
2371 };
2372
2373 using PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 = struct CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9*;
2374 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9)) == 16);
2375
2376 //! HEVC encoder coarse intra kernel curbe for GEN9
2377 struct CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9
2378 {
2379 union
2380 {
2381 struct
2382 {
2383 uint32_t PictureWidthInLumaSamples : MOS_BITFIELD_RANGE(0, 15);
2384 uint32_t PictureHeightInLumaSamples : MOS_BITFIELD_RANGE(16, 31);
2385 };
2386 struct
2387 {
2388 uint32_t Value;
2389 };
2390 }DW0;
2391
2392 union
2393 {
2394 struct
2395 {
2396 uint32_t SrcSize : MOS_BITFIELD_RANGE(0, 1);
2397 uint32_t Reserved0 : MOS_BITFIELD_RANGE(2, 13);
2398 uint32_t SkipType : MOS_BITFIELD_BIT(14);
2399 uint32_t Reserved1 : MOS_BITFIELD_BIT(15);
2400 uint32_t InterChromaMode : MOS_BITFIELD_BIT(16);
2401 uint32_t FTEnable : MOS_BITFIELD_BIT(17);
2402 uint32_t Reserved2 : MOS_BITFIELD_BIT(18);
2403 uint32_t BlkSkipEnabled : MOS_BITFIELD_BIT(19);
2404 uint32_t InterSAD : MOS_BITFIELD_RANGE(20, 21);
2405 uint32_t IntraSAD : MOS_BITFIELD_RANGE(22, 23);
2406 uint32_t Reserved3 : MOS_BITFIELD_RANGE(24, 31);
2407 };
2408 struct
2409 {
2410 uint32_t Value;
2411 };
2412 }DW1;
2413
2414 union
2415 {
2416 struct
2417 {
2418 uint32_t IntraPartMask : MOS_BITFIELD_RANGE(0, 4);
2419 uint32_t NonSkipZMvAdded : MOS_BITFIELD_BIT(5);
2420 uint32_t NonSkipModeAdded : MOS_BITFIELD_BIT(6);
2421 uint32_t IntraCornerSwap : MOS_BITFIELD_BIT(7);
2422 uint32_t Reserved0 : MOS_BITFIELD_RANGE(8, 15);
2423 uint32_t MVCostScaleFactor : MOS_BITFIELD_RANGE(16, 17);
2424 uint32_t BilinearEnable : MOS_BITFIELD_BIT(18);
2425 uint32_t Reserved1 : MOS_BITFIELD_BIT(19);
2426 uint32_t WeightedSADHAAR : MOS_BITFIELD_BIT(20);
2427 uint32_t AConlyHAAR : MOS_BITFIELD_BIT(21);
2428 uint32_t RefIDCostMode : MOS_BITFIELD_BIT(22);
2429 uint32_t Reserved2 : MOS_BITFIELD_BIT(23);
2430 uint32_t SkipCenterMask : MOS_BITFIELD_RANGE(24, 31);
2431 };
2432 struct
2433 {
2434 uint32_t Value;
2435 };
2436 }DW2;
2437
2438 union
2439 {
2440 struct
2441 {
2442 uint32_t Reserved;
2443 };
2444 struct
2445 {
2446 uint32_t Value;
2447 };
2448 }DW3;
2449
2450 union
2451 {
2452 struct
2453 {
2454 uint32_t Reserved;
2455 };
2456 struct
2457 {
2458 uint32_t Value;
2459 };
2460 }DW4;
2461
2462 union
2463 {
2464 struct
2465 {
2466 uint32_t Reserved;
2467 };
2468 struct
2469 {
2470 uint32_t Value;
2471 };
2472 }DW5;
2473
2474 union
2475 {
2476 struct
2477 {
2478 uint32_t Reserved;
2479 };
2480 struct
2481 {
2482 uint32_t Value;
2483 };
2484 }DW6;
2485
2486 union
2487 {
2488 struct
2489 {
2490 uint32_t Reserved;
2491 };
2492 struct
2493 {
2494 uint32_t Value;
2495 };
2496 }DW7;
2497
2498 union
2499 {
2500 struct
2501 {
2502 uint32_t BTI_Src_Y4;
2503 };
2504 struct
2505 {
2506 uint32_t Value;
2507 };
2508 }DW8;
2509
2510 union
2511 {
2512 struct
2513 {
2514 uint32_t BTI_Intra_Dist;
2515 };
2516 struct
2517 {
2518 uint32_t Value;
2519 };
2520 }DW9;
2521
2522 union
2523 {
2524 struct
2525 {
2526 uint32_t BTI_VME_Intra;
2527 };
2528 struct
2529 {
2530 uint32_t Value;
2531 };
2532 }DW10;
2533 };
2534
2535 using PCODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9 = struct CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9*;
2536 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9)) == 11 );
2537
2538 const uint8_t CodechalEncHevcStateG9::m_ftqBasedSkip[NUM_TARGET_USAGE_MODES] =
2539 {
2540 0, 3, 3, 3, 3, 3, 3, 0
2541 };
2542
2543 const uint8_t CodechalEncHevcStateG9::m_meMethod[NUM_TARGET_USAGE_MODES] =
2544 {
2545 0, 4, 4, 4, 4, 4, 4, 6
2546 };
2547
2548 const uint8_t CodechalEncHevcStateG9::m_superCombineDist[NUM_TARGET_USAGE_MODES + 1] =
2549 {
2550 0, 1, 1, 5, 5, 5, 9, 9, 0
2551 };
2552
2553 const uint16_t CodechalEncHevcStateG9::m_skipValB[2][2][64] =
2554 {
2555 {
2556 // Block Based Skip = 0 and Transform Flag = 0
2557 {
2558 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0024,
2559 0x0024, 0x0060, 0x0060, 0x0099, 0x0099, 0x00cf, 0x00cf, 0x0105,
2560 0x0105, 0x0141, 0x0141, 0x0183, 0x0183, 0x01ce, 0x01ce, 0x0228,
2561 0x0228, 0x0291, 0x0291, 0x030c, 0x030c, 0x039f, 0x039f, 0x0447,
2562 0x0447, 0x050d, 0x050d, 0x05f1, 0x05f1, 0x06f6, 0x06f6, 0x0822,
2563 0x0822, 0x0972, 0x0972, 0x0aef, 0x0aef, 0x0c96, 0x0c96, 0x0e70,
2564 0x0e70, 0x107a, 0x107a, 0x1284, 0x0000, 0x0000, 0x0000, 0x0000,
2565 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2566 },
2567 // Block Based Skip = 0 and Transform Flag = 1
2568 {
2569 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0024,
2570 0x0024, 0x0060, 0x0060, 0x0099, 0x0099, 0x00cf, 0x00cf, 0x0105,
2571 0x0105, 0x0141, 0x0141, 0x0183, 0x0183, 0x01ce, 0x01ce, 0x0228,
2572 0x0228, 0x0291, 0x0291, 0x030c, 0x030c, 0x039f, 0x039f, 0x0447,
2573 0x0447, 0x050d, 0x050d, 0x05f1, 0x05f1, 0x06f6, 0x06f6, 0x0822,
2574 0x0822, 0x0972, 0x0972, 0x0aef, 0x0aef, 0x0c96, 0x0c96, 0x0e70,
2575 0x0e70, 0x107a, 0x107a, 0x1284, 0x0000, 0x0000, 0x0000, 0x0000,
2576 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2577 }
2578 },
2579 {
2580 // Block Based Skip = 1 and Transform Flag = 0
2581 {
2582 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0006,
2583 0x0006, 0x0010, 0x0010, 0x0019, 0x0019, 0x0022, 0x0022, 0x002b,
2584 0x002b, 0x0035, 0x0035, 0x0040, 0x0040, 0x004d, 0x004d, 0x005c,
2585 0x005c, 0x006d, 0x006d, 0x0082, 0x0082, 0x009a, 0x009a, 0x00b6,
2586 0x00b6, 0x00d7, 0x00d7, 0x00fd, 0x00fd, 0x0129, 0x0129, 0x015b,
2587 0x015b, 0x0193, 0x0193, 0x01d2, 0x01d2, 0x0219, 0x0219, 0x0268,
2588 0x0268, 0x02bf, 0x02bf, 0x0316, 0x0000, 0x0000, 0x0000, 0x0000,
2589 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2590 },
2591 // Block Based Skip = 1 and Transform Flag = 1
2592 {
2593 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000c,
2594 0x000c, 0x0020, 0x0020, 0x0033, 0x0033, 0x0045, 0x0045, 0x0057,
2595 0x0057, 0x006b, 0x006b, 0x0081, 0x0081, 0x009a, 0x009a, 0x00b8,
2596 0x00b8, 0x00db, 0x00db, 0x0104, 0x0104, 0x0135, 0x0135, 0x016d,
2597 0x016d, 0x01af, 0x01af, 0x01fb, 0x01fb, 0x0252, 0x0252, 0x02b6,
2598 0x02b6, 0x0326, 0x0326, 0x03a5, 0x03a5, 0x0432, 0x0432, 0x04d0,
2599 0x04d0, 0x057e, 0x057e, 0x062c, 0x0000, 0x0000, 0x0000, 0x0000,
2600 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
2601 }
2602 }
2603 };
2604
2605 const double CodechalEncHevcStateG9::m_modeCostLut[3][12] = {
2606 //BPREDSLICE
2607 { 3.5, 4, 14, 40, 6.0, 3.25, 4.25, 0, 3.0, 1.0, 2.0, 0.0 },
2608 //PREDSLICE
2609 { 3.5, 4, 14, 35, 4.5, 1.32, 2.32, 0, 2.75, 0.0, 2.0, 0.0 },
2610 //INTRASLICE
2611 { 3.5, 0, 10.0, 30, 0, 0, 0, 0, 0, 0, 0, 0 }
2612 };
2613
2614 const double CodechalEncHevcStateG9::m_mvCostLut[3][8] = {
2615 //BPREDSLICE
2616 { 0.0, 1.0, 1.0, 3.0, 5.0, 6.0, 7.0, 8.0 },
2617 //PREDSLICE
2618 { 0.0, 2.0, 2.5, 4.5, 5.0, 6.0, 7.0, 7.5 },
2619 //INTRASLICE
2620 { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }
2621 };
2622
2623 const uint32_t CodechalEncHevcStateG9::m_brcMvCostHaar[][416] =
2624 {
2625 // I
2626 {
2627 0x0d040001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff000000, 0x3e6c0535, 0x0d040001,
2628 0x0f050001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff010101, 0x3e847641, 0x0f050001,
2629 0x19050002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff020202, 0x3e94aefa, 0x19050002,
2630 0x1a060002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff030303, 0x3ea6e43f, 0x1a060002,
2631 0x1b070002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff040404, 0x3ebb5458, 0x1b070002,
2632 0x1c080002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff050505, 0x3ed2452d, 0x1c080002,
2633 0x1e090003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff060606, 0x3eec0535, 0x1e090003,
2634 0x280a0003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff070707, 0x3f047641, 0x280a0003,
2635 0x290b0004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff080808, 0x3f14aefa, 0x290b0004,
2636 0x2a0d0004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff090909, 0x3f26e43f, 0x2a0d0004,
2637 0x2b0e0005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0a0a0a, 0x3f3b5458, 0x2b0e0005,
2638 0x2c180005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0b0b0b, 0x3f52452d, 0x2c180005,
2639 0x2e190006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0c0c0c, 0x3f6c0535, 0x2e190006,
2640 0x381a0007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0d0d0d, 0x3f847641, 0x381a0007,
2641 0x391c0008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0e0e0e, 0x3f94aefa, 0x391c0008,
2642 0x3a1d0009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff0f0f0f, 0x3fa6e43f, 0x3a1d0009,
2643 0x3b1f000a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff101010, 0x3fbb5458, 0x3b1f000a,
2644 0x3c28000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff111111, 0x3fd2452d, 0x3c28000b,
2645 0x3e29000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff121212, 0x3fec0535, 0x3e29000c,
2646 0x482a000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff131313, 0x40047641, 0x482a000e,
2647 0x492c0018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff141414, 0x4014aefa, 0x492c0018,
2648 0x4a2d0019, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff151515, 0x4026e43f, 0x4a2d0019,
2649 0x4b2f001a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff161616, 0x403b5458, 0x4b2f001a,
2650 0x4c38001b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff171717, 0x4052452d, 0x4c38001b,
2651 0x4e39001d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff181818, 0x406c0535, 0x4e39001d,
2652 0x583a001e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff191919, 0x40847641, 0x583a001e,
2653 0x593c0028, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1a1a1a, 0x4094aefa, 0x593c0028,
2654 0x5a3d0029, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1b1b1b, 0x40a6e43f, 0x5a3d0029,
2655 0x5b3f002a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1c1c1c, 0x40bb5458, 0x5b3f002a,
2656 0x5c48002b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1d1d1d, 0x40d2452d, 0x5c48002b,
2657 0x5e49002d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1e1e1e, 0x40ec0535, 0x5e49002d,
2658 0x684a002e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff1f1f1f, 0x41047641, 0x684a002e,
2659 0x694c0038, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff202020, 0x4114aefa, 0x694c0038,
2660 0x6a4d0039, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff212121, 0x4126e43f, 0x6a4d0039,
2661 0x6b4f003a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff222222, 0x413b5458, 0x6b4f003a,
2662 0x6c58003b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff232323, 0x4152452d, 0x6c58003b,
2663 0x6e59003d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff242424, 0x416c0535, 0x6e59003d,
2664 0x785a003e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff252525, 0x41847641, 0x785a003e,
2665 0x795c0048, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff262626, 0x4194aefa, 0x795c0048,
2666 0x7a5d0049, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff272727, 0x41a6e43f, 0x7a5d0049,
2667 0x7b5f004a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff282828, 0x41bb5458, 0x7b5f004a,
2668 0x7c68004b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff292929, 0x41d2452d, 0x7c68004b,
2669 0x7e69004d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2a2a2a, 0x41ec0535, 0x7e69004d,
2670 0x886a004e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2b2b2b, 0x42047641, 0x886a004e,
2671 0x896c0058, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2c2c2c, 0x4214aefa, 0x896c0058,
2672 0x8a6d0059, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2d2d2d, 0x4226e43f, 0x8a6d0059,
2673 0x8b6f005a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2e2e2e, 0x423b5458, 0x8b6f005a,
2674 0x8c78005b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff2f2f2f, 0x4252452d, 0x8c78005b,
2675 0x8e79005d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff303030, 0x426c0535, 0x8e79005d,
2676 0x8f7a005e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff313131, 0x42847641, 0x8f7a005e,
2677 0x8f7c0068, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff323232, 0x4294aefa, 0x8f7c0068,
2678 0x8f7d0069, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff333333, 0x42a6e43f, 0x8f7d0069
2679 },
2680 // P
2681 {
2682 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x003b003e, 0x3f800000, 0x391e0807,
2683 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x013b003e, 0x3f800000, 0x391e0807,
2684 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x023b003e, 0x3f800000, 0x391e0807,
2685 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x033b003e, 0x3f800000, 0x391e0807,
2686 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x043b003e, 0x3f800000, 0x391e0807,
2687 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x053b003e, 0x3f800000, 0x391e0807,
2688 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x063b003e, 0x3f800000, 0x391e0807,
2689 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x073b003e, 0x3f800000, 0x391e0807,
2690 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x083b003e, 0x3f800000, 0x391e0807,
2691 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x093b003e, 0x3f800000, 0x391e0807,
2692 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0a3b003e, 0x3f800000, 0x391e0807,
2693 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0b3b003e, 0x3f800000, 0x391e0807,
2694 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0c3b003e, 0x3f800000, 0x391e0807,
2695 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0d3b003e, 0x3f800000, 0x391e0807,
2696 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0e3b003e, 0x3f800000, 0x391e0807,
2697 0x391e0807, 0x00040209, 0x00040005, 0x09050400, 0x0f0e0c0a, 0x0f3b003e, 0x3f800000, 0x391e0807,
2698 0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x104b007c, 0x40000000, 0x492e180e,
2699 0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x114b007c, 0x40000000, 0x492e180e,
2700 0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x124b007c, 0x40000000, 0x492e180e,
2701 0x492e180e, 0x00090519, 0x0008000b, 0x190a0800, 0x1f1e1c1a, 0x134b007c, 0x40000000, 0x492e180e,
2702 0x4d3b1c1b, 0x000d071e, 0x000c0018, 0x1e0f0c00, 0x2b2b291f, 0x145800ba, 0x40400000, 0x4d3b1c1b,
2703 0x4d3b1c1b, 0x000d071e, 0x000c0018, 0x1e0f0c00, 0x2b2b291f, 0x155800ba, 0x40400000, 0x4d3b1c1b,
2704 0x4d3b1c1b, 0x000d071e, 0x000c0018, 0x1e0f0c00, 0x2b2b291f, 0x165800ba, 0x40400000, 0x4d3b1c1b,
2705 0x593e281e, 0x00190a29, 0x0018001b, 0x291a1800, 0x2f2e2c2a, 0x175b00f8, 0x40800000, 0x593e281e,
2706 0x593e281e, 0x00190a29, 0x0018001b, 0x291a1800, 0x2f2e2c2a, 0x185b00f8, 0x40800000, 0x593e281e,
2707 0x593e281e, 0x00190a29, 0x0018001b, 0x291a1800, 0x2f2e2c2a, 0x195b00f8, 0x40800000, 0x593e281e,
2708 0x5b492a29, 0x001c0d2b, 0x001a001e, 0x2b1d1a00, 0x39392f2d, 0x1a5e0136, 0x40a00000, 0x5b492a29,
2709 0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x1b680174, 0x40c00000, 0x5d4b2c2b,
2710 0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x1c680174, 0x40c00000, 0x5d4b2c2b,
2711 0x5f4c2e2c, 0x00281938, 0x001e002a, 0x38291e00, 0x3d3c3b39, 0x1d6a01b2, 0x40e00000, 0x5f4c2e2c,
2712 0x694e382e, 0x00291b39, 0x0028002b, 0x392a2800, 0x3f3e3c3a, 0x1e6b01f0, 0x41000000, 0x694e382e,
2713 0x6a583938, 0x002a1c3a, 0x0029002c, 0x3a2b2900, 0x48483e3b, 0x1f6d022e, 0x41100000, 0x6a583938,
2714 0x6b593a39, 0x002c1d3b, 0x002a002e, 0x3b2d2a00, 0x49493f3d, 0x206e026c, 0x41200000, 0x6b593a39,
2715 0x6c5a3b3a, 0x002d1f3c, 0x002b002f, 0x3c2e2b00, 0x4a4a483e, 0x216f02aa, 0x41300000, 0x6c5a3b3a,
2716 0x6e5b3d3b, 0x002f293f, 0x002d0039, 0x3f382d00, 0x4c4b4a48, 0x22790326, 0x41500000, 0x6e5b3d3b,
2717 0x6f5c3e3c, 0x00382948, 0x002e003a, 0x48392e00, 0x4d4c4b49, 0x237a0364, 0x41600000, 0x6f5c3e3c,
2718 0x795e483e, 0x00392b49, 0x0038003b, 0x493a3800, 0x4f4e4c4a, 0x247b03e0, 0x41800000, 0x795e483e,
2719 0x7a684948, 0x003a2c4a, 0x0039003c, 0x4a3b3900, 0x58584e4b, 0x257d045c, 0x41900000, 0x7a684948,
2720 0x7b694a49, 0x003c2d4b, 0x003a003e, 0x4b3d3a00, 0x59594f4d, 0x267e04d8, 0x41a00000, 0x7b694a49,
2721 0x7d6a4c4a, 0x003d2f4d, 0x003c0048, 0x4d3e3c00, 0x5b5a594e, 0x27880592, 0x41b80000, 0x7d6a4c4a,
2722 0x7e6b4d4b, 0x003e384e, 0x003d0049, 0x4e483d00, 0x5c5b5958, 0x2889060e, 0x41c80000, 0x7e6b4d4b,
2723 0x886d4f4d, 0x00483a58, 0x003f004a, 0x58493f00, 0x5e5d5b59, 0x298a0706, 0x41e80000, 0x886d4f4d,
2724 0x896e584e, 0x00493b59, 0x0048004b, 0x594a4800, 0x5f5e5c5a, 0x2a8b07c0, 0x42000000, 0x896e584e,
2725 0x8a785958, 0x004a3c5a, 0x0049004c, 0x5a4b4900, 0x68685e5b, 0x2b8d08b8, 0x42100000, 0x8a785958,
2726 0x8b795a59, 0x004c3d5b, 0x004a004e, 0x5b4d4a00, 0x69695f5d, 0x2c8e09b0, 0x42200000, 0x8b795a59,
2727 0x8c7a5b5a, 0x004d3f5d, 0x004b004f, 0x5d4e4b00, 0x6b6a685e, 0x2d8f0ae6, 0x42340000, 0x8c7a5b5a,
2728 0x8e7b5d5b, 0x004f485e, 0x004d0059, 0x5e584d00, 0x6c6b6a68, 0x2e8f0c5a, 0x424c0000, 0x8e7b5d5b,
2729 0x8f7c5e5c, 0x00584968, 0x004e005a, 0x68594e00, 0x6d6c6b69, 0x2f8f0dce, 0x42640000, 0x8f7c5e5c,
2730 0x8f7e685e, 0x00594b69, 0x0058005b, 0x695a5800, 0x6f6e6c6a, 0x308f0f80, 0x42800000, 0x8f7e685e,
2731 0x8f886968, 0x005a4c6a, 0x0059005c, 0x6a5b5900, 0x6f6f6e6b, 0x318f1170, 0x42900000, 0x8f886968,
2732 0x8f896a69, 0x005c4d6b, 0x005a005e, 0x6b5d5a00, 0x6f6f6f6d, 0x328f139e, 0x42a20000, 0x8f896a69,
2733 0x8f8a6b6a, 0x005d4f6d, 0x005b0068, 0x6d5e5b00, 0x6f6f6f6e, 0x338f160a, 0x42b60000, 0x8f8a6b6a
2734 },
2735 // B
2736 {
2737 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x003b0048, 0x3f800000, 0x3a1e0807,
2738 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x013b0048, 0x3f800000, 0x3a1e0807,
2739 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x023b0048, 0x3f800000, 0x3a1e0807,
2740 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x033b0048, 0x3f800000, 0x3a1e0807,
2741 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x043b0048, 0x3f800000, 0x3a1e0807,
2742 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x053b0048, 0x3f800000, 0x3a1e0807,
2743 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x063b0048, 0x3f800000, 0x3a1e0807,
2744 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x073b0048, 0x3f800000, 0x3a1e0807,
2745 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x083b0048, 0x3f800000, 0x3a1e0807,
2746 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x093b0048, 0x3f800000, 0x3a1e0807,
2747 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0a3b0048, 0x3f800000, 0x3a1e0807,
2748 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0b3b0048, 0x3f800000, 0x3a1e0807,
2749 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0c3b0048, 0x3f800000, 0x3a1e0807,
2750 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0d3b0048, 0x3f800000, 0x3a1e0807,
2751 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0e3b0048, 0x3f800000, 0x3a1e0807,
2752 0x3a1e0807, 0x0008060c, 0x00040206, 0x06020200, 0x180e0c0a, 0x0f3b0048, 0x3f800000, 0x3a1e0807,
2753 0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x104b0090, 0x40000000, 0x4a2e180e,
2754 0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x114b0090, 0x40000000, 0x4a2e180e,
2755 0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x124b0090, 0x40000000, 0x4a2e180e,
2756 0x4a2e180e, 0x00190d1c, 0x0008040c, 0x0c040400, 0x281e1c1a, 0x134b0090, 0x40000000, 0x4a2e180e,
2757 0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x145800d8, 0x40400000, 0x4f3b1c1b,
2758 0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x155800d8, 0x40400000, 0x4f3b1c1b,
2759 0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x165800d8, 0x40400000, 0x4f3b1c1b,
2760 0x5a3e281e, 0x00291d2c, 0x0018081c, 0x1c080800, 0x382e2c2a, 0x175b0120, 0x40800000, 0x5a3e281e,
2761 0x5a3e281e, 0x00291d2c, 0x0018081c, 0x1c080800, 0x382e2c2a, 0x185b0120, 0x40800000, 0x5a3e281e,
2762 0x5a3e281e, 0x00291d2c, 0x0018081c, 0x1c080800, 0x382e2c2a, 0x195b0120, 0x40800000, 0x5a3e281e,
2763 0x5d492a29, 0x002b282f, 0x001a0a1f, 0x1f0a0a00, 0x3a392f2d, 0x1a5e0168, 0x40a00000, 0x5d492a29,
2764 0x5f4b2c2b, 0x002d2a39, 0x001c0c29, 0x290c0c00, 0x3c3b392f, 0x1b6801b0, 0x40c00000, 0x5f4b2c2b,
2765 0x5f4b2c2b, 0x002d2a39, 0x001c0c29, 0x290c0c00, 0x3c3b392f, 0x1c6801b0, 0x40c00000, 0x5f4b2c2b,
2766 0x694c2e2c, 0x002f2b3b, 0x001e0e2b, 0x2b0e0e00, 0x3e3c3b39, 0x1d6a01f8, 0x40e00000, 0x694c2e2c,
2767 0x6a4e382e, 0x00392d3c, 0x0028182c, 0x2c181800, 0x483e3c3a, 0x1e6b0240, 0x41000000, 0x6a4e382e,
2768 0x6b583938, 0x003a2f3e, 0x0029192e, 0x2e191900, 0x49483e3b, 0x1f6d0288, 0x41100000, 0x6b583938,
2769 0x6d593a39, 0x003b383f, 0x002a1a2f, 0x2f1a1a00, 0x4a493f3d, 0x206e02d0, 0x41200000, 0x6d593a39,
2770 0x6e5a3b3a, 0x003c3948, 0x002b1b38, 0x381b1b00, 0x4b4a483e, 0x216f0318, 0x41300000, 0x6e5a3b3a,
2771 0x785b3d3b, 0x003e3b4a, 0x002d1d3a, 0x3a1d1d00, 0x4d4b4a48, 0x227903a8, 0x41500000, 0x785b3d3b,
2772 0x795c3e3c, 0x003f3b4b, 0x002e1e3b, 0x3b1e1e00, 0x4e4c4b49, 0x237a03f0, 0x41600000, 0x795c3e3c,
2773 0x7a5e483e, 0x00493d4c, 0x0038283c, 0x3c282800, 0x584e4c4a, 0x247b0480, 0x41800000, 0x7a5e483e,
2774 0x7b684948, 0x004a3f4e, 0x0039293e, 0x3e292900, 0x59584e4b, 0x257d0510, 0x41900000, 0x7b684948,
2775 0x7d694a49, 0x004b484f, 0x003a2a3f, 0x3f2a2a00, 0x5a594f4d, 0x267e05a0, 0x41a00000, 0x7d694a49,
2776 0x7e6a4c4a, 0x004c4959, 0x003c2c49, 0x492c2c00, 0x5c5a594e, 0x27880678, 0x41b80000, 0x7e6a4c4a,
2777 0x886b4d4b, 0x004d4a59, 0x003d2d49, 0x492d2d00, 0x5d5b5958, 0x28890708, 0x41c80000, 0x886b4d4b,
2778 0x896d4f4d, 0x004f4c5b, 0x003f2f4b, 0x4b2f2f00, 0x5f5d5b59, 0x298a0828, 0x41e80000, 0x896d4f4d,
2779 0x8a6e584e, 0x00594d5c, 0x0048384c, 0x4c383800, 0x685e5c5a, 0x2a8b0900, 0x42000000, 0x8a6e584e,
2780 0x8b785958, 0x005a4f5e, 0x0049394e, 0x4e393900, 0x69685e5b, 0x2b8d0a20, 0x42100000, 0x8b785958,
2781 0x8d795a59, 0x005b585f, 0x004a3a4f, 0x4f3a3a00, 0x6a695f5d, 0x2c8e0b40, 0x42200000, 0x8d795a59,
2782 0x8e7a5b5a, 0x005c5968, 0x004b3b58, 0x583b3b00, 0x6b6a685e, 0x2d8f0ca8, 0x42340000, 0x8e7a5b5a,
2783 0x8f7b5d5b, 0x005e5a6a, 0x004d3d5a, 0x5a3d3d00, 0x6d6b6a68, 0x2e8f0e58, 0x424c0000, 0x8f7b5d5b,
2784 0x8f7c5e5c, 0x005f5c6b, 0x004e3e5b, 0x5b3e3e00, 0x6e6c6b69, 0x2f8f1008, 0x42640000, 0x8f7c5e5c,
2785 0x8f7e685e, 0x00695d6c, 0x0058485c, 0x5c484800, 0x6f6e6c6a, 0x308f1200, 0x42800000, 0x8f7e685e,
2786 0x8f886968, 0x006a5f6e, 0x0059495e, 0x5e494900, 0x6f6f6e6b, 0x318f1440, 0x42900000, 0x8f886968,
2787 0x8f896a69, 0x006b686f, 0x005a4a5f, 0x5f4a4a00, 0x6f6f6f6d, 0x328f16c8, 0x42a20000, 0x8f896a69,
2788 0x8f8a6b6a, 0x006c6979, 0x005b4b69, 0x694b4b00, 0x6f6f6f6e, 0x338f1998, 0x42b60000, 0x8f8a6b6a
2789 }
2790 };
2791
//!
//! Flat table of QP_NUM * 4 dwords.
//! Each source row below holds eight dwords, i.e. two groups of four.
//! NOTE(review): the layout appears to be four consecutive values per QP
//! (QP-major), and the name suggests BRC lambda values for a Haar transform;
//! neither the column meaning nor the consumer is visible here - confirm
//! against the BRC kernel interface before editing any value.
//!
const uint32_t CodechalEncHevcStateG9::m_brcLambdaHaar[QP_NUM * 4] = {
    0x00000036, 0x00000024, 0x00000075, 0x00000800, 0x00000044, 0x0000002d, 0x00000084, 0x00000800,
    0x00000056, 0x00000039, 0x00000094, 0x00000800, 0x0000006c, 0x00000048, 0x000000a6, 0x00000800,
    0x00000089, 0x0000005b, 0x000000ba, 0x00000800, 0x000000ac, 0x00000073, 0x000000d1, 0x00000800,
    0x000000d9, 0x00000091, 0x000000eb, 0x00000800, 0x00000112, 0x000000b7, 0x00000108, 0x00000800,
    0x00000159, 0x000000e7, 0x00000128, 0x00000800, 0x000001b3, 0x00000123, 0x0000014d, 0x00000800,
    0x00000224, 0x0000016f, 0x00000175, 0x00000800, 0x000002b2, 0x000001cf, 0x000001a3, 0x00000800,
    0x00000366, 0x00000247, 0x000001d7, 0x00000800, 0x00000448, 0x000002df, 0x00000210, 0x00000800,
    0x00000565, 0x0000039e, 0x00000251, 0x00000800, 0x000006cc, 0x0000048f, 0x0000029a, 0x00000800,
    0x00000891, 0x000005be, 0x000002eb, 0x00001000, 0x00000acb, 0x0000073d, 0x00000347, 0x00001000,
    0x00000d99, 0x0000091e, 0x000003ae, 0x00001000, 0x00001122, 0x00000b7d, 0x00000421, 0x00001000,
    0x00001596, 0x00000e7a, 0x000004a2, 0x00001800, 0x00001b33, 0x0000123d, 0x00000534, 0x00001800,
    0x00002245, 0x000016fb, 0x000005d7, 0x00001800, 0x00002b2d, 0x00001cf4, 0x000014cf, 0x00002000,
    0x00003666, 0x0000247a, 0x0000275c, 0x00002000, 0x0000448a, 0x00002df6, 0x00003e23, 0x00002000,
    0x0000565a, 0x000039e8, 0x000059e8, 0x00002800, 0x00006ccc, 0x000048f5, 0x00007b8b, 0x00003000,
    0x00008914, 0x00005bec, 0x0000a412, 0x00003000, 0x0000acb5, 0x000073d1, 0x0000d4ac, 0x00003800,
    0x0000d999, 0x000091eb, 0x00010eb8, 0x00004000, 0x00011228, 0x0000b7d9, 0x000153ca, 0x00004800,
    0x0001596b, 0x0000e7a2, 0x0001a5b8, 0x00005000, 0x0001b333, 0x000123d7, 0x0002069e, 0x00005800,
    0x00022451, 0x00016fb2, 0x000278ed, 0x00006800, 0x0002b2d6, 0x0001cf44, 0x0002ff74, 0x00007000,
    0x00036666, 0x000247ae, 0x00039d70, 0x00008000, 0x000448a2, 0x0002df64, 0x00043590, 0x00009000,
    0x000565ac, 0x00039e88, 0x0004b986, 0x0000a000, 0x0006cccc, 0x00048f5c, 0x00054da5, 0x0000b800,
    0x00089145, 0x0005bec8, 0x0005f3e7, 0x0000c800, 0x000acb59, 0x00073d11, 0x0006ae86, 0x0000e800,
    0x000d9999, 0x00091eb8, 0x00078000, 0x00010000, 0x0011228a, 0x000b7d90, 0x00086b20, 0x00012000,
    0x001596b2, 0x000e7a23, 0x0009730c, 0x00014000, 0x001b3333, 0x00123d70, 0x000a9b4a, 0x00016800,
    0x00224515, 0x0016fb20, 0x000be7cf, 0x00019800, 0x002b2d64, 0x001cf446, 0x000d5d0d, 0x0001c800,
    0x00366666, 0x00247ae1, 0x000f0000, 0x00020000, 0x00448a2a, 0x002df640, 0x0010d641, 0x00024000,
    0x00565ac8, 0x0039e88c, 0x0012e618, 0x00028800, 0x006ccccc, 0x0048f5c2, 0x00153694, 0x0002d800
};
2820
//!
//! Two rows of QP_NUM 16-bit values, indexed [row][qp].
//! Values are non-decreasing with QP and, after the first entries, repeat in
//! pairs of adjacent QPs. The second row holds the first row's values
//! divided by three.
//! NOTE(review): what selects row 0 vs. row 1 is not visible in this part of
//! the file (name suggests skip thresholds) - confirm at the use site.
//!
const uint16_t CodechalEncHevcStateG9::m_skipThread[][QP_NUM] = {
    {
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0024,
        0x0024, 0x0060, 0x0060, 0x0099, 0x0099, 0x00cf, 0x00cf, 0x0105,
        0x0105, 0x0141, 0x0141, 0x0183, 0x0183, 0x01ce, 0x01ce, 0x0228,
        0x0228, 0x0291, 0x0291, 0x030c, 0x030c, 0x039f, 0x039f, 0x0447,
        0x0447, 0x050d, 0x050d, 0x05f1, 0x05f1, 0x06f6, 0x06f6, 0x0822,
        0x0822, 0x0972, 0x0972, 0x0aef, 0x0aef, 0x0c96, 0x0c96, 0x0e70,
        0x0e70, 0x107a, 0x107a, 0x1284
    },

    {
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000c,
        0x000c, 0x0020, 0x0020, 0x0033, 0x0033, 0x0045, 0x0045, 0x0057,
        0x0057, 0x006b, 0x006b, 0x0081, 0x0081, 0x009a, 0x009a, 0x00b8,
        0x00b8, 0x00db, 0x00db, 0x0104, 0x0104, 0x0135, 0x0135, 0x016d,
        0x016d, 0x01af, 0x01af, 0x01fb, 0x01fb, 0x0252, 0x0252, 0x02b6,
        0x02b6, 0x0326, 0x0326, 0x03a5, 0x03a5, 0x0432, 0x0432, 0x04d0,
        0x04d0, 0x057e, 0x057e, 0x062c
    }
};
2842
//!
//! Per-QP lambda table with three rows of QP_NUM entries each, labelled
//! below as B, P and intra slice. As written, all three rows hold the same
//! values: the default formula from the trailing comment, rounded to a whole
//! number and never below 1.0.
//! NOTE(review): name suggests these are the mode-decision (MD) lambdas;
//! the code that reads this table is not visible here.
//!
const double CodechalEncHevcStateG9::m_qpLambdaMdLut[3][QP_NUM] = // default lambda = pow(2, (qp-12)/6)
{
    //BREDSLICE
    // NOTE(review): label presumably means the B (bi-predictive) slice row - confirm row order against the slice-type enum
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0, //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0 //QP=[39~51]
    },
    //PREDSLICE
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0, //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0 //QP=[39~51]
    },
    //INTRASLICE
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0, //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0 //QP=[39~51]
    }
};
2867
//!
//! Per-QP lambda table with three rows of QP_NUM entries each, labelled
//! below as B, P and intra slice. As written, all three rows hold the same
//! values: the default formula from the trailing comment, rounded to a whole
//! number and never below 1.0.
//! NOTE(review): name suggests these are the motion-estimation (ME) lambdas;
//! the code that reads this table is not visible here.
//!
const double CodechalEncHevcStateG9::m_qpLambdaMeLut[3][QP_NUM] = // default lambda = pow(2, (qp-12)/6)
{
    //BREDSLICE
    // NOTE(review): label presumably means the B (bi-predictive) slice row - confirm row order against the slice-type enum
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0, //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0 //QP=[39~51]
    },
    //PREDSLICE
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0, //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0 //QP=[39~51]
    },
    //INTRASLICE
    {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, //QP=[0 ~12]
        1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, //QP=[13~25]
        5.0, 6.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0, 18.0, 20.0, //QP=[26~38]
        23.0, 25.0, 29.0, 32.0, 36.0, 40.0, 45.0, 51.0, 57.0, 64.0, 72.0, 81.0, 91.0 //QP=[39~51]
    }
};
2892
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 1 for B frames; the field layout and the code that patches
//! per-frame fields are not visible here - confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu1BCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x00143939, 0x00a27700, 0x1000000f, 0x20200000, 0x01000140, 0x00400003,
    0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x00161616, 0x13130013, 0x13131313,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x010101ce, 0x00040c24, 0x40400000, 0x005800d8, 0x40000001, 0x00001616, 0x00000000, 0x00000016,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0125, 0x08080201, 0x05030502, 0x00031101,
    0x00020001, 0x00000000, 0x00000001, 0x00000000, 0x00100014, 0x00000000, 0x00000000, 0x00000000
};
2903
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! As written it differs from m_encBTu1BCurbeInit only in dwords 47 and 52
//! (zero-based).
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 4 for B frames; the field layout is not visible here -
//! confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu4BCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x00143939, 0x00a27700, 0x1000000f, 0x20200000, 0x01000140, 0x00400003,
    0x4f3b1c1b, 0x001d1a29, 0x000c0619, 0x19060600, 0x2c2b291f, 0x00161616, 0x13130013, 0x13131313,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x010101ce, 0x00040c24, 0x40400000, 0x005800d8, 0x40000001, 0x00001616, 0x00000000, 0x00000016,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0125, 0x08080201, 0x05030502, 0x0c033104,
    0x00020001, 0x00000000, 0x00000001, 0x00000000, 0x0010000d, 0x00000000, 0x00000000, 0x00000000
};
2914
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 7 for B frames; the field layout is not visible here -
//! confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu7BCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x00143919, 0x00a27700, 0x1000000f, 0x20200000, 0x01000140, 0x00400003,
    0x5f4b2c2b, 0x002d2a39, 0x001c0c29, 0x290c0c00, 0x3c3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x120ff10f, 0x1e22e20d, 0x20e2ff10, 0x2edd06fc, 0x11d33ff1, 0xeb1ff33d, 0x4ef1f1f1, 0xf1f21211,
    0x0dffffe0, 0x11201f1f, 0x1105f1cf, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x0101030c, 0x00040c24, 0x40c00000, 0x006801b0, 0x40000000, 0x0000001b, 0x00000000, 0x0000001b,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0115, 0x08080201, 0x05030502, 0x0c034104,
    0x00000001, 0x00000000, 0x00000001, 0x00000000, 0x0010000d, 0x00000000, 0x00000000, 0x00000000
};
2925
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! As written, the values are identical to m_encBTu4PCurbeInit.
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 1 for P frames; the field layout is not visible here -
//! confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu1PCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x000b3919, 0x00a63000, 0x30000008, 0x28300000, 0x009000b0, 0x00400063,
    0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x80010165, 0x00040c24, 0x40c00000, 0x04680174, 0x41000002, 0x001b1b1b, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0035, 0x08080201, 0x05030502, 0x00032000,
    0x00020001, 0x00000003, 0x00000000, 0x00000000, 0x000a000e, 0x00002830, 0x00000000, 0x00000000

};
2937
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! As written, the values are identical to m_encBTu1PCurbeInit.
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 4 for P frames; the field layout is not visible here -
//! confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu4PCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x000b3919, 0x00a63000, 0x30000008, 0x28300000, 0x009000b0, 0x00400063,
    0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x0101f00f, 0x0f0f1010, 0xf0f0f00f, 0x01010101, 0x10101010, 0x0f0f0f0f, 0xf0f0f00f, 0x0101f0f0,
    0x01010101, 0x10101010, 0x0f0f1010, 0x0f0f0f0f, 0xf0f0f00f, 0xf0f0f0f0, 0x00000000, 0x00000000,
    0x80010165, 0x00040c24, 0x40c00000, 0x04680174, 0x41000002, 0x001b1b1b, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0035, 0x08080201, 0x05030502, 0x00032000,
    0x00020001, 0x00000003, 0x00000000, 0x00000000, 0x000a000e, 0x00002830, 0x00000000, 0x00000000

};
2949
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! As written it differs from m_encBTu1PCurbeInit only in dwords 16..31
//! (zero-based), which carry the same values as the corresponding dwords of
//! m_encBTu7BCurbeInit.
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 7 for P frames; the field layout is not visible here -
//! confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu7PCurbeInit[56] =
{
    0x000000a3, 0x00200008, 0x000b3919, 0x00a63000, 0x30000008, 0x28300000, 0x009000b0, 0x00400063,
    0x5d4b2c2b, 0x001e0f2e, 0x001c0028, 0x2e1f1c00, 0x3b3b392f, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x120ff10f, 0x1e22e20d, 0x20e2ff10, 0x2edd06fc, 0x11d33ff1, 0xeb1ff33d, 0x4ef1f1f1, 0xf1f21211,
    0x0dffffe0, 0x11201f1f, 0x1105f1cf, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x80010165, 0x00040c24, 0x40c00000, 0x04680174, 0x41000002, 0x001b1b1b, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0035, 0x08080201, 0x05030502, 0x00032000,
    0x00020001, 0x00000003, 0x00000000, 0x00000000, 0x000a000e, 0x00002830, 0x00000000, 0x00000000
};
2960
//!
//! 56-dword initialisation image, laid out as seven rows of eight dwords.
//! NOTE(review): name suggests the default CURBE for the "Enc B" kernel at
//! target usage 7 for I frames; the field layout is not visible here -
//! confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_encBTu7ICurbeInit[56] =
{
    0x000000a2, 0x00200008, 0x00143919, 0x00a03000, 0x5000000f, 0x28300000, 0x01000140, 0x00000003,
    0x5a3d0029, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x001b1b1b, 0x1e1e001e, 0x1e1e1e1e,
    0x120ff10f, 0x1e22e20d, 0x20e2ff10, 0x2edd06fc, 0x11d33ff1, 0xeb1ff33d, 0x4ef1f1f1, 0xf1f21211,
    0x0dffffe0, 0x11201f1f, 0x1105f1cf, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x8080030c, 0x00040c24, 0x40a6e43f, 0x005f0139, 0x40000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x07fc0005, 0x08080201, 0x05030502, 0x0c034104,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0010000d, 0x00000000, 0x00000000, 0x00000000
};
2971
//!
//! 32-dword initialisation image, laid out as four rows of eight dwords.
//! NOTE(review): name suggests the default CURBE for the BRC init kernel;
//! the field layout is not visible here - confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_brcInitCurbeInit[32] =
{
    0x000a8c00, 0x0112a880, 0x016e3600, 0x00b71b00, 0x00b71b00, 0x00000000, 0x0000001e, 0x00000001,
    0x000a0040, 0x05000000, 0x001e02d0, 0x000100c8, 0x00010033, 0x00000000, 0x00010000, 0x00000000,
    0x78503c28, 0x78503c23, 0x735a3c28, 0xe5dfd8d1, 0x2f29211b, 0xe5ddd7d1, 0x5e56463f, 0xeae3dad4,
    0x2f281f16, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
};
2979
//!
//! 16-dword initialisation image, laid out as two rows of eight dwords.
//! NOTE(review): name suggests the default CURBE for the BRC update kernel;
//! the field layout is not visible here - confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_brcUpdateCurbeInit[16] =
{
    0x0112a880, 0x00000000, 0x00000230, 0x0042000d, 0x00c80085, 0x02044000, 0x00000000, 0x00000000,
    0x02030101, 0x05052801, 0x12070103, 0x4b282519, 0xa07d6761, 0x00fffefd, 0x00030201, 0x00000000
};
2985
//!
//! 39-dword initialisation image: four rows of eight dwords followed by a
//! final row of seven dwords that are all 0xffffffff.
//! NOTE(review): name suggests the default CURBE for the ME kernel, with the
//! trailing all-ones dwords presumably standing for not-yet-assigned fields;
//! the field layout is not visible here - confirm against the CURBE struct.
//!
const uint32_t CodechalEncHevcStateG9::m_meCurbeInit[39] =
{
    0x00000000, 0x00200008, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
2994
PicCodingTypeToSliceType(uint16_t pictureCodingType)2995 uint8_t CodechalEncHevcStateG9::PicCodingTypeToSliceType(uint16_t pictureCodingType)
2996 {
2997 uint8_t sliceType = 0;
2998
2999 switch (pictureCodingType)
3000 {
3001 case I_TYPE:
3002 sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
3003 break;
3004 case P_TYPE:
3005 sliceType = CODECHAL_ENCODE_HEVC_P_SLICE;
3006 break;
3007 case B_TYPE:
3008 case B1_TYPE:
3009 case B2_TYPE:
3010 sliceType = CODECHAL_ENCODE_HEVC_B_SLICE;
3011 break;
3012 default:
3013 CODECHAL_ENCODE_ASSERT(false);
3014 }
3015 return sliceType;
3016 }
3017
GenerateSliceMap()3018 MOS_STATUS CodechalEncHevcStateG9::GenerateSliceMap()
3019 {
3020 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3021
3022 CODECHAL_ENCODE_FUNCTION_ENTER;
3023
3024 if (m_numSlices > 1 && m_sliceMap)
3025 {
3026 uint32_t log2LcuSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3027 CODECHAL_ENCODE_ASSERT(log2LcuSize == 5);
3028
3029 uint32_t W = MOS_ALIGN_CEIL(m_frameWidth, (1 << log2LcuSize)) >> log2LcuSize;
3030 uint32_t H = MOS_ALIGN_CEIL(m_frameHeight, (1 << log2LcuSize)) >> log2LcuSize;
3031 if (m_sliceMapSurface.dwPitch < W * sizeof(m_sliceMap[0]))
3032 {
3033 eStatus = MOS_STATUS_MORE_DATA;
3034 return eStatus;
3035 }
3036
3037 MOS_LOCK_PARAMS lockFlags;
3038 MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
3039 lockFlags.WriteOnly = true;
3040
3041 uint8_t* surface = (uint8_t* )m_osInterface->pfnLockResource(
3042 m_osInterface,
3043 &m_sliceMapSurface.OsResource,
3044 &lockFlags);
3045
3046 if (surface == nullptr)
3047 {
3048 eStatus = MOS_STATUS_NULL_POINTER;
3049 return eStatus;
3050 }
3051
3052 for (uint32_t h = 0; h < H; h++, surface += m_sliceMapSurface.dwPitch)
3053 {
3054 PCODECHAL_ENCODE_HEVC_SLICE_MAP map = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)surface;
3055 for (uint32_t w = 0; w < W; w++)
3056 {
3057 map[w] = m_sliceMap[h * W + w];
3058 }
3059 }
3060
3061 m_osInterface->pfnUnlockResource(
3062 m_osInterface,
3063 &m_sliceMapSurface.OsResource);
3064 }
3065 else if (m_numSlices == 1 && m_lastNumSlices != m_numSlices)
3066 {
3067 // Reset slice map surface
3068 MOS_LOCK_PARAMS lockFlags;
3069 MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
3070 lockFlags.WriteOnly = true;
3071
3072 uint8_t* surface = (uint8_t* )m_osInterface->pfnLockResource(
3073 m_osInterface,
3074 &m_sliceMapSurface.OsResource,
3075 &lockFlags);
3076
3077 if (surface == nullptr)
3078 {
3079 eStatus = MOS_STATUS_NULL_POINTER;
3080 return eStatus;
3081 }
3082
3083 MOS_ZeroMemory(surface,
3084 m_sliceMapSurface.dwWidth * m_sliceMapSurface.dwHeight);
3085
3086 m_osInterface->pfnUnlockResource(
3087 m_osInterface,
3088 &m_sliceMapSurface.OsResource);
3089 }
3090
3091 m_lastNumSlices = m_numSlices;
3092
3093 return eStatus;
3094 }
3095
SetSliceStructs()3096 MOS_STATUS CodechalEncHevcStateG9::SetSliceStructs()
3097 {
3098 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3099
3100 CODECHAL_ENCODE_FUNCTION_ENTER;
3101
3102 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSliceStructs());
3103
3104 // setup slice map
3105 PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams = m_hevcSliceParams;
3106 for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++, slcParams++)
3107 {
3108 if (!m_hevcPicParams->tiles_enabled_flag)
3109 {
3110 CODECHAL_ENCODE_ASSERT(slcParams->slice_segment_address == startLCU);
3111
3112 // process slice map
3113 for (uint32_t i = 0; i < slcParams->NumLCUsInSlice; i++)
3114 {
3115 m_sliceMap[startLCU + i].ucSliceID = (uint8_t)slcCount;
3116 }
3117
3118 startLCU += slcParams->NumLCUsInSlice;
3119 }
3120 }
3121
3122 return eStatus;
3123 }
3124
SetSequenceStructs()3125 MOS_STATUS CodechalEncHevcStateG9::SetSequenceStructs()
3126 {
3127 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3128
3129 CODECHAL_ENCODE_FUNCTION_ENTER;
3130
3131 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSequenceStructs());
3132
3133 // TU1 has no wave-front split
3134 if (m_hevcSeqParams->TargetUsage == 1 && m_numRegionsInSlice != 1)
3135 {
3136 m_numRegionsInSlice = 1;
3137 }
3138
3139 return eStatus;
3140 }
3141
SetPictureStructs()3142 MOS_STATUS CodechalEncHevcStateG9::SetPictureStructs()
3143 {
3144 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3145
3146 CODECHAL_ENCODE_FUNCTION_ENTER;
3147
3148 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs());
3149
3150 /* dwOriFrameWidth and dwOriFrameHeight must be CU-aligned in HEVC. Set the recon and raw surface resolution as
3151 the actual encoding resolution.
3152 */
3153 m_rawSurface.dwWidth = m_reconSurface.dwWidth = m_oriFrameWidth;
3154 m_rawSurface.dwHeight = m_reconSurface.dwHeight = m_oriFrameHeight;
3155
3156 m_firstIntraRefresh = true;
3157 m_frameNumInGob = (m_pictureCodingType == I_TYPE) ? 0 : (m_frameNumInGob + 1);
3158
3159 return eStatus;
3160 }
3161
CalcScaledDimensions()3162 MOS_STATUS CodechalEncHevcStateG9::CalcScaledDimensions()
3163 {
3164 CODECHAL_ENCODE_FUNCTION_ENTER;
3165
3166 m_downscaledWidthInMb4x =
3167 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x);
3168
3169 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit) && m_hevcSeqParams->bit_depth_luma_minus8)
3170 {
3171 uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2));
3172 m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x);
3173 }
3174
3175 m_downscaledHeightInMb4x =
3176 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x);
3177 m_downscaledWidth4x =
3178 m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH;
3179 m_downscaledHeight4x =
3180 m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT;
3181
3182 // SuperHME Scaling WxH
3183 m_downscaledWidthInMb16x =
3184 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x);
3185 m_downscaledHeightInMb16x =
3186 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x);
3187 m_downscaledWidth16x =
3188 m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH;
3189 m_downscaledHeight16x =
3190 m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT;
3191
3192 // UltraHME Scaling WxH
3193 m_downscaledWidthInMb32x =
3194 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_32x);
3195 m_downscaledHeightInMb32x =
3196 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_32x);
3197 m_downscaledWidth32x =
3198 m_downscaledWidthInMb32x * CODECHAL_MACROBLOCK_WIDTH;
3199 m_downscaledHeight32x =
3200 m_downscaledHeightInMb32x * CODECHAL_MACROBLOCK_HEIGHT;
3201
3202 return MOS_STATUS_SUCCESS;
3203 }
3204
LoadCosts(uint8_t sliceType,uint8_t qp,uint8_t intraSADTransform)3205 void CodechalEncHevcStateG9::LoadCosts(
3206 uint8_t sliceType,
3207 uint8_t qp,
3208 uint8_t intraSADTransform)
3209 {
3210 float hadBias = 2.0f;
3211
3212 if (intraSADTransform == INTRA_TRANSFORM_HADAMARD)
3213 {
3214 hadBias = 1.67f;
3215 }
3216
3217 double lambdaMd = m_qpLambdaMd[sliceType][qp];
3218 double lambdaMe = m_qpLambdaMe[sliceType][qp];
3219
3220 m_modeCost[0] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][0] * hadBias), 0x6f);
3221 m_modeCost[1] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][1] * hadBias), 0x8f);
3222 m_modeCost[2] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][2] * hadBias), 0x8f);
3223 m_modeCost[3] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][3] * hadBias), 0x8f);
3224 m_modeCost[4] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][4] * hadBias), 0x8f);
3225 m_modeCost[5] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][5] * hadBias), 0x6f);
3226 m_modeCost[6] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][6] * hadBias), 0x6f);
3227 m_modeCost[7] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][7] * hadBias), 0x6f);
3228 m_modeCost[8] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][8] * hadBias), 0x8f);
3229 m_modeCost[9] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][9] * hadBias), 0x6f);
3230 m_modeCost[10] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][10] * hadBias), 0x6f);
3231 m_modeCost[11] = Map44LutValue((uint32_t)(lambdaMd * m_modeCostLut[sliceType][11] * hadBias), 0x6f);
3232
3233 m_mvCost[0] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][0] * hadBias), 0x6f);
3234 m_mvCost[1] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][1] * hadBias), 0x6f);
3235 m_mvCost[2] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][2] * hadBias), 0x6f);
3236 m_mvCost[3] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][3] * hadBias), 0x6f);
3237 m_mvCost[4] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][4] * hadBias), 0x6f);
3238 m_mvCost[5] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][5] * hadBias), 0x6f);
3239 m_mvCost[6] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][6] * hadBias), 0x6f);
3240 m_mvCost[7] = Map44LutValue((uint32_t)(lambdaMe * m_mvCostLut[sliceType][7] * hadBias), 0x6f);
3241
3242 double m_lambdaMd = lambdaMd * hadBias;
3243 m_simplestIntraInterThreshold = 0;
3244 if (m_modeCostLut[sliceType][1] < m_modeCostLut[sliceType][3])
3245 {
3246 m_simplestIntraInterThreshold = (uint32_t)(m_lambdaMd * (m_modeCostLut[sliceType][3] - m_modeCostLut[sliceType][1]) + 0.5);
3247 }
3248
3249 m_modeCostSp = Map44LutValue((uint32_t)(lambdaMd * 45 * hadBias), 0x8f);
3250 }
3251
CalcForwardCoeffThd(uint8_t * forwardCoeffThresh,int32_t qp)3252 void CodechalEncHevcStateG9::CalcForwardCoeffThd(uint8_t* forwardCoeffThresh, int32_t qp)
3253 {
3254 static const uint8_t FTQ25I[27] =
3255 {
3256 0, 0, 0, 0,
3257 1, 3, 6, 8, 11,
3258 13, 16, 19, 22, 26,
3259 30, 34, 39, 44, 50,
3260 56, 62, 69, 77, 85,
3261 94, 104, 115
3262 };
3263
3264 uint8_t idx = (qp + 1) >> 1;
3265
3266 forwardCoeffThresh[0] =
3267 forwardCoeffThresh[1] =
3268 forwardCoeffThresh[2] =
3269 forwardCoeffThresh[3] =
3270 forwardCoeffThresh[4] =
3271 forwardCoeffThresh[5] =
3272 forwardCoeffThresh[6] = FTQ25I[idx];
3273 }
3274
GetQPValueFromRefList(uint32_t list,uint32_t index)3275 uint8_t CodechalEncHevcStateG9::GetQPValueFromRefList(uint32_t list, uint32_t index)
3276 {
3277 CODECHAL_ENCODE_ASSERT(list == LIST_0 || list == LIST_1);
3278 CODECHAL_ENCODE_ASSERT(index < CODEC_MAX_NUM_REF_FRAME_HEVC);
3279
3280 CODEC_PICTURE picture = m_hevcSliceParams->RefPicList[list][index];
3281
3282 if (!CodecHal_PictureIsInvalid(picture) && m_picIdx[picture.FrameIdx].bValid)
3283 {
3284 auto picIdx = m_picIdx[picture.FrameIdx].ucPicIdx;
3285 return m_refList[picIdx]->ucQPValue[0];
3286 }
3287 else
3288 {
3289 return 0;
3290 }
3291 }
3292
GetMaxRefFrames(uint8_t & maxNumRef0,uint8_t & maxNumRef1)3293 void CodechalEncHevcStateG9::GetMaxRefFrames(uint8_t& maxNumRef0, uint8_t& maxNumRef1)
3294 {
3295 maxNumRef0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G9;
3296 maxNumRef1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G9;
3297
3298 return;
3299 }
3300
InitParamForWalkerVfe26z(uint32_t numRegionsInSlice,uint32_t maxSliceHeight)3301 void CodechalEncHevcStateG9::InitParamForWalkerVfe26z(
3302 uint32_t numRegionsInSlice,
3303 uint32_t maxSliceHeight)
3304 {
3305 int32_t width = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
3306 int32_t height = maxSliceHeight * 2;
3307 int32_t tsWidth = ((width + 3) & 0xfffc) >> 1;
3308 int32_t lcuWidth = (width + 1) >> 1;
3309 int32_t lcuHeight = (height + 1) >> 1;
3310 int32_t tmp1 = ((lcuWidth + 1) >> 1) + ((lcuWidth + ((lcuHeight - 1) << 1)) + (2 * numRegionsInSlice - 1)) / (2 * numRegionsInSlice);
3311
3312 m_walkingPatternParam.MediaWalker.UseScoreboard = m_useHwScoreboard;
3313 m_walkingPatternParam.MediaWalker.ScoreboardMask = 0xFF;
3314 m_walkingPatternParam.MediaWalker.GlobalResolution.x = tsWidth;
3315 m_walkingPatternParam.MediaWalker.GlobalResolution.y = 4 * tmp1;
3316
3317 m_walkingPatternParam.MediaWalker.GlobalStart.x = 0;
3318 m_walkingPatternParam.MediaWalker.GlobalStart.y = 0;
3319
3320 m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.x = tsWidth;
3321 m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.y = 0;
3322
3323 m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.x = 0;
3324 m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.y = 4 * tmp1;
3325
3326 m_walkingPatternParam.MediaWalker.BlockResolution.x = tsWidth;
3327 m_walkingPatternParam.MediaWalker.BlockResolution.y = 4 * tmp1;
3328
3329 m_walkingPatternParam.MediaWalker.LocalStart.x = tsWidth;
3330 m_walkingPatternParam.MediaWalker.LocalStart.y = 0;
3331
3332 m_walkingPatternParam.MediaWalker.LocalEnd.x = 0;
3333 m_walkingPatternParam.MediaWalker.LocalEnd.y = 0;
3334
3335 m_walkingPatternParam.MediaWalker.LocalOutLoopStride.x = 1;
3336 m_walkingPatternParam.MediaWalker.LocalOutLoopStride.y = 0;
3337
3338 m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16);
3339 m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.y = 4;
3340
3341 m_walkingPatternParam.MediaWalker.MiddleLoopExtraSteps = 3;
3342
3343 m_walkingPatternParam.MediaWalker.MidLoopUnitX = 0;
3344 m_walkingPatternParam.MediaWalker.MidLoopUnitY = 1;
3345
3346 m_walkingPatternParam.MediaWalker.dwGlobalLoopExecCount = 0;
3347 m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = 2 * ((lcuWidth + (lcuHeight - 1) * 2 + 2 * numRegionsInSlice - 1) / (2 * numRegionsInSlice)) - 1;
3348
3349 m_walkingPatternParam.ScoreBoard.ScoreboardEnable = m_useHwScoreboard;
3350 m_walkingPatternParam.ScoreBoard.ScoreboardType = m_hwScoreboardType;
3351 m_walkingPatternParam.ScoreBoard.ScoreboardMask = 0xff;
3352
3353 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3354 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].y = 3;
3355
3356 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3357 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].y = 1;
3358
3359 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3360 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3361
3362 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].x = 0;
3363 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3364
3365 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[4].x = 0;
3366 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[4].y = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
3367
3368 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[5].x = 0;
3369 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[5].y = MOS_BITFIELD_VALUE((uint32_t)-3, 4);
3370
3371 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[6].x = 1;
3372 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[6].y = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
3373
3374 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[7].x = 1;
3375 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[7].y = MOS_BITFIELD_VALUE((uint32_t)-3, 4);
3376
3377 m_walkingPatternParam.Offset_Y = -4 * ((lcuWidth + 1) >> 1);
3378 m_walkingPatternParam.Offset_Delta = ((lcuWidth + ((lcuHeight - 1) << 1)) + (numRegionsInSlice - 1)) / (numRegionsInSlice);
3379 }
3380
InitParamForWalkerVfe26(uint32_t numRegionsInSlice,uint32_t maxSliceHeight)3381 void CodechalEncHevcStateG9::InitParamForWalkerVfe26(
3382 uint32_t numRegionsInSlice,
3383 uint32_t maxSliceHeight)
3384 {
3385 int32_t width = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
3386 int32_t height = maxSliceHeight;
3387 int32_t tsWidth = (width + 1) & 0xfffe;
3388 int32_t tsHeight = (height + 1) & 0xfffe;
3389 int32_t tmp1 = ((tsWidth + 1) >> 1) + ((tsWidth + ((tsHeight - 1) << 1)) + (2 * numRegionsInSlice - 1)) / (2 * numRegionsInSlice);
3390
3391 m_walkingPatternParam.MediaWalker.UseScoreboard = m_useHwScoreboard;
3392 m_walkingPatternParam.MediaWalker.ScoreboardMask = 0x0F;
3393 m_walkingPatternParam.MediaWalker.GlobalResolution.x = tsWidth;
3394 m_walkingPatternParam.MediaWalker.GlobalResolution.y = tmp1; // tsHeight;
3395
3396 m_walkingPatternParam.MediaWalker.GlobalStart.x = 0;
3397 m_walkingPatternParam.MediaWalker.GlobalStart.y = 0;
3398
3399 m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.x = tsWidth;
3400 m_walkingPatternParam.MediaWalker.GlobalOutlerLoopStride.y = 0;
3401
3402 m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.x = 0;
3403 m_walkingPatternParam.MediaWalker.GlobalInnerLoopUnit.y = tmp1;
3404
3405 m_walkingPatternParam.MediaWalker.BlockResolution.x = tsWidth;
3406 m_walkingPatternParam.MediaWalker.BlockResolution.y = tmp1;
3407
3408 m_walkingPatternParam.MediaWalker.LocalStart.x = tsWidth;
3409 m_walkingPatternParam.MediaWalker.LocalStart.y = 0;
3410
3411 m_walkingPatternParam.MediaWalker.LocalEnd.x = 0;
3412 m_walkingPatternParam.MediaWalker.LocalEnd.y = 0;
3413
3414 m_walkingPatternParam.MediaWalker.LocalOutLoopStride.x = 1;
3415 m_walkingPatternParam.MediaWalker.LocalOutLoopStride.y = 0;
3416
3417 m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16);
3418 m_walkingPatternParam.MediaWalker.LocalInnerLoopUnit.y = 1;
3419
3420 m_walkingPatternParam.MediaWalker.MiddleLoopExtraSteps = 0;
3421
3422 m_walkingPatternParam.MediaWalker.MidLoopUnitX = 0;
3423 m_walkingPatternParam.MediaWalker.MidLoopUnitY = 0;
3424
3425 m_walkingPatternParam.MediaWalker.dwGlobalLoopExecCount = 0;
3426
3427 m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = (width + (height - 1) * 2 + numRegionsInSlice - 1) / numRegionsInSlice;
3428
3429 m_walkingPatternParam.ScoreBoard.ScoreboardEnable = m_useHwScoreboard;
3430 m_walkingPatternParam.ScoreBoard.ScoreboardType = m_hwScoreboardType;
3431 m_walkingPatternParam.ScoreBoard.ScoreboardMask = 0x0f;
3432
3433 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3434 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[0].y = 0;
3435
3436 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3437 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[1].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3438
3439 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].x = 0;
3440 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3441
3442 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].x = 1;
3443 m_walkingPatternParam.ScoreBoard.ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
3444
3445 m_walkingPatternParam.Offset_Y = -((width + 1) >> 1);
3446 m_walkingPatternParam.Offset_Delta = ((width + ((height - 1) << 1)) + (numRegionsInSlice - 1)) / (numRegionsInSlice);
3447 }
3448
GenerateWalkingControlRegion()3449 MOS_STATUS CodechalEncHevcStateG9::GenerateWalkingControlRegion()
3450 {
3451 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3452
3453 CODECHAL_ENCODE_FUNCTION_ENTER;
3454
3455 MOS_ZeroMemory(&m_walkingPatternParam, sizeof(m_walkingPatternParam));
3456
3457 if (m_numRegionsInSlice < 1)
3458 {
3459 // Region number cannot be smaller than 1
3460 m_numRegionsInSlice = 1;
3461 }
3462
3463 if (m_numRegionsInSlice > 16)
3464 {
3465 // Region number cannot be larger than 16
3466 m_numRegionsInSlice = 16;
3467 }
3468
3469 uint32_t frameWidthInUnits = 0, frameHeightInUnits = 0;
3470 if (m_enable26WalkingPattern) /* 26 degree walking pattern */
3471 {
3472 frameWidthInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 16);
3473 frameHeightInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameHeight, 16);
3474 }
3475 else /* 26z walking pattern */
3476 {
3477 frameWidthInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32);
3478 frameHeightInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameHeight, 32);
3479 }
3480
3481 // THE FOLLOWING CODE FOR SLICE MERGING / CONCURRENT THREAD GENERATION IS PORTED FROM THE
3482 // SKL HEVC KRN CMODEL (v8992). FOR FIXES VERIFY THAT PROBLEM DOESN'T EXIST THERE TOO.
3483 bool isArbitrarySlices = false;
3484 int32_t sliceStartY[CODECHAL_HEVC_MAX_NUM_SLICES_LVL_5 + 1] = { 0 };
3485 for (uint32_t slice = 0; slice < m_numSlices; slice++)
3486 {
3487 if (m_hevcSliceParams[slice].slice_segment_address % CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32))
3488 {
3489 isArbitrarySlices = true;
3490 }
3491 else
3492 {
3493 sliceStartY[slice] = m_hevcSliceParams[slice].slice_segment_address / CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32);
3494
3495 /* 26 degree walking pattern */
3496 if (m_enable26WalkingPattern)
3497 {
3498 sliceStartY[slice] *= 2;
3499 }
3500 }
3501 }
3502
3503 sliceStartY[m_numSlices] = frameHeightInUnits;
3504
3505 const uint32_t regionStartYOffset = 32;
3506 uint32_t numRegions = 1;
3507 uint32_t numSlices = 0, height = 0;
3508 int32_t maxHeight = 0;
3509 uint16_t regionsStartTable[64] = { 0 };
3510
3511 if (isArbitrarySlices)
3512 {
3513 height = frameHeightInUnits;
3514 numSlices = 1;
3515 maxHeight = height;
3516 if (m_numRegionsInSlice > 1)
3517 {
3518 uint32_t numUnitInRegion =
3519 (frameWidthInUnits + 2 * (frameHeightInUnits - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
3520
3521 numRegions = m_numRegionsInSlice;
3522
3523 for (uint32_t i = 1; i < m_numRegionsInSlice; i++)
3524 {
3525 uint32_t front = i*numUnitInRegion;
3526
3527 if (front < frameWidthInUnits)
3528 {
3529 regionsStartTable[i] = (uint16_t)front;
3530 }
3531 else if (((front - frameWidthInUnits + 1) & 1) == 0)
3532 {
3533 regionsStartTable[i] = (uint16_t)frameWidthInUnits - 1;
3534 }
3535 else
3536 {
3537 regionsStartTable[i] = (uint16_t)frameWidthInUnits - 2;
3538 }
3539
3540 regionsStartTable[regionStartYOffset + i] = (uint16_t)((front - regionsStartTable[i]) >> 1);
3541 }
3542 }
3543 }
3544 else
3545 {
3546 maxHeight = 0;
3547 numSlices = m_numSlices;
3548
3549 for (uint32_t slice = 0; slice < numSlices; slice++)
3550 {
3551 int32_t sliceHeight = sliceStartY[slice + 1] - sliceStartY[slice];
3552 if (sliceHeight > maxHeight)
3553 {
3554 maxHeight = sliceHeight;
3555 }
3556 }
3557
3558 bool sliceIsMerged = false;
3559 while (!sliceIsMerged)
3560 {
3561 int32_t newNumSlices = 1;
3562 int32_t startY = 0;
3563
3564 for (uint32_t slice = 1; slice < numSlices; slice++)
3565 {
3566 if ((sliceStartY[slice + 1] - startY) <= maxHeight)
3567 {
3568 sliceStartY[slice] = -1;
3569 }
3570 else
3571 {
3572 startY = sliceStartY[slice];
3573 }
3574 }
3575
3576 for (uint32_t slice = 1; slice < numSlices; slice++)
3577 {
3578 if (sliceStartY[slice] > 0)
3579 {
3580 sliceStartY[newNumSlices] = sliceStartY[slice];
3581 newNumSlices++;
3582 }
3583 }
3584
3585 numSlices = newNumSlices;
3586 sliceStartY[numSlices] = frameHeightInUnits;
3587
3588 /* very rough estimation */
3589 if (numSlices * m_numRegionsInSlice <= CODECHAL_MEDIA_WALKER_MAX_COLORS)
3590 {
3591 sliceIsMerged = true;
3592 }
3593 else
3594 {
3595 int32_t num = 1;
3596
3597 maxHeight = frameHeightInUnits;
3598
3599 for (uint32_t slice = 0; slice < numSlices - 1; slice++)
3600 {
3601 if ((sliceStartY[slice + 2] - sliceStartY[slice]) <= maxHeight)
3602 {
3603 maxHeight = sliceStartY[slice + 2] - sliceStartY[slice];
3604 num = slice + 1;
3605 }
3606 }
3607
3608 for (uint32_t slice = num; slice < numSlices; slice++)
3609 {
3610 sliceStartY[slice] = sliceStartY[slice + 1];
3611 }
3612
3613 numSlices--;
3614 }
3615 }
3616
3617 uint32_t numUnitInRegion =
3618 (frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
3619
3620 numRegions = numSlices * m_numRegionsInSlice;
3621
3622 CODECHAL_ENCODE_ASSERT(numRegions != 0); // Making sure that the number of regions is at least 1
3623
3624 for (uint32_t slice = 0; slice < numSlices; slice++)
3625 {
3626 regionsStartTable[slice * m_numRegionsInSlice] = 0;
3627 regionsStartTable[regionStartYOffset + (slice * m_numRegionsInSlice)] = (uint16_t)sliceStartY[slice];
3628
3629 for (uint32_t i = 1; i < m_numRegionsInSlice; i++)
3630 {
3631 uint32_t front = i*numUnitInRegion;
3632
3633 if (front < frameWidthInUnits)
3634 {
3635 regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)front;
3636 }
3637 else if (((front - frameWidthInUnits + 1) & 1) == 0)
3638 {
3639 regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)frameWidthInUnits - 1;
3640 }
3641 else
3642 {
3643 regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)frameWidthInUnits - 2;
3644 }
3645
3646 regionsStartTable[regionStartYOffset + (slice * m_numRegionsInSlice + i)] = (uint16_t)sliceStartY[slice] +
3647 ((front - regionsStartTable[i]) >> 1);
3648 }
3649 }
3650 height = maxHeight;
3651 }
3652
3653 CODECHAL_ENCODE_ASSERT(numSlices <= CODECHAL_MEDIA_WALKER_MAX_COLORS); // The merged slices should be within the max color limit
3654
3655 uint16_t datatmp[32][32] = { 0 };
3656 uint32_t offsetToTheRegionStart[16] = { 0 };
3657 for (uint32_t k = 0; k < numSlices; k++)
3658 {
3659 int32_t nearestReg = 0;
3660 int32_t minDelta = m_frameHeight;
3661
3662 /* 26 degree wave front */
3663 if (m_enable26WalkingPattern)
3664 {
3665 int32_t curLcuPelY = regionsStartTable[regionStartYOffset + (k * m_numRegionsInSlice)] << 4;
3666 int32_t tsWidth = m_frameWidth >> 4;
3667 int32_t tsHeight = height;
3668 int32_t offsetY = -((tsWidth + 1) >> 1);
3669 int32_t offsetDelta = ((tsWidth + ((tsHeight - 1) << 1)) + (m_numRegionsInSlice - 1)) / (m_numRegionsInSlice);
3670
3671 for (uint32_t i = 0; i < numRegions; i++)
3672 {
3673 if (regionsStartTable[i] == 0)
3674 {
3675 int32_t delta = curLcuPelY - (regionsStartTable[regionStartYOffset + i] << 4);
3676
3677 if (delta >= 0)
3678 {
3679 if (delta < minDelta)
3680 {
3681 minDelta = delta;
3682 nearestReg = i;
3683 }
3684 }
3685 }
3686
3687 offsetToTheRegionStart[k] = 2 * regionsStartTable[regionStartYOffset + nearestReg];
3688 }
3689 for (uint32_t i = 0; i < m_numRegionsInSlice; i++)
3690 {
3691 datatmp[k * m_numRegionsInSlice + i][0] = regionsStartTable[nearestReg + i];
3692 datatmp[k * m_numRegionsInSlice + i][1] = regionsStartTable[regionStartYOffset + (nearestReg + i)];
3693 datatmp[k * m_numRegionsInSlice + i][2] = regionsStartTable[regionStartYOffset + nearestReg];
3694 int32_t tmpY = regionsStartTable[regionStartYOffset + (nearestReg + m_numRegionsInSlice)];
3695 datatmp[k * m_numRegionsInSlice + i][3] = (uint16_t)((tmpY != 0) ? tmpY : (m_frameHeight) >> 4);
3696 datatmp[k * m_numRegionsInSlice + i][4] = offsetToTheRegionStart[k] & 0x0FFFF;
3697 datatmp[k * m_numRegionsInSlice + i][5] = 0;
3698 datatmp[k * m_numRegionsInSlice + i][6] = 0;
3699 datatmp[k * m_numRegionsInSlice + i][7] = (uint16_t)(offsetY + regionsStartTable[regionStartYOffset + nearestReg] + ((i * offsetDelta) >> 1));
3700 }
3701 }
3702 else /* 26z walking pattern */
3703 {
3704 int32_t curLcuPelY = regionsStartTable[regionStartYOffset + (k * m_numRegionsInSlice)] << 5;
3705 int32_t tsWidth = (m_frameWidth + 16) >> 5;
3706 int32_t tsHeight = height;
3707 int32_t offsetY = -4 * ((tsWidth + 1) >> 1);
3708 int32_t offsetDelta = ((tsWidth + ((tsHeight - 1) << 1)) + (m_numRegionsInSlice - 1)) / (m_numRegionsInSlice);
3709
3710 for (uint32_t i = 0; i < numRegions; i++)
3711 {
3712 if (regionsStartTable[i] == 0)
3713 {
3714 int32_t delta = curLcuPelY - (regionsStartTable[regionStartYOffset + i] << 5);
3715
3716 if (delta >= 0)
3717 {
3718 if (delta < minDelta)
3719 {
3720 minDelta = delta;
3721 nearestReg = i;
3722 }
3723 }
3724 }
3725
3726 offsetToTheRegionStart[k] = 2 * regionsStartTable[regionStartYOffset + nearestReg];
3727 }
3728
3729 for (uint32_t i = 0; i < m_numRegionsInSlice; i++)
3730 {
3731 datatmp[k * m_numRegionsInSlice + i][0] = regionsStartTable[nearestReg + i];
3732 datatmp[k * m_numRegionsInSlice + i][1] = 2 * regionsStartTable[regionStartYOffset + (nearestReg + i)];
3733 datatmp[k * m_numRegionsInSlice + i][2] = 2 * regionsStartTable[regionStartYOffset + nearestReg];
3734 int32_t tmpY = 2 * regionsStartTable[regionStartYOffset + (nearestReg + m_numRegionsInSlice)];
3735 datatmp[k * m_numRegionsInSlice + i][3] = (uint16_t)((tmpY != 0) ? tmpY : (m_frameHeight) >> 4);
3736 datatmp[k * m_numRegionsInSlice + i][4] = offsetToTheRegionStart[k] & 0x0FFFF;
3737 datatmp[k * m_numRegionsInSlice + i][5] = 0;
3738 datatmp[k * m_numRegionsInSlice + i][6] = 0;
3739 datatmp[k * m_numRegionsInSlice + i][7] = (uint16_t)(offsetY + 4 * regionsStartTable[regionStartYOffset + nearestReg] + (4 * ((i * offsetDelta) >> 1)));
3740 }
3741 }
3742 }
3743
3744 if (m_enable26WalkingPattern)
3745 {
3746 InitParamForWalkerVfe26(m_numRegionsInSlice, maxHeight);
3747 }
3748 else
3749 {
3750 InitParamForWalkerVfe26z(m_numRegionsInSlice, maxHeight);
3751 }
3752
3753 MOS_LOCK_PARAMS lockFlags;
3754 MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
3755 lockFlags.WriteOnly = true;
3756
3757 PCODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION region;
3758 region = (PCODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION)m_osInterface->pfnLockResource(
3759 m_osInterface,
3760 &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource,
3761 &lockFlags);
3762
3763 if (region == nullptr)
3764 {
3765 eStatus = MOS_STATUS_NULL_POINTER;
3766 return eStatus;
3767 }
3768
3769 MOS_ZeroMemory(region, sizeof(*region) * HEVC_CONCURRENT_SURFACE_HEIGHT);
3770
3771 for (auto i = 0; i < 1024; i += 64)
3772 {
3773 MOS_SecureMemcpy(((uint8_t* )region) + i, 32, (uint8_t* )datatmp[i / 64], 32);
3774 }
3775
3776 m_walkingPatternParam.dwMaxHeightInRegion = m_enable26WalkingPattern ? maxHeight : maxHeight * 2;
3777 ;
3778 m_walkingPatternParam.dwNumRegion = numRegions;
3779 m_walkingPatternParam.dwNumUnitsInRegion =
3780 (frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
3781
3782 m_osInterface->pfnUnlockResource(
3783 m_osInterface,
3784 &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource);
3785
3786 CODECHAL_DEBUG_TOOL(
3787 eStatus = m_debugInterface->DumpSurface(
3788 &m_concurrentThreadSurface[m_concurrentThreadIndex],
3789 CodechalDbgAttr::attrOutput,
3790 "HEVC_B_MBENC_Out",
3791 CODECHAL_MEDIA_STATE_HEVC_B_MBENC);
3792 )
3793
3794 return eStatus;
3795 }
3796
GetMaxBtCount()3797 uint32_t CodechalEncHevcStateG9::GetMaxBtCount()
3798 {
3799 auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
3800
3801 // Init/Reset BRC kernel
3802 uint32_t btCountPhase1 = MOS_ALIGN_CEIL(
3803 m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount,
3804 btIdxAlignment);
3805
3806 // 4x, 16x DS, 2x DS, 4x ME, 16x ME, 32x ME, and coarse intra kernel
3807 uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_COARSE_INTRA].KernelParams.iBTCount, btIdxAlignment) + // coarse intra
3808 2 * MOS_ALIGN_CEIL(m_scaling4xKernelStates[0].KernelParams.iBTCount, btIdxAlignment) + // 4x and 16x DS
3809 MOS_ALIGN_CEIL(m_scaling2xKernelStates[0].KernelParams.iBTCount, btIdxAlignment) + // 2x DS
3810 3 * MOS_ALIGN_CEIL(m_hmeKernel ? m_hmeKernel->GetBTCount() : 0, btIdxAlignment); // 4x, 16x, and 32x ME
3811
3812 // BRC update kernels and 6 I kernels
3813 uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3814 MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3815 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_2xSCALING].KernelParams.iBTCount, btIdxAlignment) +
3816 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16SAD].KernelParams.iBTCount, btIdxAlignment) +
3817 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16MD].KernelParams.iBTCount, btIdxAlignment) +
3818 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8PU].KernelParams.iBTCount, btIdxAlignment) +
3819 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8FMODE].KernelParams.iBTCount, btIdxAlignment);
3820
3821 btCountPhase3 += MOS_MAX(
3822 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32MD].KernelParams.iBTCount, btIdxAlignment),
3823 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32INTRACHECK].KernelParams.iBTCount, btIdxAlignment));
3824
3825 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
3826 {
3827 btCountPhase3 += MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_DS_COMBINED].KernelParams.iBTCount, btIdxAlignment);
3828 }
3829
3830 // BRC update kernels and two B kernels
3831 uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3832 MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
3833 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_BENC].KernelParams.iBTCount, btIdxAlignment) +
3834 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_BPAK].KernelParams.iBTCount, btIdxAlignment);
3835
3836 uint32_t maxBtCount = MOS_MAX(btCountPhase1, btCountPhase2);
3837 maxBtCount = MOS_MAX(maxBtCount, btCountPhase3);
3838 maxBtCount = MOS_MAX(maxBtCount, btCountPhase4);
3839
3840 return maxBtCount;
3841 }
3842
AllocateEncResources()3843 MOS_STATUS CodechalEncHevcStateG9::AllocateEncResources()
3844 {
3845 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3846
3847 CODECHAL_ENCODE_FUNCTION_ENTER;
3848
3849 m_sliceMap = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)MOS_AllocAndZeroMemory(
3850 m_widthAlignedMaxLcu * m_heightAlignedMaxLcu * sizeof(m_sliceMap[0]));
3851 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceMap);
3852
3853 uint32_t downscaling2xWidth = m_widthAlignedMaxLcu >> 1;
3854 uint32_t downscaling2xHeight = m_heightAlignedMaxLcu >> 1;
3855 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
3856 &m_scaled2xSurface,
3857 downscaling2xWidth,
3858 downscaling2xHeight,
3859 "2x Downscaling"));
3860
3861 uint32_t width = m_widthAlignedMaxLcu >> 3;
3862 uint32_t height = m_heightAlignedMaxLcu >> 5;
3863 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3864 &m_sliceMapSurface,
3865 width,
3866 height,
3867 "Slice Map"));
3868
3869 uint32_t size = 32 * (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5);
3870 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3871 &m_32x32PuOutputData,
3872 size,
3873 "32x32 PU Output Data"));
3874
3875 size = 8 * 4 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
3876 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3877 &m_sad16x16Pu,
3878 size,
3879 "SAD 16x16 PU"));
3880
3881 size = 64 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
3882 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3883 &m_vme8x8Mode,
3884 size,
3885 "VME 8x8 mode"));
3886
3887 size = 32 * (m_widthAlignedMaxLcu >> 3) * (m_heightAlignedMaxLcu >> 3);
3888 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3889 &m_intraMode,
3890 size,
3891 "Intra mode"));
3892
3893 size = 16 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
3894 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3895 &m_intraDist,
3896 size,
3897 "Intra dist"));
3898
3899 // Change the surface size
3900 width = m_widthAlignedMaxLcu >> 1;
3901 height = m_heightAlignedMaxLcu >> 4;
3902 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3903 &m_minDistortion,
3904 width,
3905 height,
3906 "Min distortion surface"));
3907
3908 width = sizeof(CODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION);
3909 height = HEVC_CONCURRENT_SURFACE_HEIGHT;
3910 for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
3911 {
3912 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3913 &m_concurrentThreadSurface[i],
3914 width,
3915 height,
3916 "Concurrent Thread"));
3917 }
3918
3919 //size = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 4);
3920 size = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 4) + GPUMMU_WA_PADDING;
3921 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3922 &m_mvIndex,
3923 size,
3924 "MV index surface"));
3925
3926 //size = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 2);
3927 size = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 2) + GPUMMU_WA_PADDING;
3928 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3929 &m_mvpIndex,
3930 size,
3931 "MVP index surface"));
3932
3933 size = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu;
3934 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3935 &m_vmeSavedUniSic,
3936 size,
3937 "VME Saved UniSic surface"));
3938
3939 width = m_widthAlignedMaxLcu >> 3;
3940 height = m_heightAlignedMaxLcu >> 5;
3941 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3942 &m_simplestIntraSurface,
3943 width,
3944 height,
3945 "Simplest Intra surface"));
3946
3947 m_allocator->AllocateResource(m_standard, 1024, 1, brcInputForEncKernel, "brcInputForEncKernel", true);
3948
3949 if (m_hmeKernel && m_hmeSupported)
3950 {
3951 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
3952 }
3953
3954 // BRC Distortion Surface which will be used in ME as the output, too
3955 // In addition, this surface should also be allocated as BRC resource once ENC is enabled
3956 width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64);
3957 height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x * 4), 8);
3958 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
3959 &m_brcBuffers.sMeBrcDistortionBuffer,
3960 width,
3961 height,
3962 "BRC distortion surface"));
3963
3964 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
3965 {
3966 // adding 10 bit support for KBL : output surface for format conversion from 10bit to 8 bit
3967 for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
3968 {
3969 if (Mos_ResourceIsNull(&m_formatConvertedSurface[i].OsResource))
3970 {
3971 width = m_widthAlignedMaxLcu;
3972 height = m_heightAlignedMaxLcu;
3973
3974 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
3975 &m_formatConvertedSurface[i],
3976 width,
3977 height,
3978 "Format Converted Surface"));
3979 }
3980 }
3981
3982 if (Mos_ResourceIsNull(&m_resMbStatisticsSurface.sResource))
3983 {
3984 size = 52 * m_picWidthInMb * m_picHeightInMb; // 13 DWs or 52 bytes for statistics per MB
3985
3986 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
3987 &m_resMbStatisticsSurface,
3988 size,
3989 "MB stats surface"));
3990 }
3991 }
3992
3993 // ROI
3994 // ROI buffer size uses MB units for HEVC, not LCU
3995 width = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64);
3996 height = MOS_ALIGN_CEIL(m_picHeightInMb, 8);
3997
3998 MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface));
3999 m_roiSurface.TileType = MOS_TILE_LINEAR;
4000 m_roiSurface.bArraySpacing = true;
4001 m_roiSurface.Format = Format_Buffer_2D;
4002 m_roiSurface.dwWidth = width;
4003 m_roiSurface.dwPitch = width;
4004 m_roiSurface.dwHeight = height;
4005
4006 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
4007 &m_roiSurface,
4008 width,
4009 height,
4010 "ROI Buffer"));
4011
4012 return eStatus;
4013 }
4014
FreeEncResources()4015 MOS_STATUS CodechalEncHevcStateG9::FreeEncResources()
4016 {
4017 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4018
4019 CODECHAL_ENCODE_FUNCTION_ENTER;
4020
4021 MOS_Delete(m_meKernelState);
4022 m_meKernelState = nullptr;
4023 MOS_FreeMemory(m_meKernelBindingTable);
4024 m_meKernelBindingTable = nullptr;
4025
4026 MOS_DeleteArray(m_mbEncKernelStates);
4027 m_mbEncKernelStates = nullptr;
4028 MOS_FreeMemory(m_mbEncKernelBindingTable);
4029 m_mbEncKernelBindingTable = nullptr;
4030
4031 MOS_DeleteArray(m_brcKernelStates);
4032 m_brcKernelStates = nullptr;
4033 MOS_FreeMemory(m_brcKernelBindingTable);
4034 m_brcKernelBindingTable = nullptr;
4035
4036 MOS_FreeMemory(m_surfaceParams); m_surfaceParams = nullptr;
4037
4038 for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
4039 {
4040 m_osInterface->pfnFreeResource(
4041 m_osInterface,
4042 &m_formatConvertedSurface[i].OsResource);
4043 }
4044
4045 m_osInterface->pfnFreeResource(
4046 m_osInterface,
4047 &m_scaled2xSurface.OsResource);
4048
4049 m_osInterface->pfnFreeResource(
4050 m_osInterface,
4051 &m_resMbStatisticsSurface.sResource);
4052
4053 m_osInterface->pfnFreeResource(
4054 m_osInterface,
4055 &m_sliceMapSurface.OsResource);
4056
4057 m_osInterface->pfnFreeResource(
4058 m_osInterface,
4059 &m_32x32PuOutputData.sResource);
4060
4061 m_osInterface->pfnFreeResource(
4062 m_osInterface,
4063 &m_sad16x16Pu.sResource);
4064
4065 m_osInterface->pfnFreeResource(
4066 m_osInterface,
4067 &m_vme8x8Mode.sResource);
4068
4069 m_osInterface->pfnFreeResource(
4070 m_osInterface,
4071 &m_intraMode.sResource);
4072
4073 m_osInterface->pfnFreeResource(
4074 m_osInterface,
4075 &m_intraDist.sResource);
4076
4077 m_osInterface->pfnFreeResource(
4078 m_osInterface,
4079 &m_mvIndex.sResource);
4080
4081 m_osInterface->pfnFreeResource(
4082 m_osInterface,
4083 &m_mvpIndex.sResource);
4084
4085 m_osInterface->pfnFreeResource(
4086 m_osInterface,
4087 &m_vmeSavedUniSic.sResource);
4088
4089 m_osInterface->pfnFreeResource(
4090 m_osInterface,
4091 &m_minDistortion.OsResource);
4092
4093 for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
4094 {
4095 m_osInterface->pfnFreeResource(
4096 m_osInterface,
4097 &m_concurrentThreadSurface[i].OsResource);
4098 }
4099
4100 m_osInterface->pfnFreeResource(
4101 m_osInterface,
4102 &m_simplestIntraSurface.OsResource);
4103
4104 if (m_encEnabled)
4105 {
4106 m_osInterface->pfnFreeResource(
4107 m_osInterface,
4108 &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);
4109 }
4110
4111 MOS_FreeMemory(m_sliceMap);
4112 m_sliceMap = nullptr;
4113
4114 m_osInterface->pfnFreeResource(
4115 m_osInterface,
4116 &m_roiSurface.OsResource);
4117
4118 #if (_DEBUG || _RELEASE_INTERNAL)
4119 if (m_swBrcMode != nullptr)
4120 {
4121 m_osInterface->pfnFreeLibrary(m_swBrcMode);
4122 m_swBrcMode = nullptr;
4123 }
4124 #endif // (_DEBUG || _RELEASE_INTERNAL)
4125
4126 return eStatus;
4127 }
4128
SendMeSurfaces(CodechalHwInterface * hwInterface,PMOS_COMMAND_BUFFER cmdBuffer,MeSurfaceParams * params)4129 MOS_STATUS CodechalEncHevcStateG9::SendMeSurfaces(
4130 CodechalHwInterface *hwInterface,
4131 PMOS_COMMAND_BUFFER cmdBuffer,
4132 MeSurfaceParams *params)
4133 {
4134 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4135
4136 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4137 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4138 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
4139 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
4140 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
4141 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
4142 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
4143 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
4144
4145 PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
4146 if (params->b32xMeInUse)
4147 {
4148 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
4149 currScaledSurface = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4150 meMvDataBuffer = params->ps32xMeMvDataBuffer;
4151 }
4152 else if (params->b16xMeInUse)
4153 {
4154 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
4155 currScaledSurface = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4156 meMvDataBuffer = params->ps16xMeMvDataBuffer;
4157 }
4158 else
4159 {
4160 currScaledSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4161 meMvDataBuffer = params->ps4xMeMvDataBuffer;
4162 }
4163
4164 // Reference height and width information should be taken from the current scaled surface rather
4165 // than from the reference scaled surface in the case of PAFF.
4166 uint32_t width = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
4167 uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
4168 // Force the values
4169 meMvDataBuffer->dwWidth = width;
4170 meMvDataBuffer->dwHeight = height;
4171 meMvDataBuffer->dwPitch = width;
4172
4173 MeKernelBindingTable* meBindingTable = params->pMeBindingTable;
4174 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
4175 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4176 surfaceParams.bIs2DSurface = true;
4177 surfaceParams.bMediaBlockRW = true;
4178 surfaceParams.psSurface = meMvDataBuffer;
4179 surfaceParams.dwOffset = 0;
4180 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
4181 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEMVDataSurface;
4182 surfaceParams.bIsWritable = true;
4183 surfaceParams.bRenderTarget = true;
4184 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4185 hwInterface,
4186 cmdBuffer,
4187 &surfaceParams,
4188 params->pKernelState));
4189
4190 if (params->b16xMeInUse && params->b32xMeEnabled)
4191 {
4192 // Pass 32x MV to 16x ME operation
4193 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4194 surfaceParams.bIs2DSurface = true;
4195 surfaceParams.bMediaBlockRW = true;
4196 surfaceParams.psSurface = params->ps32xMeMvDataBuffer;
4197 surfaceParams.dwOffset = 0;
4198 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
4199 surfaceParams.dwBindingTableOffset = meBindingTable->dw32xMEMVDataSurface;
4200 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4201 hwInterface,
4202 cmdBuffer,
4203 &surfaceParams,
4204 params->pKernelState));
4205 }
4206 else if (!params->b32xMeInUse && params->b16xMeEnabled)
4207 {
4208 // Pass 16x MV to 4x ME operation
4209 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4210 surfaceParams.bIs2DSurface = true;
4211 surfaceParams.bMediaBlockRW = true;
4212 surfaceParams.psSurface = params->ps16xMeMvDataBuffer;
4213 surfaceParams.dwOffset = 0;
4214 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
4215 surfaceParams.dwBindingTableOffset = meBindingTable->dw16xMEMVDataSurface;
4216 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4217 hwInterface,
4218 cmdBuffer,
4219 &surfaceParams,
4220 params->pKernelState));
4221 }
4222
4223 // Insert Distortion buffers only for 4xMe case
4224 if (!params->b32xMeInUse && !params->b16xMeInUse)
4225 {
4226 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4227 surfaceParams.bIs2DSurface = true;
4228 surfaceParams.bMediaBlockRW = true;
4229 surfaceParams.psSurface = params->psMeBrcDistortionBuffer;
4230 surfaceParams.dwOffset = 0;
4231 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBRCDist;
4232 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
4233 surfaceParams.bIsWritable = true;
4234 surfaceParams.bRenderTarget = true;
4235 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4236 hwInterface,
4237 cmdBuffer,
4238 &surfaceParams,
4239 params->pKernelState));
4240
4241 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4242 surfaceParams.bIs2DSurface = true;
4243 surfaceParams.bMediaBlockRW = true;
4244 surfaceParams.psSurface = params->psMeDistortionBuffer;
4245 surfaceParams.dwOffset = 0;
4246 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEDist;
4247 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
4248 surfaceParams.bIsWritable = true;
4249 surfaceParams.bRenderTarget = true;
4250 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4251 hwInterface,
4252 cmdBuffer,
4253 &surfaceParams,
4254 params->pKernelState));
4255 }
4256
4257 // Setup references 1...n
4258 // LIST 0 references
4259 const uint8_t currVDirection = CODECHAL_VDIRECTION_FRAME; // Interlaced not supported
4260 for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
4261 {
4262 CODEC_PICTURE refPic = params->pL0RefFrameList[refIdx];
4263 MOS_SURFACE refScaledSurface = *currScaledSurface;
4264
4265 if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
4266 {
4267 if (refIdx == 0)
4268 {
4269 // Current picture Y - VME
4270 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4271 surfaceParams.bUseAdvState = true;
4272 surfaceParams.psSurface = currScaledSurface;
4273 surfaceParams.dwOffset = 0;
4274 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4275 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForFwdRef;
4276 surfaceParams.ucVDirection = currVDirection;
4277 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4278 hwInterface,
4279 cmdBuffer,
4280 &surfaceParams,
4281 params->pKernelState));
4282 }
4283
4284 uint8_t refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
4285 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
4286 if (params->b32xMeInUse)
4287 {
4288 MOS_SURFACE* p32xSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
4289 if (p32xSurface != nullptr)
4290 {
4291 refScaledSurface.OsResource = p32xSurface->OsResource;
4292 }
4293 else
4294 {
4295 CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4296 }
4297 }
4298 else if (params->b16xMeInUse)
4299 {
4300 MOS_SURFACE* p16xSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
4301 if (p16xSurface != nullptr)
4302 {
4303 refScaledSurface.OsResource = p16xSurface->OsResource;
4304 }
4305 else
4306 {
4307 CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4308 }
4309 }
4310 else
4311 {
4312 MOS_SURFACE* p4xSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
4313 if (p4xSurface != nullptr)
4314 {
4315 refScaledSurface.OsResource = p4xSurface->OsResource;
4316 }
4317 else
4318 {
4319 CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4320 }
4321 }
4322 // L0 Reference picture Y - VME
4323 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4324 surfaceParams.bUseAdvState = true;
4325 surfaceParams.psSurface = &refScaledSurface;
4326 surfaceParams.dwOffset = 0;
4327 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4328 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx];
4329 surfaceParams.ucVDirection = CODECHAL_VDIRECTION_FRAME;
4330 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4331 hwInterface,
4332 cmdBuffer,
4333 &surfaceParams,
4334 params->pKernelState));
4335 }
4336 }
4337
4338 // Setup references 1...n
4339 // LIST 1 references
4340 for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
4341 {
4342 CODEC_PICTURE refPic = params->pL1RefFrameList[refIdx];
4343 MOS_SURFACE refScaledSurface = *currScaledSurface;
4344
4345 if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
4346 {
4347 if (refIdx == 0)
4348 {
4349 // Current picture Y - VME
4350 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4351 surfaceParams.bUseAdvState = true;
4352 surfaceParams.psSurface = currScaledSurface;
4353 surfaceParams.dwOffset = 0;
4354 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4355 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForBwdRef;
4356 surfaceParams.ucVDirection = currVDirection;
4357 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4358 hwInterface,
4359 cmdBuffer,
4360 &surfaceParams,
4361 params->pKernelState));
4362 }
4363
4364 uint8_t refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
4365 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
4366 if (params->b32xMeInUse)
4367 {
4368 MOS_SURFACE* p32xSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
4369 if (p32xSurface != nullptr)
4370 {
4371 refScaledSurface.OsResource = p32xSurface->OsResource;
4372 }
4373 else
4374 {
4375 CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4376 }
4377 }
4378 else if (params->b16xMeInUse)
4379 {
4380 MOS_SURFACE* p16xSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
4381 if (p16xSurface != nullptr)
4382 {
4383 refScaledSurface.OsResource = p16xSurface->OsResource;
4384 }
4385 else
4386 {
4387 CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4388 }
4389 }
4390 else
4391 {
4392 MOS_SURFACE* p4xSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
4393 if (p4xSurface != nullptr)
4394 {
4395 refScaledSurface.OsResource = p4xSurface->OsResource;
4396 }
4397 else
4398 {
4399 CODECHAL_ENCODE_ASSERTMESSAGE("NULL pointer of DsSurface");
4400 }
4401 }
4402 // L1 Reference picture Y - VME
4403 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
4404 surfaceParams.bUseAdvState = true;
4405 surfaceParams.psSurface = &refScaledSurface;
4406 surfaceParams.dwOffset = 0;
4407 surfaceParams.dwCacheabilityControl = hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4408 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBwdRefPicIdx[refIdx];
4409 surfaceParams.ucVDirection = CODECHAL_VDIRECTION_FRAME;
4410 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4411 hwInterface,
4412 cmdBuffer,
4413 &surfaceParams,
4414 params->pKernelState));
4415 }
4416 }
4417
4418 return eStatus;
4419 }
4420
4421 //------------------------------------------------------------------------------
4422 //| Purpose: Setup curbe for HEVC ME kernels
4423 //| Return: N/A
4424 //------------------------------------------------------------------------------
SetCurbeMe(MeCurbeParams * params)4425 MOS_STATUS CodechalEncHevcStateG9::SetCurbeMe(
4426 MeCurbeParams* params)
4427 {
4428 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4429
4430 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4431 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
4432
4433 CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
4434
4435 uint8_t mvShiftFactor = 0, prevMvReadPosFactor = 0;
4436 bool useMvFromPrevStep= false, writeDistortions = false;
4437 uint32_t scaleFactor = 0;
4438 switch (params->hmeLvl)
4439 {
4440 case HME_LEVEL_32x:
4441 useMvFromPrevStep = HME_FIRST_STEP;
4442 writeDistortions = false;
4443 scaleFactor = SCALE_FACTOR_32x;
4444 mvShiftFactor = MV_SHIFT_FACTOR_32x;
4445 break;
4446 case HME_LEVEL_16x:
4447 useMvFromPrevStep = (m_b32XMeEnabled) ? HME_FOLLOWING_STEP : HME_FIRST_STEP;
4448 writeDistortions = false;
4449 scaleFactor = SCALE_FACTOR_16x;
4450 mvShiftFactor = MV_SHIFT_FACTOR_16x;
4451 prevMvReadPosFactor = PREV_MV_READ_POSITION_16x;
4452 break;
4453 case HME_LEVEL_4x:
4454 useMvFromPrevStep = (m_b16XMeEnabled) ? HME_FOLLOWING_STEP : HME_FIRST_STEP;
4455 writeDistortions = true;
4456 scaleFactor = SCALE_FACTOR_4x;
4457 mvShiftFactor = MV_SHIFT_FACTOR_4x;
4458 prevMvReadPosFactor = PREV_MV_READ_POSITION_4x;
4459 break;
4460 default:
4461 return MOS_STATUS_INVALID_PARAMETER;
4462 break;
4463 }
4464
4465 CODECHAL_ENC_HEVC_ME_CURBE_G9 cmd;
4466 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4467 &cmd,
4468 sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G9),
4469 m_meCurbeInit,
4470 sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G9)));
4471
4472 cmd.DW3.SubPelMode = 3;
4473 cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
4474 cmd.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
4475 cmd.DW5.QpPrimeY = params->pic_init_qp_minus26 + 26 + params->slice_qp_delta;
4476 cmd.DW6.WriteDistortions = writeDistortions;
4477 cmd.DW6.UseMvFromPrevStep = useMvFromPrevStep;
4478
4479 cmd.DW6.SuperCombineDist = m_superCombineDist[params->TargetUsage];
4480 cmd.DW6.MaxVmvR = 512; // CModel always uses 512 for HME even though B_MB uses (levelIDC)*4
4481
4482 if (m_pictureCodingType == B_TYPE)
4483 {
4484 // This field is irrelevant since we are not using the bi-direct search.
4485 // set it to 32
4486 cmd.DW1.BiWeight = 32;
4487 cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
4488 cmd.DW13.NumRefIdxL0MinusOne = params->num_ref_idx_l0_active_minus1;
4489 }
4490
4491 cmd.DW15.MvShiftFactor = mvShiftFactor;
4492 cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
4493
4494 // r3 & r4
4495 uint8_t meMethod = m_meMethod[params->TargetUsage];
4496
4497 eStatus = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), CodechalEncoderState::m_encodeSearchPath[0][meMethod], 14 * sizeof(uint32_t));
4498 if (eStatus != MOS_STATUS_SUCCESS)
4499 {
4500 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
4501 return eStatus;
4502 }
4503
4504 // r5
4505 cmd.DW32._4xMeMvOutputDataSurfIndex = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_CM_G9;
4506 cmd.DW33._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ?
4507 CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_CM_G9 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_CM_G9;
4508 cmd.DW34._4xMeOutputDistSurfIndex = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_CM_G9;
4509 cmd.DW35._4xMeOutputBrcDistSurfIndex = CODECHAL_ENCODE_ME_BRC_DISTORTION_CM_G9;
4510 cmd.DW36.VMEFwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_CM_G9;
4511 cmd.DW37.VMEBwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_CM_G9;
4512
4513 CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
4514 &cmd,
4515 params->pKernelState->dwCurbeOffset,
4516 sizeof(cmd)));
4517
4518 return eStatus;
4519 }
4520
SetMbEncKernelParams(MHW_KERNEL_PARAM * kernelParams,uint32_t idx)4521 MOS_STATUS CodechalEncHevcStateG9::SetMbEncKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx)
4522 {
4523 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4524
4525 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
4526
4527 auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
4528
4529 kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
4530 kernelParams->iIdCount = 1;
4531
4532 switch (idx)
4533 {
4534 case CODECHAL_HEVC_MBENC_2xSCALING:
4535 kernelParams->iBTCount = CODECHAL_HEVC_SCALING_FRAME_END - CODECHAL_HEVC_SCALING_FRAME_BEGIN;
4536 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9), curbeAlignment);
4537 kernelParams->iBlockWidth = 32;
4538 kernelParams->iBlockHeight = 32;
4539 break;
4540
4541 case CODECHAL_HEVC_MBENC_32x32MD:
4542 kernelParams->iBTCount = CODECHAL_HEVC_32x32_PU_END - CODECHAL_HEVC_32x32_PU_BEGIN;
4543 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9), curbeAlignment);
4544 kernelParams->iBlockWidth = 32;
4545 kernelParams->iBlockHeight = 32;
4546 break;
4547
4548 case CODECHAL_HEVC_MBENC_16x16SAD:
4549 kernelParams->iBTCount = CODECHAL_HEVC_16x16_PU_SAD_END - CODECHAL_HEVC_16x16_PU_SAD_BEGIN;
4550 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9), curbeAlignment);
4551 kernelParams->iBlockWidth = 16;
4552 kernelParams->iBlockHeight = 16;
4553 break;
4554
4555 case CODECHAL_HEVC_MBENC_16x16MD:
4556 kernelParams->iBTCount = CODECHAL_HEVC_16x16_PU_MD_END - CODECHAL_HEVC_16x16_PU_MD_BEGIN;
4557 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9), curbeAlignment);
4558 kernelParams->iBlockWidth = 32;
4559 kernelParams->iBlockHeight = 32;
4560 break;
4561
4562 case CODECHAL_HEVC_MBENC_8x8PU:
4563 kernelParams->iBTCount = CODECHAL_HEVC_8x8_PU_END - CODECHAL_HEVC_8x8_PU_BEGIN;
4564 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_8x8_PU_CURBE_G9), curbeAlignment);
4565 kernelParams->iBlockWidth = 8;
4566 kernelParams->iBlockHeight = 8;
4567 break;
4568
4569 case CODECHAL_HEVC_MBENC_8x8FMODE:
4570 kernelParams->iBTCount = CODECHAL_HEVC_8x8_PU_FMODE_END - CODECHAL_HEVC_8x8_PU_FMODE_BEGIN;
4571 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_8x8_PU_FMODE_CURBE_G9), curbeAlignment);
4572 kernelParams->iBlockWidth = 32;
4573 kernelParams->iBlockHeight = 32;
4574 break;
4575
4576 case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
4577 kernelParams->iBTCount = CODECHAL_HEVC_B_32x32_PU_END - CODECHAL_HEVC_B_32x32_PU_BEGIN;
4578 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9), curbeAlignment);
4579 kernelParams->iBlockWidth = 32;
4580 kernelParams->iBlockHeight = 32;
4581 break;
4582
4583 case CODECHAL_HEVC_MBENC_BENC:
4584 case CODECHAL_HEVC_MBENC_ADV:
4585 kernelParams->iBTCount = CODECHAL_HEVC_B_MBENC_END - CODECHAL_HEVC_B_MBENC_BEGIN;
4586 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9), curbeAlignment);
4587 kernelParams->iBlockWidth = 16;
4588 kernelParams->iBlockHeight = 16;
4589 break;
4590
4591 case CODECHAL_HEVC_MBENC_BPAK:
4592 kernelParams->iBTCount = CODECHAL_HEVC_B_PAK_END - CODECHAL_HEVC_B_PAK_BEGIN;
4593 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_PAK_CURBE_G9), curbeAlignment);
4594 kernelParams->iBlockWidth = 32;
4595 kernelParams->iBlockHeight = 32;
4596 break;
4597
4598 case CODECHAL_HEVC_MBENC_DS_COMBINED:
4599 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
4600 {
4601 kernelParams->iBTCount = CODECHAL_HEVC_DS_COMBINED_END - CODECHAL_HEVC_DS_COMBINED_BEGIN;
4602 uint32_t dsCombinedKernelCurbeSize = sizeof(CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9);
4603 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(dsCombinedKernelCurbeSize, curbeAlignment);
4604 kernelParams->iBlockWidth = 8;
4605 kernelParams->iBlockHeight = 8;
4606 }
4607 else
4608 {
4609 CODECHAL_ENCODE_ASSERT(false);
4610 eStatus = MOS_STATUS_INVALID_PARAMETER;
4611 }
4612 break;
4613
4614 case CODECHAL_HEVC_MBENC_PENC:
4615 case CODECHAL_HEVC_MBENC_ADV_P:
4616 kernelParams->iBTCount = CODECHAL_HEVC_P_MBENC_END - CODECHAL_HEVC_P_MBENC_BEGIN;
4617 //P MBEnc curbe has one less DWord than B MBEnc curbe
4618 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9) - sizeof(uint32_t), (size_t)curbeAlignment);
4619 kernelParams->iBlockWidth = 16;
4620 kernelParams->iBlockHeight = 16;
4621 break;
4622
4623 default:
4624 CODECHAL_ENCODE_ASSERT(false);
4625 eStatus = MOS_STATUS_INVALID_PARAMETER;
4626 }
4627
4628 return eStatus;
4629 }
4630
SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,uint32_t idx)4631 MOS_STATUS CodechalEncHevcStateG9::SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx)
4632 {
4633 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4634
4635 CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
4636
4637 MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
4638 bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_MBENC, idx);
4639
4640 switch (idx)
4641 {
4642 case CODECHAL_HEVC_MBENC_2xSCALING:
4643 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_SCALING_FRAME_END - CODECHAL_HEVC_SCALING_FRAME_BEGIN;
4644 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_SCALING_FRAME_BEGIN;
4645 break;
4646
4647 case CODECHAL_HEVC_MBENC_32x32MD:
4648 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_32x32_PU_END - CODECHAL_HEVC_32x32_PU_BEGIN;
4649 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_32x32_PU_BEGIN;
4650 break;
4651
4652 case CODECHAL_HEVC_MBENC_16x16SAD:
4653 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_16x16_PU_SAD_END - CODECHAL_HEVC_16x16_PU_SAD_BEGIN;
4654 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_16x16_PU_SAD_BEGIN;
4655 break;
4656
4657 case CODECHAL_HEVC_MBENC_16x16MD:
4658 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_16x16_PU_MD_END - CODECHAL_HEVC_16x16_PU_MD_BEGIN;
4659 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_16x16_PU_MD_BEGIN;
4660 break;
4661
4662 case CODECHAL_HEVC_MBENC_8x8PU:
4663 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_8x8_PU_END - CODECHAL_HEVC_8x8_PU_BEGIN;
4664 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_8x8_PU_BEGIN;
4665 break;
4666
4667 case CODECHAL_HEVC_MBENC_8x8FMODE:
4668 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_8x8_PU_FMODE_END - CODECHAL_HEVC_8x8_PU_FMODE_BEGIN;
4669 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_8x8_PU_FMODE_BEGIN;
4670 break;
4671
4672 case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
4673 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_B_32x32_PU_END - CODECHAL_HEVC_B_32x32_PU_BEGIN;
4674 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_B_32x32_PU_BEGIN;
4675 break;
4676
4677 case CODECHAL_HEVC_MBENC_BENC:
4678 case CODECHAL_HEVC_MBENC_ADV:
4679 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_B_MBENC_END - CODECHAL_HEVC_B_MBENC_BEGIN;
4680 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_B_MBENC_BEGIN;
4681 break;
4682
4683 case CODECHAL_HEVC_MBENC_BPAK:
4684 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_B_PAK_END - CODECHAL_HEVC_B_PAK_BEGIN;
4685 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_B_PAK_BEGIN;
4686 break;
4687
4688 case CODECHAL_HEVC_MBENC_DS_COMBINED:
4689 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_DS_COMBINED_END - CODECHAL_HEVC_DS_COMBINED_BEGIN;
4690 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_DS_COMBINED_BEGIN;
4691 break;
4692
4693 case CODECHAL_HEVC_MBENC_PENC:
4694 case CODECHAL_HEVC_MBENC_ADV_P:
4695 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_P_MBENC_END - CODECHAL_HEVC_P_MBENC_BEGIN;
4696 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_P_MBENC_BEGIN;
4697 break;
4698
4699 default:
4700 CODECHAL_ENCODE_ASSERT(false);
4701 eStatus = MOS_STATUS_INVALID_PARAMETER;
4702 return eStatus;
4703 }
4704
4705 for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
4706 {
4707 bindingTable->dwBindingTableEntries[i] = i;
4708 }
4709
4710 return eStatus;
4711 }
4712
SetBrcKernelParams(MHW_KERNEL_PARAM * kernelParams,uint32_t idx)4713 MOS_STATUS CodechalEncHevcStateG9::SetBrcKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx)
4714 {
4715 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4716
4717 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
4718
4719 auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
4720
4721 kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
4722 kernelParams->iIdCount = 1;
4723
4724 // Only LCU-based update kernel is running at multple threads. Others run in the single thread.
4725 switch (idx)
4726 {
4727 case CODECHAL_HEVC_BRC_COARSE_INTRA:
4728 kernelParams->iBTCount = CODECHAL_HEVC_COARSE_INTRA_END - CODECHAL_HEVC_COARSE_INTRA_BEGIN;
4729 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9), curbeAlignment);
4730 kernelParams->iBlockWidth = 32;
4731 kernelParams->iBlockHeight = 32;
4732 break;
4733
4734 case CODECHAL_HEVC_BRC_INIT:
4735 kernelParams->iBTCount = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4736 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9), curbeAlignment);
4737 kernelParams->iBlockWidth = 32;
4738 kernelParams->iBlockHeight = 32;
4739 break;
4740
4741 case CODECHAL_HEVC_BRC_RESET:
4742 kernelParams->iBTCount = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4743 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9), curbeAlignment);
4744 kernelParams->iBlockWidth = 32;
4745 kernelParams->iBlockHeight = 32;
4746 break;
4747
4748 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
4749 kernelParams->iBTCount = CODECHAL_HEVC_BRC_UPDATE_END - CODECHAL_HEVC_BRC_UPDATE_BEGIN;
4750 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9), curbeAlignment);
4751 kernelParams->iBlockWidth = 32;
4752 kernelParams->iBlockHeight = 32;
4753 break;
4754
4755 case CODECHAL_HEVC_BRC_LCU_UPDATE:
4756 kernelParams->iBTCount = CODECHAL_HEVC_BRC_LCU_UPDATE_END - CODECHAL_HEVC_BRC_LCU_UPDATE_BEGIN;
4757 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9), curbeAlignment);
4758 kernelParams->iBlockWidth = 128;
4759 kernelParams->iBlockHeight = 128;
4760 break;
4761
4762 default:
4763 CODECHAL_ENCODE_ASSERT(false);
4764 eStatus = MOS_STATUS_INVALID_PARAMETER;
4765 return eStatus;
4766 }
4767
4768 return eStatus;
4769 }
4770
SetBrcBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,uint32_t idx)4771 MOS_STATUS CodechalEncHevcStateG9::SetBrcBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx)
4772 {
4773 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4774
4775 CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
4776
4777 MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
4778 bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_BRC, idx);
4779
4780 switch (idx)
4781 {
4782 case CODECHAL_HEVC_BRC_COARSE_INTRA:
4783 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_COARSE_INTRA_END - CODECHAL_HEVC_COARSE_INTRA_BEGIN;
4784 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_COARSE_INTRA_BEGIN;
4785 break;
4786
4787 case CODECHAL_HEVC_BRC_INIT:
4788 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4789 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4790 break;
4791
4792 case CODECHAL_HEVC_BRC_RESET:
4793 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_BRC_INIT_RESET_END - CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4794 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_INIT_RESET_BEGIN;
4795 break;
4796
4797 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
4798 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_BRC_UPDATE_END - CODECHAL_HEVC_BRC_UPDATE_BEGIN;
4799 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_UPDATE_BEGIN;
4800 break;
4801
4802 case CODECHAL_HEVC_BRC_LCU_UPDATE:
4803 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_BRC_LCU_UPDATE_END - CODECHAL_HEVC_BRC_LCU_UPDATE_BEGIN;
4804 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_BRC_LCU_UPDATE_BEGIN;
4805 break;
4806
4807 default:
4808 CODECHAL_ENCODE_ASSERT(false);
4809 eStatus = MOS_STATUS_INVALID_PARAMETER;
4810 return eStatus;
4811 }
4812
4813 for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
4814 {
4815 bindingTable->dwBindingTableEntries[i] = i;
4816 }
4817
4818 return eStatus;
4819 }
4820
InitKernelStateBrc()4821 MOS_STATUS CodechalEncHevcStateG9::InitKernelStateBrc()
4822 {
4823 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4824
4825 CODECHAL_ENCODE_FUNCTION_ENTER;
4826
4827 m_numBrcKrnStates = CODECHAL_HEVC_BRC_NUM;
4828
4829 m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
4830 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
4831
4832 m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
4833 sizeof(GenericBindingTable) * m_numBrcKrnStates);
4834 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelBindingTable);
4835
4836 auto kernelStatePtr = m_brcKernelStates;
4837
4838 for (uint32_t krnStateIdx = 0; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
4839 {
4840 auto kernelSize = m_combinedKernelSize;
4841 CODECHAL_KERNEL_HEADER currKrnHeader;
4842
4843 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
4844 m_kernelBinary,
4845 ENC_BRC,
4846 krnStateIdx,
4847 &currKrnHeader,
4848 &kernelSize));
4849
4850 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBrcKernelParams(
4851 &kernelStatePtr->KernelParams,
4852 krnStateIdx));
4853
4854 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBrcBindingTable(
4855 &m_brcKernelBindingTable[krnStateIdx], krnStateIdx));
4856
4857 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
4858 kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
4859 kernelStatePtr->KernelParams.iSize = kernelSize;
4860
4861 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
4862 m_stateHeapInterface,
4863 kernelStatePtr->KernelParams.iBTCount,
4864 &kernelStatePtr->dwSshSize,
4865 &kernelStatePtr->dwBindingTableSize));
4866
4867 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
4868
4869 kernelStatePtr++;
4870 }
4871
4872 return eStatus;
4873 }
4874
InitKernelStateMbEnc()4875 MOS_STATUS CodechalEncHevcStateG9::InitKernelStateMbEnc()
4876 {
4877 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4878
4879 CODECHAL_ENCODE_FUNCTION_ENTER;
4880
4881 if(MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrEncodeHEVC10bit) && m_is10BitHevc)
4882 {
4883 m_numMbEncEncKrnStates = CODECHAL_HEVC_MBENC_NUM_BXT_SKL;
4884 }
4885 else if (!m_noMeKernelForPFrame)
4886 {
4887 m_numMbEncEncKrnStates = CODECHAL_HEVC_MBENC_NUM_BXT_SKL;
4888 }
4889 else
4890 {
4891 m_numMbEncEncKrnStates = CODECHAL_HEVC_MBENC_NUM;
4892 }
4893
4894 m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
4895 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
4896
4897 m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
4898 sizeof(GenericBindingTable) * m_numMbEncEncKrnStates);
4899 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
4900
4901 auto kernelStatePtr = m_mbEncKernelStates;
4902
4903 for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++)
4904 {
4905 auto kernelSize = m_combinedKernelSize;
4906 CODECHAL_KERNEL_HEADER currKrnHeader;
4907
4908 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
4909 m_kernelBinary,
4910 ENC_MBENC,
4911 krnStateIdx,
4912 &currKrnHeader,
4913 &kernelSize));
4914
4915 if (kernelSize == 0) //Ignore. It isn't used on current platform.
4916 {
4917 kernelStatePtr++;
4918 continue;
4919 }
4920
4921 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncKernelParams(
4922 &kernelStatePtr->KernelParams,
4923 krnStateIdx));
4924
4925 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncBindingTable(
4926 &m_mbEncKernelBindingTable[krnStateIdx], krnStateIdx));
4927
4928 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
4929 kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
4930 kernelStatePtr->KernelParams.iSize = kernelSize;
4931
4932 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
4933 m_stateHeapInterface,
4934 kernelStatePtr->KernelParams.iBTCount,
4935 &kernelStatePtr->dwSshSize,
4936 &kernelStatePtr->dwBindingTableSize));
4937
4938 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
4939
4940 kernelStatePtr++;
4941 }
4942
4943 return eStatus;
4944 }
4945
InitSurfaceInfoTable()4946 MOS_STATUS CodechalEncHevcStateG9::InitSurfaceInfoTable()
4947 {
4948 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4949
4950 m_surfaceParams = (PCODECHAL_SURFACE_CODEC_PARAMS)MOS_AllocAndZeroMemory(
4951 sizeof(*m_surfaceParams) * SURFACE_NUM_TOTAL);
4952 CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfaceParams);
4953
4954 PCODECHAL_SURFACE_CODEC_PARAMS param = &m_surfaceParams[SURFACE_RAW_Y];
4955 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4956 param,
4957 m_rawSurfaceToEnc,
4958 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4959 0,
4960 m_verticalLineStride,
4961 false));
4962
4963 param = &m_surfaceParams[SURFACE_RAW_10bit_Y];
4964 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4965 param,
4966 m_rawSurfaceToEnc,
4967 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4968 0,
4969 m_verticalLineStride,
4970 false));
4971
4972 // MB stats surface -- currently not used
4973 param = &m_surfaceParams[SURFACE_RAW_MBSTAT];
4974 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4975 param,
4976 &m_resMbStatisticsSurface.sResource,
4977 m_resMbStatisticsSurface.dwSize,
4978 0,
4979 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
4980 0,
4981 true));
4982 param->bRawSurface = true;
4983
4984 param = &m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV];
4985 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4986 param,
4987 &m_formatConvertedSurface[0],
4988 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4989 0,
4990 m_verticalLineStride,
4991 true)); //this should be writable as it is output of formatconversion
4992 param->bUseUVPlane = true;
4993
4994 param = &m_surfaceParams[SURFACE_RAW_Y_UV];
4995 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4996 param,
4997 m_rawSurfaceToEnc,
4998 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4999 0,
5000 m_verticalLineStride,
5001 false));
5002 param->bUseUVPlane = true;
5003
5004 param = &m_surfaceParams[SURFACE_RAW_10bit_Y_UV];
5005 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5006 param,
5007 m_rawSurfaceToEnc,
5008 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
5009 0,
5010 m_verticalLineStride,
5011 false));//this should be writable as it is output of formatconversion
5012 param->bUseUVPlane = true;
5013
5014 param = &m_surfaceParams[SURFACE_Y_2X];
5015 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5016 param,
5017 &m_scaled2xSurface,
5018 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5019 0,
5020 m_verticalLineStride,
5021 false));
5022
5023 param = &m_surfaceParams[SURFACE_32x32_PU_OUTPUT];
5024 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5025 param,
5026 &m_32x32PuOutputData.sResource,
5027 m_32x32PuOutputData.dwSize,
5028 0,
5029 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5030 0,
5031 false));
5032
5033 param = &m_surfaceParams[SURFACE_SLICE_MAP];
5034 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5035 param,
5036 &m_sliceMapSurface,
5037 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5038 0,
5039 m_verticalLineStride,
5040 false));
5041
5042 param = &m_surfaceParams[SURFACE_Y_2X_VME];
5043 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5044 param,
5045 &m_scaled2xSurface,
5046 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
5047 0));
5048
5049 param = &m_surfaceParams[SURFACE_BRC_INPUT];
5050 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5051 param,
5052 (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
5053 m_allocator->GetResourceSize(m_standard, brcInputForEncKernel),
5054 0,
5055 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5056 0,
5057 false));
5058
5059 param = &m_surfaceParams[SURFACE_LCU_QP];
5060 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5061 param,
5062 &m_brcBuffers.sBrcMbQpBuffer,
5063 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5064 0,
5065 m_verticalLineStride,
5066 false));
5067
5068 param = &m_surfaceParams[SURFACE_ROI];
5069 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5070 param,
5071 &m_roiSurface,
5072 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5073 0,
5074 m_verticalLineStride,
5075 false));
5076
5077 param = &m_surfaceParams[SURFACE_BRC_DATA];
5078 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5079 param,
5080 &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
5081 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5082 0,
5083 m_verticalLineStride,
5084 false));
5085
5086 param = &m_surfaceParams[SURFACE_SIMPLIFIED_INTRA];
5087 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5088 param,
5089 &m_simplestIntraSurface,
5090 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5091 0,
5092 m_verticalLineStride,
5093 false));
5094
5095 param = &m_surfaceParams[SURFACE_HME_MVP];
5096 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5097 param,
5098 m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer),
5099 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5100 0,
5101 m_verticalLineStride,
5102 false));
5103
5104 param = &m_surfaceParams[SURFACE_HME_DIST];
5105 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5106 param,
5107 m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xDistortionBuffer),
5108 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
5109 0,
5110 m_verticalLineStride,
5111 false));
5112
5113 param = &m_surfaceParams[SURFACE_16x16PU_SAD];
5114 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5115 param,
5116 &m_sad16x16Pu.sResource,
5117 m_sad16x16Pu.dwSize,
5118 0,
5119 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5120 0,
5121 false));
5122
5123 param = &m_surfaceParams[SURFACE_RAW_VME];
5124 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5125 param,
5126 m_rawSurfaceToEnc,
5127 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
5128 0));
5129
5130 param = &m_surfaceParams[SURFACE_VME_8x8];
5131 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5132 param,
5133 &m_vme8x8Mode.sResource,
5134 m_vme8x8Mode.dwSize,
5135 0,
5136 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5137 0,
5138 false));
5139
5140 param = &m_surfaceParams[SURFACE_CU_RECORD];
5141 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5142 param,
5143 &m_resMbCodeSurface,
5144 m_mbCodeSize - m_mvOffset,
5145 m_mvOffset,
5146 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5147 0,
5148 true));
5149
5150 param = &m_surfaceParams[SURFACE_INTRA_MODE];
5151 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5152 param,
5153 &m_intraMode.sResource,
5154 m_intraMode.dwSize,
5155 0,
5156 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5157 0,
5158 false));
5159
5160 param = &m_surfaceParams[SURFACE_HCP_PAK];
5161 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5162 param,
5163 &m_resMbCodeSurface,
5164 m_mvOffset,
5165 0,
5166 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5167 0,
5168 true));
5169
5170 param = &m_surfaceParams[SURFACE_INTRA_DIST];
5171 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5172 param,
5173 &m_intraDist.sResource,
5174 m_intraDist.dwSize,
5175 0,
5176 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5177 0,
5178 false));
5179
5180 param = &m_surfaceParams[SURFACE_MIN_DIST];
5181 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5182 param,
5183 &m_minDistortion,
5184 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
5185 0,
5186 m_verticalLineStride,
5187 false));
5188
5189 param = &m_surfaceParams[SURFACE_VME_UNI_SIC_DATA];
5190 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5191 param,
5192 &m_vmeSavedUniSic.sResource,
5193 m_vmeSavedUniSic.dwSize,
5194 0,
5195 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5196 0,
5197 false));
5198
5199 param = &m_surfaceParams[SURFACE_COL_MB_MV];
5200 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5201 param,
5202 nullptr,
5203 m_sizeOfMvTemporalBuffer,
5204 0,
5205 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5206 0,
5207 false));
5208
5209 m_concurrentThreadIndex = 0;
5210 for (auto i = 0; i < NUM_CONCURRENT_THREAD; i++)
5211 {
5212 param = &m_surfaceParams[SURFACE_CONCURRENT_THREAD + i];
5213 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5214 param,
5215 &m_concurrentThreadSurface[i],
5216 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
5217 0,
5218 m_verticalLineStride,
5219 false));
5220 }
5221
5222 param = &m_surfaceParams[SURFACE_MB_MV_INDEX];
5223 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5224 param,
5225 &m_mvIndex.sResource,
5226 m_mvIndex.dwSize,
5227 0,
5228 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5229 0,
5230 false));
5231
5232 param = &m_surfaceParams[SURFACE_MVP_INDEX];
5233 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5234 param,
5235 &m_mvpIndex.sResource,
5236 m_mvpIndex.dwSize,
5237 0,
5238 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5239 0,
5240 false));
5241
5242 param = &m_surfaceParams[SURFACE_REF_FRAME_VME];
5243 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5244 param,
5245 0,
5246 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
5247 0));
5248
5249 param = &m_surfaceParams[SURFACE_Y_4X];
5250 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5251 param,
5252 nullptr,
5253 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5254 0,
5255 m_verticalLineStride,
5256 false));
5257
5258 param = &m_surfaceParams[SURFACE_Y_4X_VME];
5259 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
5260 param,
5261 nullptr,
5262 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
5263 0));
5264
5265 param = &m_surfaceParams[SURFACE_BRC_HISTORY];
5266 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5267 param,
5268 &m_brcBuffers.resBrcHistoryBuffer,
5269 m_brcHistoryBufferSize,
5270 0,
5271 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5272 0,
5273 true));
5274
5275 param = &m_surfaceParams[SURFACE_BRC_ME_DIST];
5276 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5277 param,
5278 &m_brcBuffers.sMeBrcDistortionBuffer,
5279 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
5280 0,
5281 m_verticalLineStride,
5282 true));
5283
5284 param = &m_surfaceParams[SURFACE_BRC_PAST_PAK_INFO];
5285 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5286 param,
5287 &m_brcBuffers.resBrcPakStatisticBuffer[0],
5288 m_hevcBrcPakStatisticsSize,
5289 0,
5290 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5291 0,
5292 false));
5293
5294 param = &m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE];
5295 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5296 param,
5297 &m_brcBuffers.resBrcImageStatesWriteBuffer[0],
5298 m_brcBuffers.dwBrcHcpPicStateSize,
5299 0,
5300 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5301 0,
5302 false));
5303
5304 return eStatus;
5305 }
5306
RequestSshAndVerifyCommandBufferSize(PMHW_KERNEL_STATE kernelState)5307 MOS_STATUS CodechalEncHevcStateG9::RequestSshAndVerifyCommandBufferSize(PMHW_KERNEL_STATE kernelState)
5308 {
5309 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5310
5311 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
5312
5313 auto maxBtCount = m_singleTaskPhaseSupported ?
5314 m_maxBtCount : kernelState->KernelParams.iBTCount;
5315
5316 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5317 m_stateHeapInterface,
5318 maxBtCount));
5319
5320 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5321 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5322
5323 return eStatus;
5324 }
5325
SendKernelCmdsAndBindingTable(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_KERNEL_STATE kernelState,CODECHAL_MEDIA_STATE_TYPE mediaStateType,PMHW_VFE_SCOREBOARD customScoreBoard)5326 MOS_STATUS CodechalEncHevcStateG9::SendKernelCmdsAndBindingTable(
5327 PMOS_COMMAND_BUFFER cmdBuffer,
5328 PMHW_KERNEL_STATE kernelState,
5329 CODECHAL_MEDIA_STATE_TYPE mediaStateType,
5330 PMHW_VFE_SCOREBOARD customScoreBoard)
5331 {
5332 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5333
5334 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(cmdBuffer));
5335
5336 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5337 MOS_ZeroMemory(&idParams, sizeof(idParams));
5338 idParams.pKernelState = kernelState;
5339 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5340 m_stateHeapInterface,
5341 1,
5342 &idParams));
5343
5344 // Program render engine pipe commands
5345 SendKernelCmdsParams sendKernelCmdsParams;
5346 sendKernelCmdsParams = SendKernelCmdsParams();
5347 sendKernelCmdsParams.EncFunctionType = mediaStateType;
5348 sendKernelCmdsParams.pKernelState = kernelState;
5349 sendKernelCmdsParams.bEnableCustomScoreBoard = customScoreBoard ? true : false;
5350 sendKernelCmdsParams.pCustomScoreBoard = customScoreBoard;
5351 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(cmdBuffer, &sendKernelCmdsParams));
5352
5353 // Add binding table
5354 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5355 m_stateHeapInterface,
5356 kernelState));
5357
5358 return eStatus;
5359 }
5360
EndKernelCall(CODECHAL_MEDIA_STATE_TYPE mediaStateType,PMHW_KERNEL_STATE kernelState,PMOS_COMMAND_BUFFER cmdBuffer)5361 MOS_STATUS CodechalEncHevcStateG9::EndKernelCall(
5362 CODECHAL_MEDIA_STATE_TYPE mediaStateType,
5363 PMHW_KERNEL_STATE kernelState,
5364 PMOS_COMMAND_BUFFER cmdBuffer)
5365 {
5366 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5367
5368 CODECHAL_ENCODE_FUNCTION_ENTER;
5369
5370 MOS_UNUSED(kernelState);
5371
5372 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(cmdBuffer, mediaStateType));
5373
5374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5375 m_stateHeapInterface,
5376 kernelState));
5377 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5378 {
5379 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5380 m_stateHeapInterface));
5381 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(cmdBuffer, nullptr));
5382 }
5383
5384 CODECHAL_DEBUG_TOOL(
5385 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5386 mediaStateType,
5387 MHW_SSH_TYPE,
5388 kernelState));
5389 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5390 cmdBuffer,
5391 mediaStateType,
5392 nullptr)));
5393
5394 )
5395
5396 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5397
5398 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(cmdBuffer));
5399
5400 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5401 {
5402 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, m_renderContextUsesNullHw);
5403 m_lastTaskInPhase = false;
5404 }
5405
5406 return eStatus;
5407 }
5408
AddCurbeToStateHeap(PMHW_KERNEL_STATE kernelState,CODECHAL_MEDIA_STATE_TYPE mediaStateType,void * curbe,uint32_t curbeSize)5409 MOS_STATUS CodechalEncHevcStateG9::AddCurbeToStateHeap(
5410 PMHW_KERNEL_STATE kernelState,
5411 CODECHAL_MEDIA_STATE_TYPE mediaStateType,
5412 void* curbe,
5413 uint32_t curbeSize)
5414 {
5415 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5416
5417 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
5418 MOS_UNUSED(mediaStateType);
5419
5420 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
5421 curbe,
5422 kernelState->dwCurbeOffset,
5423 curbeSize));
5424
5425 CODECHAL_DEBUG_TOOL(
5426
5427 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5428 mediaStateType,
5429 MHW_DSH_TYPE,
5430 kernelState));
5431 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5432 mediaStateType,
5433 kernelState));
5434
5435 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5436 mediaStateType,
5437 MHW_ISH_TYPE,
5438 kernelState));
5439 )
5440
5441 return eStatus;
5442 }
5443
SetSurfacesState(PMHW_KERNEL_STATE kernelState,PMOS_COMMAND_BUFFER cmdBuffer,SURFACE_ID surfaceId,uint32_t * bindingTableOffset,void * addr,uint32_t width,uint32_t height)5444 MOS_STATUS CodechalEncHevcStateG9::SetSurfacesState(
5445 PMHW_KERNEL_STATE kernelState,
5446 PMOS_COMMAND_BUFFER cmdBuffer,
5447 SURFACE_ID surfaceId,
5448 uint32_t* bindingTableOffset,
5449 void* addr,
5450 uint32_t width,
5451 uint32_t height)
5452 {
5453 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5454
5455 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5456 CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTableOffset);
5457 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
5458
5459 auto surfaceCodecParams = &m_surfaceParams[surfaceId];
5460 surfaceCodecParams->dwBindingTableOffset = bindingTableOffset[0];
5461
5462 if (addr)
5463 {
5464 if (surfaceCodecParams->bIs2DSurface || surfaceCodecParams->bUseAdvState)
5465 {
5466 surfaceCodecParams->psSurface = (PMOS_SURFACE)addr;
5467 }
5468 else
5469 {
5470 surfaceCodecParams->presBuffer = (PMOS_RESOURCE)addr;
5471 }
5472 }
5473
5474 // Some surface states do not always use fixed graphic memory address
5475 switch (surfaceId)
5476 {
5477 case SURFACE_HME_MVP:
5478 surfaceCodecParams->psSurface = m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer);
5479 break;
5480
5481 case SURFACE_HME_DIST:
5482 surfaceCodecParams->psSurface = m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xDistortionBuffer);
5483 break;
5484
5485 case SURFACE_BRC_DATA:
5486 surfaceCodecParams->psSurface = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
5487 break;
5488
5489 case SURFACE_CU_RECORD:
5490 case SURFACE_HCP_PAK:
5491 surfaceCodecParams->presBuffer = &m_resMbCodeSurface;
5492 break;
5493
5494 case SURFACE_RAW_Y:
5495 case SURFACE_RAW_Y_UV:
5496 case SURFACE_RAW_VME:
5497 if (m_hevcSeqParams->bit_depth_luma_minus8) // use format converted surface if input is 10 bit
5498 surfaceCodecParams->psSurface = &m_formatConvertedSurface[0];
5499 else
5500 surfaceCodecParams->psSurface = m_rawSurfaceToEnc;
5501 break;
5502
5503 default:
5504 break;
5505 }
5506
5507 if (surfaceCodecParams->bIs2DSurface && surfaceCodecParams->bUseUVPlane)
5508 {
5509 surfaceCodecParams->dwUVBindingTableOffset = bindingTableOffset[1];
5510 }
5511
5512 surfaceCodecParams->dwWidthInUse = width;
5513 surfaceCodecParams->dwHeightInUse = height;
5514
5515 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5516 m_hwInterface,
5517 cmdBuffer,
5518 surfaceCodecParams,
5519 kernelState));
5520
5521 if (surfaceId != SURFACE_KERNEL_DEBUG &&
5522 surfaceId != SURFACE_HCP_PAK &&
5523 surfaceId != SURFACE_CU_RECORD &&
5524 surfaceId != SURFACE_BRC_HISTORY &&
5525 surfaceId != SURFACE_BRC_ME_DIST)
5526 {
5527 if (surfaceCodecParams->bIsWritable)
5528 {
5529 surfaceCodecParams->bIsWritable = false; // reset to the default value
5530 }
5531
5532 if (surfaceCodecParams->bRenderTarget)
5533 {
5534 surfaceCodecParams->bRenderTarget = false; // reset to the default value
5535 }
5536
5537 if (surfaceCodecParams->bUse16UnormSurfaceFormat)
5538 {
5539 surfaceCodecParams->bUse16UnormSurfaceFormat = false; // reset to the default value
5540 }
5541 }
5542
5543 return eStatus;
5544 }
5545
PicCodingTypeToFrameType(uint32_t picType)5546 uint32_t CodechalEncHevcStateG9::PicCodingTypeToFrameType(uint32_t picType)
5547 {
5548 if (picType == I_TYPE)
5549 {
5550 return HEVC_BRC_FRAME_TYPE_I;
5551 }
5552 else if (picType == B_TYPE)
5553 {
5554 return (m_lowDelay) ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
5555 }
5556 else if (picType == B1_TYPE)
5557 {
5558 return HEVC_BRC_FRAME_TYPE_B1;
5559 }
5560 else if (picType == B2_TYPE)
5561 {
5562 return HEVC_BRC_FRAME_TYPE_B2;
5563 }
5564 else if (picType == P_TYPE && (!m_noMeKernelForPFrame))
5565 {
5566 m_lowDelay = true;
5567 return HEVC_BRC_FRAME_TYPE_P_OR_LB;
5568 }
5569 else
5570 {
5571 CODECHAL_ENCODE_ASSERT(false);
5572 return 0;
5573 }
5574 }
5575
5576 /*
5577 sliceType: 0 (Intra), 1 (Inter P), 2 (inter B).
5578 intraSADTransform: 0-Regular, 1-Reserved, 2-HAAR, 3-HADAMARD
5579 */
CalcLambda(uint8_t sliceType,uint8_t intraSADTransform)5580 void CodechalEncHevcStateG9::CalcLambda(uint8_t sliceType, uint8_t intraSADTransform)
5581 {
5582 if (sliceType != CODECHAL_ENCODE_HEVC_I_SLICE)
5583 {
5584 MOS_SecureMemcpy(&m_qpLambdaMd[sliceType], sizeof(m_qpLambdaMd[sliceType]),
5585 &m_qpLambdaMdLut[sliceType], sizeof(m_qpLambdaMdLut[sliceType]));
5586
5587 MOS_SecureMemcpy(&m_qpLambdaMe[sliceType], sizeof(m_qpLambdaMe[sliceType]),
5588 &m_qpLambdaMeLut[sliceType], sizeof(m_qpLambdaMeLut[sliceType]));
5589 }
5590 else
5591 {
5592 for (uint32_t qp = 0; qp < QP_NUM; qp++)
5593 {
5594 double qpTemp = (double)qp - 12;
5595 double lambdaMd = 0.85 * pow(2.0, qpTemp/3.0);
5596
5597 if ((intraSADTransform != INTRA_TRANSFORM_HAAR) && (intraSADTransform != INTRA_TRANSFORM_HADAMARD))
5598 {
5599 lambdaMd *= 0.95;
5600 }
5601
5602 m_qpLambdaMd[sliceType][qp] =
5603 m_qpLambdaMe[sliceType][qp] = sqrt(lambdaMd);
5604 }
5605 }
5606 }
5607
EncodeBrcInitResetKernel()5608 MOS_STATUS CodechalEncHevcStateG9::EncodeBrcInitResetKernel()
5609 {
5610 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5611
5612 CODECHAL_ENCODE_FUNCTION_ENTER;
5613
5614 PerfTagSetting perfTag;
5615 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
5616
5617 uint32_t krnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET;
5618
5619 auto kernelState = &m_brcKernelStates[krnIdx];
5620 auto bindingTable = &m_brcKernelBindingTable[krnIdx];
5621 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
5622 {
5623 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
5624 }
5625
5626 //Setup DSH
5627 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5628 m_stateHeapInterface,
5629 kernelState,
5630 false,
5631 0,
5632 false,
5633 m_storeData));
5634
5635 //Setup CURBE
5636 CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G9 cmd, *curbe = &cmd;
5637 MOS_SecureMemcpy(curbe, sizeof(cmd), m_brcInitCurbeInit, sizeof(m_brcInitCurbeInit));
5638
5639 curbe->DW0.Value = GetProfileLevelMaxFrameSize();
5640
5641 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
5642 m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
5643 m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
5644 {
5645 if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
5646 {
5647 CODECHAL_ENCODE_ASSERT(false);
5648 }
5649
5650 if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
5651 {
5652 CODECHAL_ENCODE_ASSERT(false);
5653 }
5654 }
5655
5656 curbe->DW1.InitBufFull = m_hevcSeqParams->InitVBVBufferFullnessInBit;
5657 curbe->DW2.BufSize = m_hevcSeqParams->VBVBufferSizeInBit;
5658 curbe->DW3.TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5659 curbe->DW4.MaximumBitRate = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
5660 curbe->DW9.FrameWidth = m_oriFrameWidth;
5661 curbe->DW10.FrameHeight = m_oriFrameHeight;
5662 curbe->DW12.NumberSlice = m_numSlices;
5663
5664 curbe->DW6.FrameRateM = m_hevcSeqParams->FrameRate.Numerator;
5665 curbe->DW7.FrameRateD = m_hevcSeqParams->FrameRate.Denominator;
5666 curbe->DW8.BRCFlag = 0;
5667 curbe->DW8.BRCFlag |= (m_lcuBrcEnabled) ? 0 : CODECHAL_ENCODE_BRCINIT_DISABLE_MBBRC;
5668 // For non-ICQ, ACQP Buffer always set to 1
5669 curbe->DW25.ACQPBuffer = 1;
5670
5671 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
5672 {
5673 curbe->DW4.MaximumBitRate = curbe->DW3.TargetBitRate;
5674 curbe->DW8.BRCFlag |= curbe->DW8.BRCFlag | BRCINIT_ISCBR;
5675 }
5676 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
5677 {
5678 if (curbe->DW4.MaximumBitRate < curbe->DW3.TargetBitRate)
5679 {
5680 curbe->DW4.MaximumBitRate = 2 * curbe->DW3.TargetBitRate;
5681 }
5682 curbe->DW8.BRCFlag |= curbe->DW8.BRCFlag | BRCINIT_ISVBR;
5683 }
5684 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
5685 {
5686 curbe->DW8.BRCFlag |= curbe->DW8.BRCFlag | BRCINIT_ISAVBR;
5687 // For AVBR, max bitrate = target bitrate,
5688 curbe->DW3.TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5689 curbe->DW4.MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5690 }
5691 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
5692 {
5693 curbe->DW8.BRCFlag |= curbe->DW8.BRCFlag | BRCINIT_ISICQ;
5694 curbe->DW25.ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
5695 }
5696 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
5697 {
5698 curbe->DW4.MaximumBitRate = curbe->DW3.TargetBitRate;
5699 curbe->DW8.BRCFlag |= curbe->DW8.BRCFlag | BRCINIT_ISVCM;
5700 }
5701
5702 /**********************************************************************
5703 In case of non-HB/BPyramid Structure
5704 BRC_Param_A = GopP
5705 BRC_Param_B = GopB
5706 In case of HB/BPyramid GOP Structure
5707 BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
5708 BRC Parameters set as follows as per CModel equation
5709 ***********************************************************************/
5710 // BPyramid GOP
5711 if (m_hevcSeqParams->NumOfBInGop[1] != 0 || m_hevcSeqParams->NumOfBInGop[2] != 0)
5712 {
5713 curbe->DW8.BRC_Param_A = ((m_hevcSeqParams->GopPicSize) / m_hevcSeqParams->GopRefDist);
5714 curbe->DW9.BRC_Param_B = curbe->DW8.BRC_Param_A;
5715 curbe->DW13.BRC_Param_C = curbe->DW8.BRC_Param_A * 2;
5716 curbe->DW14.BRC_Param_D = ((m_hevcSeqParams->GopPicSize) - (curbe->DW8.BRC_Param_A) - (curbe->DW13.BRC_Param_C) - (curbe->DW9.BRC_Param_B));
5717 // B1 Level GOP
5718 if (m_hevcSeqParams->NumOfBInGop[2] == 0)
5719 {
5720 curbe->DW14.MaxBRCLevel = 3;
5721 }
5722 // B2 Level GOP
5723 else
5724 {
5725 curbe->DW14.MaxBRCLevel = 4;
5726 }
5727 }
5728 // For Regular GOP - No BPyramid
5729 else
5730 {
5731 curbe->DW14.MaxBRCLevel = 1;
5732 curbe->DW8.BRC_Param_A =
5733 (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
5734 curbe->DW9.BRC_Param_B = m_hevcSeqParams->GopPicSize - 1 - curbe->DW8.BRC_Param_A;
5735 }
5736
5737 curbe->DW10.AVBRAccuracy = m_usAvbrAccuracy;
5738 curbe->DW11.AVBRConvergence = m_usAvbrConvergence;
5739
5740 // Set dynamic thresholds
5741 double inputBitsPerFrame =
5742 ((double)(curbe->DW4.MaximumBitRate) * (double)(curbe->DW7.FrameRateD) /
5743 (double)(curbe->DW6.FrameRateM));
5744
5745 if (curbe->DW2.BufSize < (uint32_t)inputBitsPerFrame * 4)
5746 {
5747 curbe->DW2.BufSize = (uint32_t)inputBitsPerFrame * 4;
5748 }
5749
5750 if (curbe->DW1.InitBufFull == 0)
5751 {
5752 curbe->DW1.InitBufFull = 7 * curbe->DW2.BufSize/8;
5753 }
5754 if (curbe->DW1.InitBufFull < (uint32_t)(inputBitsPerFrame*2))
5755 {
5756 curbe->DW1.InitBufFull = (uint32_t)(inputBitsPerFrame*2);
5757 }
5758 if (curbe->DW1.InitBufFull > curbe->DW2.BufSize)
5759 {
5760 curbe->DW1.InitBufFull = curbe->DW2.BufSize;
5761 }
5762
5763 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
5764 {
5765 // For AVBR, Buffer size = 2*Bitrate, InitVBV = 0.75 * bufferSize
5766 curbe->DW2.BufSize = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
5767 curbe->DW1.InitBufFull = (uint32_t)(0.75 * curbe->DW2.BufSize);
5768 }
5769
5770 double bpsRatio = inputBitsPerFrame / ((double)(curbe->DW2.BufSize)/30);
5771 bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;
5772
5773 curbe->DW19.DeviationThreshold0_PBframe = (uint32_t) (-50 * pow(0.90, bpsRatio));
5774 curbe->DW19.DeviationThreshold1_PBframe = (uint32_t) (-50 * pow(0.66, bpsRatio));
5775 curbe->DW19.DeviationThreshold2_PBframe = (uint32_t) (-50 * pow(0.46, bpsRatio));
5776 curbe->DW19.DeviationThreshold3_PBframe = (uint32_t) (-50 * pow(0.3, bpsRatio));
5777
5778 curbe->DW20.DeviationThreshold4_PBframe = (uint32_t) (50 * pow(0.3, bpsRatio));
5779 curbe->DW20.DeviationThreshold5_PBframe = (uint32_t) (50 * pow(0.46, bpsRatio));
5780 curbe->DW20.DeviationThreshold6_PBframe = (uint32_t) (50 * pow(0.7, bpsRatio));
5781 curbe->DW20.DeviationThreshold7_PBframe = (uint32_t) (50 * pow(0.9, bpsRatio));
5782
5783 curbe->DW21.DeviationThreshold0_VBRcontrol = (uint32_t) (-50 * pow(0.9, bpsRatio));
5784 curbe->DW21.DeviationThreshold1_VBRcontrol = (uint32_t) (-50 * pow(0.7, bpsRatio));
5785 curbe->DW21.DeviationThreshold2_VBRcontrol = (uint32_t) (-50 * pow(0.5, bpsRatio));
5786 curbe->DW21.DeviationThreshold3_VBRcontrol = (uint32_t) (-50 * pow(0.3, bpsRatio));
5787
5788 curbe->DW22.DeviationThreshold4_VBRcontrol = (uint32_t) (100 * pow(0.4, bpsRatio));
5789 curbe->DW22.DeviationThreshold5_VBRcontrol = (uint32_t) (100 * pow(0.5, bpsRatio));
5790 curbe->DW22.DeviationThreshold6_VBRcontrol = (uint32_t) (100 * pow(0.75, bpsRatio));
5791 curbe->DW22.DeviationThreshold7_VBRcontrol = (uint32_t) (100 * pow(0.9, bpsRatio));
5792
5793 curbe->DW23.DeviationThreshold0_Iframe = (uint32_t) (-50 * pow(0.8, bpsRatio));
5794 curbe->DW23.DeviationThreshold1_Iframe = (uint32_t) (-50 * pow(0.6, bpsRatio));
5795 curbe->DW23.DeviationThreshold2_Iframe = (uint32_t) (-50 * pow(0.34, bpsRatio));
5796 curbe->DW23.DeviationThreshold3_Iframe = (uint32_t) (-50 * pow(0.2, bpsRatio));
5797
5798 curbe->DW24.DeviationThreshold4_Iframe = (uint32_t) (50 * pow(0.2, bpsRatio));
5799 curbe->DW24.DeviationThreshold5_Iframe = (uint32_t) (50 * pow(0.4, bpsRatio));
5800 curbe->DW24.DeviationThreshold6_Iframe = (uint32_t) (50 * pow(0.66, bpsRatio));
5801 curbe->DW24.DeviationThreshold7_Iframe = (uint32_t) (50 * pow(0.9, bpsRatio));
5802
5803 if (m_brcInit)
5804 {
5805 m_dBrcInitCurrentTargetBufFullInBits = curbe->DW1.InitBufFull;
5806 }
5807
5808 m_brcInitResetBufSizeInBits = curbe->DW2.BufSize;
5809 m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;
5810
5811 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;
5812 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
5813
5814 //#if (_DEBUG || _RELEASE_INTERNAL)
5815 // if (m_swBrcMode != nullptr)
5816 // {
5817 // CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcSwBrcImpl(
5818 // m_debugInterface,
5819 // encFunctionType,
5820 // this,
5821 // bBrcReset,
5822 // kernelState,
5823 // kernelState));
5824 //
5825 // return eStatus;
5826 // }
5827 //#endif // (_DEBUG || _RELEASE_INTERNAL)
5828
5829 MOS_COMMAND_BUFFER cmdBuffer;
5830 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
5831 &cmdBuffer,
5832 kernelState,
5833 encFunctionType,
5834 nullptr));
5835
5836 //Add surface states
5837 uint32_t startIndex = 0;
5838 // BRC history buffer
5839 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5840 kernelState,
5841 &cmdBuffer,
5842 SURFACE_BRC_HISTORY,
5843 &bindingTable->dwBindingTableEntries[startIndex++],
5844 &m_brcBuffers.resBrcHistoryBuffer));
5845
5846 // Distortion data surface
5847 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5848 kernelState,
5849 &cmdBuffer,
5850 SURFACE_BRC_ME_DIST,
5851 &bindingTable->dwBindingTableEntries[startIndex++],
5852 &m_brcBuffers.sMeBrcDistortionBuffer));
5853
5854 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
5855
5856 MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
5857 MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
5858
5859 MediaObjectInlineData mediaObjectInlineData;
5860 MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
5861
5862 mediaObjectParams.pInlineData = &mediaObjectInlineData;
5863 mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
5864 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObject(
5865 &cmdBuffer,
5866 nullptr,
5867 &mediaObjectParams));
5868
5869 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
5870 encFunctionType,
5871 kernelState,
5872 &cmdBuffer));
5873
5874 // debug dump
5875 CODECHAL_DEBUG_TOOL(
5876 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5877 &m_brcBuffers.resBrcHistoryBuffer,
5878 CodechalDbgAttr::attrOutput,
5879 "HistoryWrite",
5880 m_brcHistoryBufferSize,
5881 0,
5882 CODECHAL_MEDIA_STATE_BRC_INIT_RESET));
5883
5884 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5885 &m_brcBuffers.sMeBrcDistortionBuffer,
5886 CodechalDbgAttr::attrOutput,
5887 "BrcDist",
5888 CODECHAL_MEDIA_STATE_BRC_INIT_RESET)););
5889
5890 return eStatus;
5891 }
5892
EncodeCoarseIntra16x16Kernel()5893 MOS_STATUS CodechalEncHevcStateG9::EncodeCoarseIntra16x16Kernel()
5894 {
5895 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5896
5897 CODECHAL_ENCODE_FUNCTION_ENTER;
5898
5899 PerfTagSetting perfTag;
5900 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_INTRA_DIST);
5901
5902 uint32_t krnIdx = CODECHAL_HEVC_BRC_COARSE_INTRA;
5903
5904 auto kernelState = &m_brcKernelStates[krnIdx];
5905 auto bindingTable = &m_brcKernelBindingTable[krnIdx];
5906 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
5907 {
5908 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
5909 }
5910
5911 //Setup DSH
5912 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5913 m_stateHeapInterface,
5914 kernelState,
5915 false,
5916 0,
5917 false,
5918 m_storeData));
5919
5920 //Setup CURBE
5921 CODECHAL_ENC_HEVC_COARSE_INTRA_CURBE_G9 cmd, *curbe = &cmd;
5922 MOS_ZeroMemory(curbe, sizeof(*curbe));
5923
5924 // the width and height is the resolution of 4x down-scaled surface
5925 curbe->DW0.PictureWidthInLumaSamples = m_downscaledWidthInMb4x << 4;
5926 curbe->DW0.PictureHeightInLumaSamples = m_downscaledHeightInMb4x << 4;
5927
5928 curbe->DW1.InterSAD = 2;
5929 curbe->DW1.IntraSAD = 2;
5930
5931 uint32_t startBTI = 0;
5932 curbe->DW8.BTI_Src_Y4 = bindingTable->dwBindingTableEntries[startBTI++];
5933 curbe->DW9.BTI_Intra_Dist = bindingTable->dwBindingTableEntries[startBTI++];
5934 curbe->DW10.BTI_VME_Intra = bindingTable->dwBindingTableEntries[startBTI++];
5935
5936 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
5937
5938 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_ENC_I_FRAME_DIST;
5939 CODECHAL_ENCODE_CHK_STATUS_RETURN(
5940 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))
5941 );
5942
5943 MOS_COMMAND_BUFFER cmdBuffer;
5944 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
5945 &cmdBuffer,
5946 kernelState,
5947 encFunctionType,
5948 nullptr));
5949
5950 //Add surface states
5951 startBTI = 0;
5952 //0: Source Y4
5953 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5954 kernelState,
5955 &cmdBuffer,
5956 SURFACE_Y_4X,
5957 &bindingTable->dwBindingTableEntries[startBTI++],
5958 m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER)));
5959
5960 //1: Intra distortion surface
5961 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5962 kernelState,
5963 &cmdBuffer,
5964 SURFACE_BRC_ME_DIST,
5965 &bindingTable->dwBindingTableEntries[startBTI++],
5966 &m_brcBuffers.sBrcIntraDistortionBuffer));
5967
5968 //2: Source Y4 for VME
5969 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
5970 kernelState,
5971 &cmdBuffer,
5972 SURFACE_Y_4X_VME,
5973 &bindingTable->dwBindingTableEntries[startBTI++],
5974 m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER)));
5975
5976 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
5977
5978 if (!m_hwWalker)
5979 {
5980 eStatus = MOS_STATUS_UNKNOWN;
5981 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
5982 return eStatus;
5983 }
5984
5985 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5986 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5987 walkerCodecParams.WalkerMode = m_walkerMode;
5988 walkerCodecParams.dwResolutionX = m_downscaledWidthInMb4x;
5989 walkerCodecParams.dwResolutionY = m_downscaledHeightInMb4x;
5990 walkerCodecParams.bNoDependency = true;
5991
5992 MHW_WALKER_PARAMS walkerParams;
5993 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
5994 m_hwInterface,
5995 &walkerParams,
5996 &walkerCodecParams));
5997
5998 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
5999 &cmdBuffer,
6000 &walkerParams));
6001
6002 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6003 encFunctionType,
6004 kernelState,
6005 &cmdBuffer));
6006
6007 return eStatus;
6008 }
6009
GetDefaultCurbeEncBKernel(uint32_t & curbeSize)6010 uint32_t* CodechalEncHevcStateG9::GetDefaultCurbeEncBKernel(uint32_t& curbeSize)
6011 {
6012 CODECHAL_ENCODE_FUNCTION_ENTER;
6013
6014 if (m_hevcSeqParams->TargetUsage == 0x07)
6015 {
6016 if(m_pictureCodingType == I_TYPE)
6017 {
6018 // When TU=7, there is no normal I kernel calls.
6019 // Instead, B kernel is used for I kernel function and a specfic CURBE setting needs to be used
6020 curbeSize = sizeof(m_encBTu7ICurbeInit);
6021 return (uint32_t*)m_encBTu7ICurbeInit;
6022 }
6023 else if (m_pictureCodingType == P_TYPE)
6024 {
6025 curbeSize = sizeof(m_encBTu7PCurbeInit);
6026 return (uint32_t*)m_encBTu7PCurbeInit;
6027 }
6028 else
6029 {
6030 curbeSize = sizeof(m_encBTu7BCurbeInit);
6031 return (uint32_t*)m_encBTu7BCurbeInit;
6032 }
6033 }
6034 else if (m_hevcSeqParams->TargetUsage == 0x04)
6035 {
6036 if (m_pictureCodingType == P_TYPE)
6037 {
6038 curbeSize = sizeof(m_encBTu4PCurbeInit);
6039 return (uint32_t*)m_encBTu4PCurbeInit;
6040 }
6041 else
6042 {
6043 curbeSize = sizeof(m_encBTu4BCurbeInit);
6044 return (uint32_t*)m_encBTu4BCurbeInit;
6045 }
6046 }
6047 else if (m_hevcSeqParams->TargetUsage == 0x01)
6048 {
6049 if (m_pictureCodingType == P_TYPE)
6050 {
6051 curbeSize = sizeof(m_encBTu1PCurbeInit);
6052 return (uint32_t*)m_encBTu1PCurbeInit;
6053 }
6054 else
6055 {
6056 curbeSize = sizeof(m_encBTu1BCurbeInit);
6057 return (uint32_t*)m_encBTu1BCurbeInit;
6058 }
6059 }
6060 else
6061 {
6062 CODECHAL_ENCODE_ASSERT(false);
6063 }
6064
6065 return nullptr;
6066 }
6067
SetupROISurface()6068 MOS_STATUS CodechalEncHevcStateG9::SetupROISurface()
6069 {
6070 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6071
6072 CODECHAL_ENCODE_FUNCTION_ENTER;
6073
6074 m_hevcPicParams->NumROI = MOS_MIN(m_hevcPicParams->NumROI, CODECHAL_ENCODE_HEVC_MAX_NUM_ROI);
6075
6076 // Following code for configuring the ROI surface has been lifted from the CModel and
6077 // ported to work in the context of the driver instead.
6078
6079 CODECHAL_ENC_HEVC_ROI_G9 currentROI[CODECHAL_ENCODE_HEVC_MAX_NUM_ROI] = { 0 };
6080 for (uint32_t i = 0; i < m_hevcPicParams->NumROI; ++i)
6081 {
6082 currentROI[i].Top = m_hevcPicParams->ROI[i].Top;
6083 currentROI[i].Bottom = m_hevcPicParams->ROI[i].Bottom;
6084 currentROI[i].Left = m_hevcPicParams->ROI[i].Left;
6085 currentROI[i].Right = m_hevcPicParams->ROI[i].Right;
6086 if (m_brcEnabled && !m_roiValueInDeltaQp)
6087 {
6088 currentROI[i].ROI_Level = m_hevcPicParams->ROI[i].PriorityLevelOrDQp * 5;
6089 }
6090 else
6091 {
6092 currentROI[i].QPDelta = m_hevcPicParams->ROI[i].PriorityLevelOrDQp;
6093 }
6094 }
6095
6096 MOS_LOCK_PARAMS lockParams;
6097 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
6098 lockParams.ReadOnly = 1;
6099 uint32_t* data = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, &m_roiSurface.OsResource, &lockParams);
6100 if (!data)
6101 {
6102 eStatus = MOS_STATUS_INVALID_HANDLE;
6103 return eStatus;
6104 }
6105
6106 uint32_t widthInMBsAligned = (m_picWidthInMb * 4 + 63) & ~63;
6107 uint32_t numMBs = m_picWidthInMb * m_picHeightInMb;
6108 for (uint32_t mb = 0 ; mb <= numMBs ; mb++)
6109 {
6110 int32_t curMbY = mb / m_picWidthInMb;
6111 int32_t curMbX = mb - curMbY * m_picWidthInMb;
6112
6113 uint32_t outdata = 0;
6114 for (int32_t roi = (m_hevcPicParams->NumROI - 1); roi >= 0; roi--)
6115 {
6116 if ((currentROI[roi].ROI_Level == 0) && (currentROI[roi].QPDelta == 0))
6117 {
6118 continue;
6119 }
6120
6121 if ((curMbX >= (int32_t)currentROI[roi].Left) && (curMbX < (int32_t)currentROI[roi].Right) &&
6122 (curMbY >= (int32_t)currentROI[roi].Top) && (curMbY < (int32_t)currentROI[roi].Bottom))
6123 {
6124 outdata = 15 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6125 }
6126 else if ((curMbX >= (int32_t)currentROI[roi].Left - 1) && (curMbX < (int32_t)currentROI[roi].Right + 1) &&
6127 (curMbY >= (int32_t)currentROI[roi].Top - 1) && (curMbY < (int32_t)currentROI[roi].Bottom + 1))
6128 {
6129 outdata = 14 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6130 }
6131 else if ((curMbX >= (int32_t)currentROI[roi].Left - 2) && (curMbX < (int32_t)currentROI[roi].Right + 2) &&
6132 (curMbY >= (int32_t)currentROI[roi].Top - 2) && (curMbY < (int32_t)currentROI[roi].Bottom + 2))
6133 {
6134 outdata = 13 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6135 }
6136 else if ((curMbX >= (int32_t)currentROI[roi].Left - 3) && (curMbX < (int32_t)currentROI[roi].Right + 3) &&
6137 (curMbY >= (int32_t)currentROI[roi].Top - 3) && (curMbY < (int32_t)currentROI[roi].Bottom + 3))
6138 {
6139 outdata = 12 | (((currentROI[roi].ROI_Level) & 0xFF) << 8) | ((currentROI[roi].QPDelta & 0xFF) << 16);
6140 }
6141 }
6142 data[(curMbY * (widthInMBsAligned>>2)) + curMbX] = outdata;
6143 }
6144
6145 m_osInterface->pfnUnlockResource(m_osInterface, &m_roiSurface.OsResource);
6146
6147 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6148 &m_roiSurface,
6149 CodechalDbgAttr::attrInput,
6150 "BrcUpdate_ROI",
6151 CODECHAL_MEDIA_STATE_BRC_UPDATE)));
6152
6153 return eStatus;
6154 }
6155
SetupBrcConstantTable(PMOS_SURFACE brcConstantData)6156 MOS_STATUS CodechalEncHevcStateG9::SetupBrcConstantTable(PMOS_SURFACE brcConstantData)
6157 {
6158 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6159
6160 CODECHAL_ENCODE_FUNCTION_ENTER;
6161
6162 MOS_LOCK_PARAMS lockFlags;
6163 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6164 lockFlags.WriteOnly = true;
6165 uint8_t* data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags);
6166 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6167
6168 uint32_t size = brcConstantData->dwHeight * brcConstantData->dwWidth;
6169 // 576-byte of Qp adjust table
6170 MOS_SecureMemcpy(data, size, g_cInit_HEVC_BRC_QP_ADJUST, sizeof(g_cInit_HEVC_BRC_QP_ADJUST));
6171 data += sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
6172 size -= sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
6173
6174 const uint32_t sizeSkipValTable = HEVC_BRC_SKIP_VAL_TABLE_SIZE;
6175 const uint32_t sizelambdaTable = HEVC_BRC_LAMBDA_TABLE_SIZE;
6176
6177 // Skip thread table
6178 if(m_pictureCodingType == I_TYPE)
6179 {
6180 MOS_ZeroMemory(data, sizeSkipValTable);
6181 }
6182 else
6183 {
6184 uint32_t curbeSize = 0;
6185 void* defaultCurbe = (void*)GetDefaultCurbeEncBKernel(curbeSize);
6186 CODECHAL_ENCODE_ASSERT(defaultCurbe);
6187
6188 CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
6189 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
6190
6191 if(curbe->DW3.BlockBasedSkipEnable)
6192 {
6193 MOS_SecureMemcpy(data, size, m_skipThread[1], sizeof(m_skipThread[1]));
6194 }
6195 else
6196 {
6197 MOS_SecureMemcpy(data, size, m_skipThread[0], sizeof(m_skipThread[0]));
6198 }
6199 }
6200 data += sizeSkipValTable;
6201 size -= sizeSkipValTable;
6202
6203 //lambda value table
6204 MOS_SecureMemcpy(data, size, m_brcLambdaHaar, sizeof(m_brcLambdaHaar));
6205 data += sizelambdaTable;
6206 size -= sizelambdaTable;
6207
6208 //Mv mode cost table
6209 if(m_pictureCodingType == I_TYPE)
6210 {
6211 MOS_SecureMemcpy(data, size, m_brcMvCostHaar[0], sizeof(m_brcMvCostHaar[0]));
6212 }
6213 else if (m_pictureCodingType == P_TYPE)
6214 {
6215 MOS_SecureMemcpy(data, size, m_brcMvCostHaar[1], sizeof(m_brcMvCostHaar[1]));
6216 }
6217 else
6218 {
6219 MOS_SecureMemcpy(data, size, m_brcMvCostHaar[2], sizeof(m_brcMvCostHaar[2]));
6220 }
6221
6222 m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);
6223
6224 return eStatus;
6225 }
6226
Convert1byteTo2bytesQPperLCU(PMOS_SURFACE lcuQPIn,PMOS_SURFACE lcuQPOut)6227 MOS_STATUS CodechalEncHevcStateG9::Convert1byteTo2bytesQPperLCU(PMOS_SURFACE lcuQPIn, PMOS_SURFACE lcuQPOut)
6228 {
6229 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6230
6231 CODECHAL_ENCODE_FUNCTION_ENTER;
6232
6233 MOS_LOCK_PARAMS lockFlagsIn;
6234 MOS_LOCK_PARAMS lockFlagsOut;
6235 MOS_ZeroMemory(&lockFlagsIn, sizeof(MOS_LOCK_PARAMS));
6236 MOS_ZeroMemory(&lockFlagsOut, sizeof(MOS_LOCK_PARAMS));
6237
6238 lockFlagsIn.ReadOnly = true;
6239 uint8_t* dataIn = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &lcuQPIn->OsResource, &lockFlagsIn);
6240 CODECHAL_ENCODE_CHK_NULL_RETURN(dataIn);
6241
6242 lockFlagsOut.WriteOnly = true;
6243 uint8_t* dataOut = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &lcuQPOut->OsResource, &lockFlagsOut);
6244 CODECHAL_ENCODE_CHK_NULL_RETURN(dataOut);
6245
6246 for(uint32_t h = 0; h < lcuQPIn->dwHeight; h++)
6247 {
6248 for(uint32_t w = 0; w < lcuQPIn->dwWidth; w++)
6249 {
6250 *(dataOut + h * lcuQPOut->dwPitch + 2 * w) = *(dataIn + h * lcuQPIn->dwPitch + w);
6251 *(dataOut + h * lcuQPOut->dwPitch + 2 * w + 1) = 0;
6252 }
6253 }
6254
6255 m_osInterface->pfnUnlockResource(m_osInterface, &lcuQPIn->OsResource);
6256 m_osInterface->pfnUnlockResource(m_osInterface, &lcuQPOut->OsResource);
6257
6258 return eStatus;
6259 }
6260
SetupROICurbe(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 curbe)6261 MOS_STATUS CodechalEncHevcStateG9::SetupROICurbe(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 curbe)
6262 {
6263 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6264
6265 curbe->DW6.CQPValue = 0;
6266 curbe->DW6.ROIFlag = 0x1 | (m_brcEnabled << 1) | (m_hevcSeqParams->bVideoSurveillance << 2);
6267
6268 uint32_t roiSize = 0;
6269 for (uint32_t i = 0; i < m_hevcPicParams->NumROI; ++i)
6270 {
6271 roiSize += (CODECHAL_MACROBLOCK_HEIGHT * MOS_ABS(m_hevcPicParams->ROI[i].Top - m_hevcPicParams->ROI[i].Bottom)) *
6272 (CODECHAL_MACROBLOCK_WIDTH * MOS_ABS(m_hevcPicParams->ROI[i].Right - m_hevcPicParams->ROI[i].Left));
6273 }
6274
6275 uint32_t roiRatio = 0;
6276 if (roiSize)
6277 {
6278 uint32_t numMBs = m_picWidthInMb * m_picHeightInMb;
6279 roiRatio = 2 * (numMBs * 256 / roiSize - 1);
6280 roiRatio = MOS_MIN(51, roiRatio); // clip QP from 0-51
6281 }
6282
6283 curbe->DW6.ROIRatio = roiRatio;
6284 curbe->DW7.FrameWidthInLCU = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
6285
6286 // if the BRC update LCU kernel is being launched in CQP mode we need to add
6287 // the minimum required parameters it needs to run. This is used in ROI CQP.
6288 // In the case of BRC the CURBE will already be set up from frame update setup.
6289 if (!m_brcEnabled)
6290 {
6291 curbe->DW1.FrameNumber = m_storeData - 1;
6292 curbe->DW6.CQPValue = CalSliceQp();
6293 curbe->DW5.CurrFrameType = PicCodingTypeToFrameType(m_pictureCodingType);
6294 }
6295
6296 return eStatus;
6297 }
6298
EncodeBrcUpdateLCUBasedKernel(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 frameBasedBrcCurbe)6299 MOS_STATUS CodechalEncHevcStateG9::EncodeBrcUpdateLCUBasedKernel(PCODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 frameBasedBrcCurbe)
6300 {
6301 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6302
6303 CODECHAL_ENCODE_FUNCTION_ENTER;
6304
6305 PerfTagSetting perfTag;
6306 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);
6307
6308 uint32_t krnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE;
6309 auto kernelState = &m_brcKernelStates[krnIdx];
6310 auto bindingTable = &m_brcKernelBindingTable[krnIdx];
6311
6312 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
6313 {
6314 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
6315 }
6316
6317 // Setup DSH
6318 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6319 m_stateHeapInterface,
6320 kernelState,
6321 false,
6322 0,
6323 false,
6324 m_storeData));
6325
6326 // Setup Curbe
6327 CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 cmd, *curbe = &cmd;
6328 if (m_brcEnabled)
6329 {
6330 MOS_SecureMemcpy(curbe, sizeof(cmd), frameBasedBrcCurbe, sizeof(*frameBasedBrcCurbe));
6331 }
6332 else
6333 {
6334 //confiure LCU BRC Update CURBE for CQP (used in ROI) here
6335 MOS_SecureMemcpy(curbe, sizeof(cmd), m_brcUpdateCurbeInit, sizeof(m_brcUpdateCurbeInit));
6336 }
6337
6338 if (m_hevcPicParams->NumROI)
6339 {
6340 SetupROICurbe(&cmd);
6341 SetupROISurface();
6342 }
6343
6344 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_BRC_LCU_UPDATE;
6345 CODECHAL_ENCODE_CHK_STATUS_RETURN(
6346 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
6347
6348 MOS_COMMAND_BUFFER cmdBuffer;
6349 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
6350 &cmdBuffer,
6351 kernelState,
6352 encFunctionType,
6353 nullptr));
6354
6355 //Add surface states
6356 uint32_t startIndex = 0;
6357
6358 //0: BRC history buffer
6359 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6360 kernelState,
6361 &cmdBuffer,
6362 SURFACE_BRC_HISTORY,
6363 &bindingTable->dwBindingTableEntries[startIndex++],
6364 &m_brcBuffers.resBrcHistoryBuffer));
6365
6366 //1: BRC distortion data surface : when picture type is I-type, both inter and intra distortion are the same
6367 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6368 kernelState,
6369 &cmdBuffer,
6370 SURFACE_BRC_ME_DIST,
6371 &bindingTable->dwBindingTableEntries[startIndex++],
6372 m_brcDistortion));
6373
6374 //2: Intra distortion data surface
6375 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6376 kernelState,
6377 &cmdBuffer,
6378 SURFACE_BRC_ME_DIST,
6379 &bindingTable->dwBindingTableEntries[startIndex++],
6380 &m_brcBuffers.sBrcIntraDistortionBuffer));
6381
6382 if(m_hmeSupported)
6383 {
6384 //3: HME MV surface
6385 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6386 kernelState,
6387 &cmdBuffer,
6388 SURFACE_HME_MVP,
6389 &bindingTable->dwBindingTableEntries[startIndex++]));
6390 }
6391 else
6392 {
6393 startIndex++;
6394 }
6395
6396 //4: LCU Qp surface
6397 m_surfaceParams[SURFACE_LCU_QP].bIsWritable =
6398 m_surfaceParams[SURFACE_LCU_QP].bRenderTarget = true;
6399 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6400 kernelState,
6401 &cmdBuffer,
6402 SURFACE_LCU_QP,
6403 &bindingTable->dwBindingTableEntries[startIndex++],
6404 &m_brcBuffers.sBrcMbQpBuffer));
6405
6406 //5: ROI Surface
6407 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6408 kernelState,
6409 &cmdBuffer,
6410 SURFACE_ROI,
6411 &bindingTable->dwBindingTableEntries[startIndex++],
6412 &m_roiSurface));
6413
6414 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
6415
6416 if (!m_hwWalker)
6417 {
6418 eStatus = MOS_STATUS_UNKNOWN;
6419 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
6420 return eStatus;
6421 }
6422
6423 // LCU-based kernel needs to be executed in 4x4 LCU mode (128x128 per block)
6424 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6425 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6426 walkerCodecParams.WalkerMode = m_walkerMode;
6427 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 128) >> 7;
6428 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 128) >> 7;
6429 /* Enforce no dependency dispatch order for LCU-based BRC update kernel */
6430 walkerCodecParams.bNoDependency = true;
6431 walkerCodecParams.wPictureCodingType = m_pictureCodingType;
6432 walkerCodecParams.bUseScoreboard = m_useHwScoreboard;
6433
6434 MHW_WALKER_PARAMS walkerParams;
6435 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
6436 m_hwInterface,
6437 &walkerParams,
6438 &walkerCodecParams));
6439
6440 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
6441 &cmdBuffer,
6442 &walkerParams));
6443
6444 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6445 encFunctionType,
6446 kernelState,
6447 &cmdBuffer));
6448
6449 return eStatus;
6450 }
6451
EncodeBrcUpdateKernel()6452 MOS_STATUS CodechalEncHevcStateG9::EncodeBrcUpdateKernel()
6453 {
6454 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6455
6456 CODECHAL_ENCODE_FUNCTION_ENTER;
6457
6458 PerfTagSetting perfTag;
6459 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
6460
6461 uint32_t krnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
6462 auto kernelState = &m_brcKernelStates[krnIdx];
6463 auto bindingTable = &m_brcKernelBindingTable[krnIdx];
6464 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
6465 {
6466 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
6467 }
6468
6469 // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
6470 auto brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
6471 MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
6472 mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams;
6473 mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams;
6474 mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses();
6475
6476 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
6477
6478 auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
6479 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
6480
6481 // debug dump
6482 CODECHAL_DEBUG_TOOL(
6483 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6484 &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
6485 CodechalDbgAttr::attrInput,
6486 "ImgStateRead",
6487 BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
6488 0,
6489 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6490
6491 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6492 &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
6493 CodechalDbgAttr::attrInput,
6494 "ConstData",
6495 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6496
6497 // PAK statistics buffer is only dumped for BrcUpdate kernel input
6498 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6499 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
6500 CodechalDbgAttr::attrInput,
6501 "PakStats",
6502 HEVC_BRC_PAK_STATISTCS_SIZE,
6503 0,
6504 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6505 // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces
6506 if (m_brcDistortion) {
6507 CODECHAL_ENCODE_CHK_STATUS_RETURN(
6508 m_debugInterface->DumpBuffer(
6509 &m_brcDistortion->OsResource,
6510 CodechalDbgAttr::attrInput,
6511 "BrcDist",
6512 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6513 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6514 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6515 } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcBuffers.resBrcHistoryBuffer,
6516 CodechalDbgAttr::attrInput,
6517 "HistoryRead",
6518 m_brcHistoryBufferSize,
6519 0,
6520 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6521 if (m_brcBuffers.pMbEncKernelStateInUse) {
6522 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6523 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6524 m_brcBuffers.pMbEncKernelStateInUse));
6525 } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer,
6526 CodechalDbgAttr::attrInput,
6527 "MBStatsSurf",
6528 m_hwInterface->m_avcMbStatBufferSize,
6529 0,
6530 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6531 // Setup DSH
6532 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6533 m_stateHeapInterface,
6534 kernelState,
6535 false,
6536 0,
6537 false,
6538 m_storeData));
6539
6540 // Setup Curbe
6541 CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G9 cmd, *curbe = &cmd;
6542 MOS_SecureMemcpy(curbe, sizeof(cmd), m_brcUpdateCurbeInit, sizeof(m_brcUpdateCurbeInit));
6543
6544 curbe->DW5.TARGETSIZE_FLAG = 0;
6545
6546 if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits)
6547 {
6548 m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits;
6549 curbe->DW5.TARGETSIZE_FLAG = 1;
6550 }
6551
6552 if (m_numSkipFrames)
6553 {
6554 // pass num/size of skipped frames to update BRC
6555 curbe->DW6.NumSkippedFrames = m_numSkipFrames;
6556 curbe->DW15.SizeOfSkippedFrames = m_sizeSkipFrames;
6557
6558 // account for skipped frame in calculating CurrentTargetBufFullInBits
6559 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
6560 }
6561
6562 curbe->DW0.TARGETSIZE = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
6563 curbe->DW1.FrameNumber = m_storeData - 1;
6564
6565 curbe->DW2.PictureHeaderSize = GetPicHdrSize();
6566
6567 curbe->DW5.CurrFrameType = PicCodingTypeToFrameType(m_pictureCodingType);
6568
6569 // Only brc init uses BRCFlag, brc update does NOT use it (it's reserved bits)
6570 curbe->DW5.BRCFlag = 0;
6571
6572 // Notes from BRC Kernel
6573 /***********************************************************************************************************
6574 * When update kernel Curbe GRF 1.7 bit 15 is set to 1:
6575 * BRC matched with Arch CModel SVN revision 13030 with part of HRD BRC fix in svn 14029 and svn 14228 [HRD]
6576 *
6577 * When update kernel Curbe GRF 1.7 bit 15 is set to 0:
6578 * BRC matched with Arch CModel SVN revision 13419 [HRD Fix] with svn 13833, svn 13827 [Quality] and
6579 * part of BRC fix in svn 14029, svn 14228, svn 13845 [HRD]
6580 ************************************************************************************************************/
6581 curbe->DW7.KernelBuildControl = 0;
6582
6583 curbe->DW7.ucMinQp = m_hevcPicParams->BRCMinQp;
6584 curbe->DW7.ucMaxQp = m_hevcPicParams->BRCMaxQp;
6585
6586 if (m_hevcPicParams->NumROI)
6587 {
6588 SetupROICurbe(&cmd);
6589 }
6590 curbe->DW14.ParallelMode = m_hevcSeqParams->ParallelBRC;
6591
6592 curbe->DW5.MaxNumPAKs = m_mfxInterface->GetBrcNumPakPasses();
6593
6594 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6595
6596 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
6597 {
6598 curbe->DW3.startGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
6599 curbe->DW3.startGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
6600 curbe->DW4.startGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
6601 curbe->DW4.startGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);
6602
6603 curbe->DW11.gRateRatioThreshold0 =
6604 (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40)));
6605 curbe->DW11.gRateRatioThreshold1 =
6606 (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75)));
6607 curbe->DW12.gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97)));
6608 curbe->DW12.gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100)));
6609 curbe->DW12.gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100)));
6610 curbe->DW12.gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100)));
6611 }
6612 else
6613 {
6614 // default CURBE setting is zero. So, driver needs to program them.
6615 curbe->DW3.startGAdjFrame0 = 10;
6616 curbe->DW3.startGAdjFrame1 = 50;
6617 curbe->DW4.startGAdjFrame2 = 100;
6618 curbe->DW4.startGAdjFrame3 = 150;
6619
6620 curbe->DW11.gRateRatioThreshold0 = 40;
6621 curbe->DW11.gRateRatioThreshold1 = 75;
6622 curbe->DW12.gRateRatioThreshold2 = 97;
6623 curbe->DW12.gRateRatioThreshold3 = 103;
6624 curbe->DW12.gRateRatioThreshold4 = 125;
6625 curbe->DW12.gRateRatioThreshold5 = 160;
6626 }
6627
6628 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
6629 CODECHAL_ENCODE_CHK_STATUS_RETURN(
6630 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
6631
6632 //#if (_DEBUG || _RELEASE_INTERNAL)
6633 // if (m_swBrcMode != nullptr)
6634 // {
6635 // CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcSwBrcImpl(
6636 // m_debugInterface,
6637 // encFunctionType,
6638 // this,
6639 // false,
6640 // kernelState,
6641 // kernelState));
6642 //
6643 // if (bLcuBrcEnabled || pHevcPicParams->NumROI)
6644 // {
6645 // // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
6646 // CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(curbe));
6647 // }
6648 // return eStatus;
6649 // }
6650 //#endif // (_DEBUG || _RELEASE_INTERNAL)
6651
6652 MOS_COMMAND_BUFFER cmdBuffer;
6653 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
6654 kernelState,
6655 encFunctionType,
6656 nullptr));
6657
6658 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase || !m_singleTaskPhaseSupportedInPak)
6659 {
6660 CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckBrcPakStasBuffer(&cmdBuffer));
6661 }
6662
6663 //Add surface states
6664 uint32_t startIndex = 0;
6665 //0: BRC history buffer
6666 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6667 kernelState,
6668 &cmdBuffer,
6669 SURFACE_BRC_HISTORY,
6670 &bindingTable->dwBindingTableEntries[startIndex++],
6671 &m_brcBuffers.resBrcHistoryBuffer));
6672
6673 //1: Previous PAK statistics output buffer
6674 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6675 kernelState,
6676 &cmdBuffer,
6677 SURFACE_BRC_PAST_PAK_INFO,
6678 &bindingTable->dwBindingTableEntries[startIndex++],
6679 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead]));
6680
6681 //2: HCP_PIC_STATE buffer for read
6682 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6683 kernelState,
6684 &cmdBuffer,
6685 SURFACE_BRC_HCP_PIC_STATE,
6686 &bindingTable->dwBindingTableEntries[startIndex++],
6687 brcHcpStateReadBuffer));
6688
6689 //3: HCP_PIC_STATE buffer for write
6690 m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE].bIsWritable =
6691 m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE].bRenderTarget = true;
6692 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6693 kernelState,
6694 &cmdBuffer,
6695 SURFACE_BRC_HCP_PIC_STATE,
6696 &bindingTable->dwBindingTableEntries[startIndex++],
6697 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]));
6698
6699 //4: BRC input surface for ENC kernels (output of BRC kernel)
6700 m_surfaceParams[SURFACE_BRC_INPUT].bIsWritable =
6701 m_surfaceParams[SURFACE_BRC_INPUT].bRenderTarget = true;
6702 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6703 kernelState,
6704 &cmdBuffer,
6705 SURFACE_BRC_INPUT,
6706 &bindingTable->dwBindingTableEntries[startIndex++]));
6707
6708 //5: Distortion data surface
6709 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6710 kernelState,
6711 &cmdBuffer,
6712 SURFACE_BRC_ME_DIST,
6713 &bindingTable->dwBindingTableEntries[startIndex++],
6714 m_brcDistortion));
6715
6716 //6: BRC data surface
6717 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6718 kernelState,
6719 &cmdBuffer,
6720 SURFACE_BRC_DATA,
6721 &bindingTable->dwBindingTableEntries[startIndex++]));
6722
6723 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
6724
6725 MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
6726 MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
6727 MediaObjectInlineData mediaObjectInlineData;
6728 MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
6729 mediaObjectParams.pInlineData = &mediaObjectInlineData;
6730 mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
6731 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObject(
6732 &cmdBuffer,
6733 nullptr,
6734 &mediaObjectParams));
6735
6736 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6737 encFunctionType,
6738 kernelState,
6739 &cmdBuffer));
6740
6741 if (m_lcuBrcEnabled || m_hevcPicParams->NumROI)
6742 {
6743 // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
6744 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(curbe));
6745 }
6746
6747 CODECHAL_DEBUG_TOOL(
6748 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6749 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
6750 CodechalDbgAttr::attrOutput,
6751 "ImgStateWrite",
6752 BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
6753 0,
6754 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6755
6756 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6757 &m_brcBuffers.resBrcHistoryBuffer,
6758 CodechalDbgAttr::attrOutput,
6759 "HistoryWrite",
6760 m_brcHistoryBufferSize,
6761 0,
6762 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6763 if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
6764 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6765 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
6766 CodechalDbgAttr::attrOutput,
6767 "MbQp",
6768 m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
6769 m_brcBuffers.dwBrcMbQpBottomFieldOffset,
6770 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6771 } if (m_brcBuffers.pMbEncKernelStateInUse) {
6772 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6773 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6774 m_brcBuffers.pMbEncKernelStateInUse));
6775 } if (m_mbencBrcBufferSize > 0) {
6776 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6777 &m_brcBuffers.resMbEncBrcBuffer,
6778 CodechalDbgAttr::attrOutput,
6779 "MbEncBRCWrite",
6780 m_mbencBrcBufferSize,
6781 0,
6782 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6783 }
6784
6785 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6786 (MOS_RESOURCE *)m_allocator->GetResource(m_standard, brcInputForEncKernel),
6787 CodechalDbgAttr::attrOutput,
6788 "CombinedEnc",
6789 128,
6790 0,
6791 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6792
6793 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6794 &m_brcBuffers.sBrcIntraDistortionBuffer.OsResource,
6795 CodechalDbgAttr::attrOutput,
6796 "IDistortion",
6797 m_brcBuffers.sBrcIntraDistortionBuffer.dwWidth * m_brcBuffers.sBrcIntraDistortionBuffer.dwHeight,
6798 0,
6799 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6800
6801 //reset info of skip frame
6802 m_numSkipFrames = 0;
6803 m_sizeSkipFrames = 0;
6804 return eStatus;
6805 }
6806
Encode8x8BPakKernel(PCODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 encBCurbe)6807 MOS_STATUS CodechalEncHevcStateG9::Encode8x8BPakKernel(
6808 PCODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 encBCurbe)
6809 {
6810 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6811
6812 CODECHAL_ENCODE_FUNCTION_ENTER;
6813
6814 CODECHAL_ENCODE_CHK_NULL_RETURN(encBCurbe);
6815
6816 PerfTagSetting perfTag;
6817 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL);
6818
6819 uint32_t krnIdx = CODECHAL_HEVC_MBENC_BPAK;
6820 auto kernelState = &m_mbEncKernelStates[krnIdx];
6821 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
6822 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
6823 {
6824 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
6825 }
6826
6827 //Setup DSH
6828 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6829 m_stateHeapInterface,
6830 kernelState,
6831 false,
6832 0,
6833 false,
6834 m_storeData));
6835
6836 //Setup CURBE
6837 CODECHAL_ENC_HEVC_B_PAK_CURBE_G9 cmd, *curbe = &cmd;
6838 MOS_ZeroMemory(curbe, sizeof(*curbe));
6839 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
6840 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
6841
6842 curbe->DW1.MaxVmvR = encBCurbe->DW44.MaxVmvR;
6843 curbe->DW1.Qp = encBCurbe->DW13.QpPrimeY;
6844 curbe->DW2.BrcEnable = encBCurbe->DW36.BRCEnable;
6845 curbe->DW2.LcuBrcEnable = encBCurbe->DW36.LCUBRCEnable;
6846 curbe->DW2.ScreenContent = encBCurbe->DW47.ScreenContentFlag;
6847 curbe->DW2.SimplestIntraEnable = encBCurbe->DW47.SkipIntraKrnFlag;
6848 curbe->DW2.SliceType = encBCurbe->DW4.SliceType;
6849 curbe->DW2.ROIEnable = (m_hevcPicParams->NumROI > 0);
6850 curbe->DW2.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
6851 // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
6852 curbe->DW2.KBLControlFlag = UsePlatformControlFlag();
6853 curbe->DW2.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
6854 curbe->DW3.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
6855 curbe->DW3.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
6856 curbe->DW3.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
6857
6858 uint32_t startBTI = 0;
6859 curbe->DW16.BTI_CU_Record = bindingTable->dwBindingTableEntries[startBTI++];
6860 curbe->DW17.BTI_PAK_Obj = bindingTable->dwBindingTableEntries[startBTI++];
6861 curbe->DW18.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
6862 curbe->DW19.BTI_Brc_Input = bindingTable->dwBindingTableEntries[startBTI++];
6863 curbe->DW20.BTI_LCU_Qp = bindingTable->dwBindingTableEntries[startBTI++];
6864 curbe->DW21.BTI_Brc_Data = bindingTable->dwBindingTableEntries[startBTI++];
6865 curbe->DW22.BTI_MB_Data = bindingTable->dwBindingTableEntries[startBTI++];
6866 curbe->DW23.BTI_MVP_Surface = bindingTable->dwBindingTableEntries[startBTI++];
6867 curbe->DW24.BTI_WA_PAK_Data = bindingTable->dwBindingTableEntries[startBTI++];
6868 curbe->DW25.BTI_WA_PAK_Obj = bindingTable->dwBindingTableEntries[startBTI++];
6869 curbe->DW26.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
6870
6871 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
6872
6873 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK;
6874 CODECHAL_ENCODE_CHK_STATUS_RETURN(
6875 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
6876
6877 MOS_COMMAND_BUFFER cmdBuffer;
6878 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
6879 &cmdBuffer,
6880 kernelState,
6881 encFunctionType,
6882 nullptr));
6883
6884 //Add surface states
6885 startBTI = 0;
6886 //0: CU record
6887 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6888 kernelState,
6889 &cmdBuffer,
6890 SURFACE_CU_RECORD,
6891 &bindingTable->dwBindingTableEntries[startBTI++]));
6892
6893 //1: PAK command
6894 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6895 kernelState,
6896 &cmdBuffer,
6897 SURFACE_HCP_PAK,
6898 &bindingTable->dwBindingTableEntries[startBTI++]));
6899
6900 //2: slice map
6901 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6902 kernelState,
6903 &cmdBuffer,
6904 SURFACE_SLICE_MAP,
6905 &bindingTable->dwBindingTableEntries[startBTI++]));
6906
6907 // 3: BRC Input
6908 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6909 kernelState,
6910 &cmdBuffer,
6911 SURFACE_BRC_INPUT,
6912 &bindingTable->dwBindingTableEntries[startBTI++]));
6913
6914 // 4: LCU Qp
6915 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6916 kernelState,
6917 &cmdBuffer,
6918 SURFACE_LCU_QP,
6919 &bindingTable->dwBindingTableEntries[startBTI++]));
6920
6921 // 5: LCU BRC constant
6922 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6923 kernelState,
6924 &cmdBuffer,
6925 SURFACE_BRC_DATA,
6926 &bindingTable->dwBindingTableEntries[startBTI++]));
6927
6928 // 6: MV index buffer or MB data
6929 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6930 kernelState,
6931 &cmdBuffer,
6932 SURFACE_MB_MV_INDEX,
6933 &bindingTable->dwBindingTableEntries[startBTI++]));
6934
6935 // 7: MVP index buffer
6936 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
6937 kernelState,
6938 &cmdBuffer,
6939 SURFACE_MVP_INDEX,
6940 &bindingTable->dwBindingTableEntries[startBTI++]));
6941
6942 if (!m_hwWalker)
6943 {
6944 eStatus = MOS_STATUS_UNKNOWN;
6945 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
6946 return eStatus;
6947 }
6948
6949 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6950 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6951 walkerCodecParams.WalkerMode = m_walkerMode;
6952 /* looping for Walker is needed at 8x8 block level */
6953 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
6954 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
6955 /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel */
6956 walkerCodecParams.bNoDependency = true;
6957 walkerCodecParams.wPictureCodingType = m_pictureCodingType;
6958 walkerCodecParams.bUseScoreboard = m_useHwScoreboard;
6959
6960 MHW_WALKER_PARAMS walkerParams;
6961 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
6962 m_hwInterface,
6963 &walkerParams,
6964 &walkerCodecParams));
6965
6966 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
6967 &cmdBuffer,
6968 &walkerParams));
6969
6970 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
6971 encFunctionType,
6972 kernelState,
6973 &cmdBuffer));
6974
6975 return eStatus;
6976 }
6977
Encode8x8PBMbEncKernel()6978 MOS_STATUS CodechalEncHevcStateG9::Encode8x8PBMbEncKernel()
6979 {
6980 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6981
6982 CODECHAL_ENCODE_FUNCTION_ENTER;
6983
6984 PerfTagSetting perfTag;
6985 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
6986
6987 uint32_t krnIdx = CODECHAL_HEVC_MBENC_BENC;
6988 if (m_pictureCodingType == P_TYPE)
6989 {
6990 krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_MBENC_ADV_P : CODECHAL_HEVC_MBENC_PENC;
6991 }
6992 else if (m_pictureCodingType == B_TYPE)
6993 {
6994 // In TU7, we still need the original ENC B kernel to process the I frame
6995 krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_MBENC_ADV : CODECHAL_HEVC_MBENC_BENC;
6996 }
6997
6998 auto kernelState = &m_mbEncKernelStates[krnIdx];
6999 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
7000 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
7001 {
7002 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
7003 }
7004
7005 int32_t sliceQp = CalSliceQp();
7006 uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType);
7007
7008 uint8_t tuMode = 0;
7009 if (m_hevcSeqParams->TargetUsage == 0x07)
7010 {
7011 // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped.
7012 CalcLambda(sliceType, INTRA_TRANSFORM_HAAR);
7013 tuMode = CODECHAL_ENCODE_HEVC_TARGET_USAGE_MODE_PERFORMANCE;
7014 }
7015 else if (m_hevcSeqParams->TargetUsage == 0x04)
7016 {
7017 tuMode = CODECHAL_ENCODE_HEVC_TARGET_USAGE_MODE_NORMAL;
7018 }
7019 else if (m_hevcSeqParams->TargetUsage == 0x01)
7020 {
7021 tuMode = CODECHAL_ENCODE_HEVC_TARGET_USAGE_MODE_QUALITY;
7022 }
7023 else
7024 {
7025 CODECHAL_ENCODE_ASSERT(false);
7026 eStatus = MOS_STATUS_INVALID_PARAMETER;
7027 return eStatus;
7028 }
7029
7030 LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR);
7031
7032 uint8_t mbCodeIdxForTempMVP = 0xFF;
7033 if(m_pictureCodingType != I_TYPE)
7034 {
7035 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
7036 {
7037 uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
7038
7039 mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
7040 }
7041
7042 if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
7043 {
7044 // Temporal reference MV index is invalid and so disable the temporal MVP
7045 CODECHAL_ENCODE_ASSERT(false);
7046 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
7047 }
7048 }
7049
7050 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
7051
7052 //Setup DSH
7053 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
7054 m_stateHeapInterface,
7055 kernelState,
7056 false,
7057 0,
7058 false,
7059 m_storeData));
7060
7061 //Setup CURBE
7062 uint8_t maxLenSP[] = { 25, 57, 57 };
7063 uint8_t forwardTransformThd[7] = { 0 };
7064 CalcForwardCoeffThd(forwardTransformThd, sliceQp);
7065
7066 uint32_t curbeSize = 0;
7067 void* defaultCurbe = (void*)GetDefaultCurbeEncBKernel(curbeSize);
7068 CODECHAL_ENCODE_ASSERT(defaultCurbe);
7069
7070 CODECHAL_ENC_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
7071 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
7072
7073 bool transform_8x8_mode_flag = true;
7074
7075 curbe->DW0.AdaptiveEn = 1;
7076 curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag;
7077 curbe->DW2.PicWidth = m_picWidthInMb;
7078 curbe->DW2.LenSP = maxLenSP[tuMode];
7079 curbe->DW3.SrcAccess = curbe->DW3.RefAccess = 0;
7080 curbe->DW3.FTEnable = (m_ftqBasedSkip[m_hevcSeqParams->TargetUsage] >> 1) & 0x01;
7081
7082 curbe->DW4.PicHeightMinus1 = m_picHeightInMb - 1;
7083 curbe->DW4.HMEEnable = m_hmeEnabled;
7084 curbe->DW4.SliceType = sliceType;
7085 curbe->DW4.UseActualRefQPValue = false;
7086
7087 curbe->DW7.IntraPartMask = 0x3;
7088
7089 curbe->DW6.FrameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH;
7090 curbe->DW6.FrameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
7091
7092 curbe->DW8.Mode0Cost = m_modeCost[0];
7093 curbe->DW8.Mode1Cost = m_modeCost[1];
7094 curbe->DW8.Mode2Cost = m_modeCost[2];
7095 curbe->DW8.Mode3Cost = m_modeCost[3];
7096
7097 curbe->DW9.Mode4Cost = m_modeCost[4];
7098 curbe->DW9.Mode5Cost = m_modeCost[5];
7099 curbe->DW9.Mode6Cost = m_modeCost[6];
7100 curbe->DW9.Mode7Cost = m_modeCost[7];
7101
7102 curbe->DW10.Mode8Cost= m_modeCost[8];
7103 curbe->DW10.Mode9Cost= m_modeCost[9];
7104 curbe->DW10.RefIDCost = m_modeCost[10];
7105 curbe->DW10.ChromaIntraModeCost = m_modeCost[11];
7106
7107 curbe->DW11.MV0Cost = m_mvCost[0];
7108 curbe->DW11.MV1Cost = m_mvCost[1];
7109 curbe->DW11.MV2Cost = m_mvCost[2];
7110 curbe->DW11.MV3Cost = m_mvCost[3];
7111
7112 curbe->DW12.MV4Cost = m_mvCost[4];
7113 curbe->DW12.MV5Cost = m_mvCost[5];
7114 curbe->DW12.MV6Cost = m_mvCost[6];
7115 curbe->DW12.MV7Cost = m_mvCost[7];
7116
7117 curbe->DW13.QpPrimeY = sliceQp;
7118 uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only
7119 int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8;
7120 int32_t qpi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset));
7121 int32_t qpc = (qpi < 30) ? qpi : QPcTable[qpi - 30];
7122 curbe->DW13.QpPrimeCb= qpc + qpBdOffsetC;
7123 qpi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset));
7124 qpc = (qpi < 30) ? qpi : QPcTable[qpi - 30];
7125 curbe->DW13.QpPrimeCr= qpc;
7126
7127 curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0];
7128 curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1];
7129 curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2];
7130
7131 curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3];
7132 curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4];
7133 curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5];
7134 curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6];
7135
7136 curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp];
7137
7138 if(m_pictureCodingType == I_TYPE)
7139 {
7140 *(float*)&(curbe->DW34.LambdaME) = 0.0;
7141 }
7142 else if (m_pictureCodingType == P_TYPE)
7143 {
7144 *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp];
7145 }
7146 else
7147 {
7148 *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
7149 }
7150
7151 curbe->DW35.ModeCostSp = m_modeCostSp;
7152 curbe->DW35.SimpIntraInterThreshold = m_simplestIntraInterThreshold;
7153
7154 curbe->DW36.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
7155 curbe->DW36.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
7156 curbe->DW36.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
7157 curbe->DW36.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
7158 curbe->DW36.PowerSaving = m_powerSavingEnabled;
7159 curbe->DW36.ROIEnable = (m_hevcPicParams->NumROI > 0);
7160 curbe->DW36.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
7161
7162 if(m_pictureCodingType != I_TYPE)
7163 {
7164 curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0);
7165 curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1);
7166 curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2);
7167 curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3);
7168 curbe->DW41.TextureIntraCostThreshold = 500;
7169 if(m_pictureCodingType == B_TYPE) {
7170 curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0);
7171 curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1);
7172 }
7173 }
7174
7175 curbe->DW44.MaxVmvR = 511 * 4;
7176 curbe->DW44.MaxNumMergeCandidates = m_hevcSliceParams->MaxNumMergeCand;
7177
7178 if(m_pictureCodingType != I_TYPE)
7179 {
7180 curbe->DW44.MaxNumRefList0 = curbe->DW36.NumRefIdxL0MinusOne + 1;
7181
7182 curbe->DW45.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
7183 curbe->DW45.HMECombineLenPslice = 8;
7184 if(m_pictureCodingType == B_TYPE)
7185 {
7186 curbe->DW44.MaxNumRefList1 = curbe->DW36.NumRefIdxL1MinusOne + 1;
7187 curbe->DW45.HMECombineLenBslice = 8;
7188 }
7189 }
7190
7191 curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
7192
7193 curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
7194 curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
7195 curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
7196 curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
7197
7198 curbe->DW47.NumRegionsInSlice = m_numRegionsInSlice;
7199 curbe->DW47.TypeOfWalkingPattern = m_enable26WalkingPattern;
7200 curbe->DW47.ChromaFlatnessCheckFlag = (m_hevcSeqParams->TargetUsage == 0x07) ? 0 : 1;
7201 curbe->DW47.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04);
7202 curbe->DW47.SkipIntraKrnFlag = (m_hevcSeqParams->TargetUsage == 0x07); // When TU=7, there is no intra kernel call
7203 curbe->DW47.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag;
7204 curbe->DW47.IsLowDelay = m_lowDelay;
7205 curbe->DW47.ScreenContentFlag = m_hevcPicParams->bScreenContent;
7206 curbe->DW47.MultiSliceFlag = (m_numSlices > 1);
7207 curbe->DW47.ArbitarySliceFlag = m_arbitraryNumMbsInSlice;
7208 curbe->DW47.NumRegionMinus1 = m_walkingPatternParam.dwNumRegion - 1;
7209
7210 if(m_pictureCodingType != I_TYPE)
7211 {
7212 curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]);
7213 curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]);
7214 curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]);
7215 curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]);
7216 if(m_pictureCodingType == B_TYPE) {
7217 curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]);
7218 curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]);
7219 }
7220 }
7221
7222 curbe->DW52.NumofUnitInRegion = m_walkingPatternParam.dwNumUnitsInRegion;
7223 curbe->DW52.MaxHeightInRegion = m_walkingPatternParam.dwMaxHeightInRegion;
7224
7225 uint32_t startBTI = 0;
7226 curbe->DW56.BTI_CU_Record = bindingTable->dwBindingTableEntries[startBTI++];
7227 curbe->DW57.BTI_PAK_Cmd = bindingTable->dwBindingTableEntries[startBTI++];
7228 curbe->DW58.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++];
7229 startBTI++; //skip UV index
7230 curbe->DW59.BTI_Intra_Dist = bindingTable->dwBindingTableEntries[startBTI++];
7231 curbe->DW60.BTI_Min_Dist = bindingTable->dwBindingTableEntries[startBTI++];
7232 curbe->DW61.BTI_HMEMVPredFwdBwdSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
7233 curbe->DW62.BTI_HMEDistSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
7234 curbe->DW63.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
7235 curbe->DW64.BTI_VME_Saved_UNI_SIC = bindingTable->dwBindingTableEntries[startBTI++];
7236 curbe->DW65.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
7237 curbe->DW66.BTI_Collocated_RefFrame = bindingTable->dwBindingTableEntries[startBTI++];
7238 curbe->DW67.BTI_Reserved = bindingTable->dwBindingTableEntries[startBTI++];
7239 curbe->DW68.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
7240 curbe->DW69.BTI_LCU_QP = bindingTable->dwBindingTableEntries[startBTI++];
7241 curbe->DW70.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
7242 curbe->DW71.BTI_VMEInterPredictionSurfIndex= bindingTable->dwBindingTableEntries[startBTI++];
7243 if(m_pictureCodingType == P_TYPE)
7244 {
7245 //P MBEnc curbe 72~75 are different from B frame.
7246 startBTI += (CODECHAL_HEVC_P_MBENC_CONCURRENT_THD_MAP - CODECHAL_HEVC_P_MBENC_VME_FORWARD_0);
7247 curbe->DW72.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
7248 curbe->DW73.BTI_MB_Data_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
7249 curbe->DW74.BTI_MVP_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
7250 curbe->DW75.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++];
7251 curbe->DW76.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
7252 curbe->DW77.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
7253 curbe->DW78.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
7254 }
7255 else
7256 {
7257 startBTI += (CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_VME_FORWARD_0 + 1);
7258
7259 curbe->DW72.BTI_VMEInterPredictionBSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
7260 startBTI += (CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_VME_MUL_BACKWARD_0 + 1);
7261
7262 curbe->DW73.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
7263 curbe->DW74.BTI_MB_Data_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
7264 curbe->DW75.BTI_MVP_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
7265 curbe->DW76.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++];
7266 curbe->DW77.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
7267 curbe->DW78.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
7268 curbe->DW79.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
7269 }
7270
7271 // Intra refresh is enabled. Program related CURBE fields
7272 if (m_hevcPicParams->bEnableRollingIntraRefresh)
7273 {
7274 curbe->DW35.IntraRefreshEn = true;
7275 curbe->DW35.FirstIntraRefresh = m_firstIntraRefresh;
7276 curbe->DW35.HalfUpdateMixedLCU = 0;
7277 curbe->DW35.EnableRollingIntra = true;
7278
7279 curbe->DW38.NumFrameInGOB = m_frameNumInGob;
7280 curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh;
7281
7282 curbe->DW51.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
7283 curbe->DW51.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
7284 curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
7285
7286 curbe->DW53.IntraRefreshRefHeight = 40;
7287 curbe->DW53.IntraRefreshRefWidth = 48;
7288
7289 m_firstIntraRefresh = false;
7290 m_frameNumWithoutIntraRefresh = 0;
7291 }
7292 else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames
7293 {
7294 m_frameNumWithoutIntraRefresh++;
7295 }
7296
7297 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
7298
7299 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
7300 if (m_pictureCodingType == P_TYPE)
7301 {
7302 //P frame curbe only use the DW0~DW75
7303 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7304 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd) - sizeof(uint32_t)));
7305 }
7306 else
7307 {
7308 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7309 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
7310 }
7311
7312 MOS_COMMAND_BUFFER cmdBuffer;
7313 if(m_numMbBKernelSplit == 0)
7314 {
7315 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
7316 kernelState,
7317 encFunctionType,
7318 &m_walkingPatternParam.ScoreBoard));
7319 }
7320 else
7321 {
7322 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
7323
7324 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
7325 MOS_ZeroMemory(&idParams, sizeof(idParams));
7326 idParams.pKernelState = kernelState;
7327 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
7328 m_stateHeapInterface,
7329 1,
7330 &idParams));
7331
7332 // Add binding table
7333 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
7334 m_stateHeapInterface,
7335 kernelState));
7336 }
7337
7338 //Add surface states
7339 startBTI = 0;
7340
7341 //0: CU record
7342 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7343 kernelState,
7344 &cmdBuffer,
7345 SURFACE_CU_RECORD,
7346 &bindingTable->dwBindingTableEntries[startBTI++]));
7347
7348 //1: PAK command
7349 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7350 kernelState,
7351 &cmdBuffer,
7352 SURFACE_HCP_PAK,
7353 &bindingTable->dwBindingTableEntries[startBTI++]));
7354
7355 //2 and 3 Source Y and UV
7356 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7357 kernelState,
7358 &cmdBuffer,
7359 SURFACE_RAW_Y_UV,
7360 &bindingTable->dwBindingTableEntries[startBTI++]));
7361 startBTI++;
7362
7363 //4: Intra dist
7364 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7365 kernelState,
7366 &cmdBuffer,
7367 SURFACE_INTRA_DIST,
7368 &bindingTable->dwBindingTableEntries[startBTI++]));
7369
7370 //5: min distortion
7371 m_surfaceParams[SURFACE_MIN_DIST].bIsWritable =
7372 m_surfaceParams[SURFACE_MIN_DIST].bRenderTarget = true;
7373 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7374 kernelState,
7375 &cmdBuffer,
7376 SURFACE_MIN_DIST,
7377 &bindingTable->dwBindingTableEntries[startBTI++]));
7378
7379 if(m_hmeSupported)
7380 {
7381 //6: MV predictor from HME
7382 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7383 kernelState,
7384 &cmdBuffer,
7385 SURFACE_HME_MVP,
7386 &bindingTable->dwBindingTableEntries[startBTI++]));
7387
7388 //7: distortion from HME
7389 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7390 kernelState,
7391 &cmdBuffer,
7392 SURFACE_HME_DIST,
7393 &bindingTable->dwBindingTableEntries[startBTI++]));
7394 }
7395 else
7396 {
7397 startBTI += 2;
7398 }
7399
7400 //8: slice map
7401 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7402 kernelState,
7403 &cmdBuffer,
7404 SURFACE_SLICE_MAP,
7405 &bindingTable->dwBindingTableEntries[startBTI++]));
7406
7407 //9: VME UNI and SIC data
7408 m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bIsWritable =
7409 m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bRenderTarget = true;
7410 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7411 kernelState,
7412 &cmdBuffer,
7413 SURFACE_VME_UNI_SIC_DATA,
7414 &bindingTable->dwBindingTableEntries[startBTI++]));
7415
7416 //10: Simplest Intra
7417 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7418 kernelState,
7419 &cmdBuffer,
7420 SURFACE_SIMPLIFIED_INTRA,
7421 &bindingTable->dwBindingTableEntries[startBTI++]));
7422
7423 // 11: Reference frame col-located data surface
7424 if(mbCodeIdxForTempMVP == 0xFF)
7425 {
7426 startBTI++;
7427 }
7428 else
7429 {
7430 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7431 kernelState,
7432 &cmdBuffer,
7433 SURFACE_COL_MB_MV,
7434 &bindingTable->dwBindingTableEntries[startBTI++],
7435 m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP)));
7436 }
7437
7438 // 12: Current frame col-located data surface -- reserved now
7439 startBTI++;
7440
7441 // 13: BRC Input
7442 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7443 kernelState,
7444 &cmdBuffer,
7445 SURFACE_BRC_INPUT,
7446 &bindingTable->dwBindingTableEntries[startBTI++]));
7447
7448 // 14: LCU Qp
7449 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7450 kernelState,
7451 &cmdBuffer,
7452 SURFACE_LCU_QP,
7453 &bindingTable->dwBindingTableEntries[startBTI++]));
7454
7455 // 15: LCU BRC constant
7456 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7457 kernelState,
7458 &cmdBuffer,
7459 SURFACE_BRC_DATA,
7460 &bindingTable->dwBindingTableEntries[startBTI++]));
7461
7462 // 16 - 32 Current plus forward and backward surface 0-7
7463 //16: Source Y for VME
7464 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7465 kernelState,
7466 &cmdBuffer,
7467 SURFACE_RAW_VME,
7468 &bindingTable->dwBindingTableEntries[startBTI++]));
7469
7470 for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++)
7471 {
7472 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx];
7473 if (!CodecHal_PictureIsInvalid(refPic) &&
7474 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
7475 {
7476 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7477
7478 // picture Y VME
7479 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7480 kernelState,
7481 &cmdBuffer,
7482 SURFACE_REF_FRAME_VME,
7483 &bindingTable->dwBindingTableEntries[startBTI++],
7484 &m_refList[idx]->sRefBuffer,
7485 curbe->DW6.FrameWidth,
7486 curbe->DW6.FrameHeight));
7487 }
7488 else
7489 {
7490 // Skip the binding table index because it is not used
7491 startBTI++;
7492 }
7493
7494 refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx];
7495 if (!CodecHal_PictureIsInvalid(refPic) &&
7496 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
7497 {
7498 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7499
7500 // picture Y VME
7501 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7502 kernelState,
7503 &cmdBuffer,
7504 SURFACE_REF_FRAME_VME,
7505 &bindingTable->dwBindingTableEntries[startBTI++],
7506 &m_refList[idx]->sRefBuffer,
7507 curbe->DW6.FrameWidth,
7508 curbe->DW6.FrameHeight));
7509 }
7510 else
7511 {
7512 // Skip the binding table index because it is not used
7513 startBTI++;
7514 }
7515 }
7516 CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
7517
7518 if (m_pictureCodingType != P_TYPE)
7519 {
7520 //33-41 VME multi-ref BTI -- Current plus [backward, nil][0..3]
7521 //33: Current Y VME surface
7522 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7523 kernelState,
7524 &cmdBuffer,
7525 SURFACE_RAW_VME,
7526 &bindingTable->dwBindingTableEntries[startBTI++]));
7527
7528 for(uint32_t surfaceIdx = 0; surfaceIdx < 4; surfaceIdx++)
7529 {
7530 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[1][surfaceIdx];
7531 if (!CodecHal_PictureIsInvalid(refPic) &&
7532 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
7533 {
7534 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7535
7536 // picture Y VME
7537 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7538 kernelState,
7539 &cmdBuffer,
7540 SURFACE_REF_FRAME_VME,
7541 &bindingTable->dwBindingTableEntries[startBTI++],
7542 &m_refList[idx]->sRefBuffer,
7543 curbe->DW6.FrameWidth,
7544 curbe->DW6.FrameHeight));
7545 }
7546 else
7547 {
7548 // Skip the binding table index because it is not used
7549 startBTI++;
7550 }
7551
7552 // Skip the binding table index because it is not used
7553 startBTI++;
7554 }
7555 CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
7556 }
7557
7558 // B 42 or P 33: Concurrent thread
7559 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7560 kernelState,
7561 &cmdBuffer,
7562 (SURFACE_ID)(SURFACE_CONCURRENT_THREAD + m_concurrentThreadIndex),
7563 &bindingTable->dwBindingTableEntries[startBTI++]));
7564
7565 if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD)
7566 {
7567 m_concurrentThreadIndex = 0;
7568 }
7569
7570 // B 43 or P 34: MV index buffer
7571 m_surfaceParams[SURFACE_MB_MV_INDEX].bIsWritable =
7572 m_surfaceParams[SURFACE_MB_MV_INDEX].bRenderTarget = true;
7573 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7574 kernelState,
7575 &cmdBuffer,
7576 SURFACE_MB_MV_INDEX,
7577 &bindingTable->dwBindingTableEntries[startBTI++]));
7578
7579 // B 44: or P 35: MVP index buffer
7580 m_surfaceParams[SURFACE_MVP_INDEX].bIsWritable =
7581 m_surfaceParams[SURFACE_MVP_INDEX].bRenderTarget = true;
7582 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7583 kernelState,
7584 &cmdBuffer,
7585 SURFACE_MVP_INDEX,
7586 &bindingTable->dwBindingTableEntries[startBTI++]));
7587
7588 if (!m_hwWalker)
7589 {
7590 eStatus = MOS_STATUS_UNKNOWN;
7591 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
7592 return eStatus;
7593 }
7594
7595 if(m_numMbBKernelSplit == 0)
7596 {
7597 // always use customized media walker
7598 MHW_WALKER_PARAMS walkerParams;
7599 MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
7600 walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
7601
7602 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7603 &cmdBuffer,
7604 &walkerParams));
7605 }
7606 else
7607 {
7608 int32_t localOuterLoopExecCount = m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount;
7609 int32_t localInitialStartPointY = m_walkingPatternParam.MediaWalker.LocalStart.y;
7610 int32_t phase = MOS_MIN(m_numMbBKernelSplit, MAX_NUM_KERNEL_SPLIT);
7611 int32_t totalExecCount = localOuterLoopExecCount + 1;
7612 int32_t deltaExecCount = (((totalExecCount+phase - 1) / phase) + 1) & 0xfffe;
7613 int32_t remainExecCount = totalExecCount;
7614
7615 int32_t deltaY = 0;
7616 if (m_enable26WalkingPattern)
7617 {
7618 deltaY = deltaExecCount / 2;
7619 }
7620 else
7621 {
7622 deltaY = deltaExecCount * 2;
7623 }
7624
7625 int32_t startPointY[MAX_NUM_KERNEL_SPLIT] = { 0 };
7626 int32_t currentExecCount[MAX_NUM_KERNEL_SPLIT] = { -1 };
7627 currentExecCount[0] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) : (remainExecCount-1);
7628 startPointY[0] = localInitialStartPointY;
7629
7630 for (auto i = 1; i < phase; i++)
7631 {
7632 remainExecCount -= deltaExecCount;
7633 if (remainExecCount < 1)
7634 {
7635 remainExecCount = 1;
7636 }
7637
7638 currentExecCount[i] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) : (remainExecCount-1);
7639 startPointY[i] = startPointY[i-1] + deltaY;
7640 }
7641
7642 for(auto i = 0; i < phase; i++)
7643 {
7644 if(currentExecCount[i] < 0)
7645 {
7646 break;
7647 }
7648
7649 // Program render engine pipe commands
7650 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
7651 sendKernelCmdsParams.EncFunctionType = encFunctionType;
7652 sendKernelCmdsParams.pKernelState = kernelState;
7653 sendKernelCmdsParams.bEnableCustomScoreBoard= true;
7654 sendKernelCmdsParams.pCustomScoreBoard = &m_walkingPatternParam.ScoreBoard;
7655 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
7656
7657 // Change walker execution count and local start Y for different phases
7658 m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = currentExecCount[i];
7659 m_walkingPatternParam.MediaWalker.LocalStart.y = startPointY[i];
7660
7661 // always use customized media walker
7662 MHW_WALKER_PARAMS walkerParams;
7663 MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
7664 walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
7665
7666 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7667 &cmdBuffer,
7668 &walkerParams));
7669 }
7670 }
7671
7672 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
7673 encFunctionType,
7674 kernelState,
7675 &cmdBuffer));
7676
7677 CODECHAL_DEBUG_TOOL(
7678 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7679 &m_mvIndex.sResource,
7680 CodechalDbgAttr::attrOutput,
7681 "MbData",
7682 m_mvpIndex.dwSize,
7683 0,
7684 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
7685
7686 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7687 &m_mvpIndex.sResource,
7688 CodechalDbgAttr::attrOutput,
7689 "MvData",
7690 m_mvpIndex.dwSize,
7691 0,
7692 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
7693 )
7694
7695 m_lastTaskInPhase = true;
7696 eStatus = Encode8x8BPakKernel(curbe);
7697
7698 return eStatus;
7699 }
7700
Encode2xScalingKernel()7701 MOS_STATUS CodechalEncHevcStateG9::Encode2xScalingKernel()
7702 {
7703 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7704
7705 PerfTagSetting perfTag;
7706 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
7707
7708 uint32_t krnIdx = CODECHAL_HEVC_MBENC_2xSCALING;
7709 auto kernelState = &m_mbEncKernelStates[krnIdx];
7710 auto scalingBindingTable = &m_mbEncKernelBindingTable[krnIdx];
7711 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
7712 {
7713 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
7714 }
7715
7716 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
7717 m_osInterface,
7718 &m_scaled2xSurface));
7719
7720 // Setup DSH
7721 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
7722 m_stateHeapInterface,
7723 kernelState,
7724 false,
7725 0,
7726 false,
7727 m_storeData));
7728
7729 //Setup CURBE
7730 MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9 cmd, *curbe = &cmd;
7731 MOS_ZeroMemory(curbe, sizeof(*curbe));
7732 curbe->DW0.PicWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
7733 curbe->DW0.PicHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
7734
7735 uint32_t startBTI = 0;
7736 curbe->DW8.BTI_Src_Y = scalingBindingTable->dwBindingTableEntries[startBTI++];
7737 curbe->DW9.BTI_Dst_Y = scalingBindingTable->dwBindingTableEntries[startBTI++];
7738
7739 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
7740 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7741 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
7742
7743 MOS_COMMAND_BUFFER cmdBuffer;
7744 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
7745 &cmdBuffer,
7746 kernelState,
7747 encFunctionType,
7748 nullptr));
7749
7750 // Add surface states, 2X scaling uses U16Norm surface format
7751 startBTI = 0;
7752
7753 // Source surface/s
7754 auto surfaceCodecParams = &m_surfaceParams[SURFACE_RAW_Y];
7755 surfaceCodecParams->bUse16UnormSurfaceFormat = true;
7756
7757 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7758 kernelState,
7759 &cmdBuffer,
7760 SURFACE_RAW_Y,
7761 &scalingBindingTable->dwBindingTableEntries[startBTI++]
7762 ));
7763
7764 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
7765 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceParams(surfaceCodecParams));
7766
7767 // Destination surface/s
7768 m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_WIDTH);
7769 m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_HEIGHT);
7770
7771 m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat =
7772 m_surfaceParams[SURFACE_Y_2X].bIsWritable =
7773 m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
7774 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7775 kernelState,
7776 &cmdBuffer,
7777 SURFACE_Y_2X,
7778 &scalingBindingTable->dwBindingTableEntries[startBTI++]
7779 ));
7780
7781 if (!m_hwWalker)
7782 {
7783 eStatus = MOS_STATUS_UNKNOWN;
7784 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
7785 return eStatus;
7786 }
7787
7788 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
7789 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
7790 walkerCodecParams.WalkerMode = m_walkerMode;
7791 // check kernel of Downscaling 2x kernels for Ultra HME.
7792 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
7793 // The frame kernel process 32x32 input pixels and output 16x16 down sampled pixels
7794 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
7795 /* Enforce no dependency dispatch order for Scaling kernel, */
7796 walkerCodecParams.bNoDependency = true;
7797
7798 MHW_WALKER_PARAMS walkerParams;
7799 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
7800 m_hwInterface,
7801 &walkerParams,
7802 &walkerCodecParams));
7803
7804 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7805 &cmdBuffer,
7806 &walkerParams));
7807
7808 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
7809 encFunctionType,
7810 kernelState,
7811 &cmdBuffer));
7812
7813 return eStatus;
7814 }
7815
Encode32x32PuModeDecisionKernel()7816 MOS_STATUS CodechalEncHevcStateG9::Encode32x32PuModeDecisionKernel()
7817 {
7818 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7819
7820 PerfTagSetting perfTag;
7821 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD);
7822
7823 uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32MD;
7824 auto kernelState = &m_mbEncKernelStates[krnIdx];
7825 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
7826 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
7827 {
7828 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
7829 }
7830
7831 // Setup DSH
7832 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
7833 m_stateHeapInterface,
7834 kernelState,
7835 false,
7836 0,
7837 false,
7838 m_storeData));
7839
7840 //Setup CURBE
7841 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
7842
7843 CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR);
7844 int32_t sliceQp = CalSliceQp();
7845
7846 double lambdaScalingFactor = 1.0;
7847 double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
7848 double squaredQpLambda = qpLambda * qpLambda;
7849 m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
7850
7851 CODECHAL_ENC_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd;
7852 MOS_ZeroMemory(curbe, sizeof(*curbe));
7853 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
7854 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
7855
7856 curbe->DW1.EnableDebugDump = false;
7857 curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
7858 curbe->DW1.PuType = 0; // 32x32 PU
7859 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
7860 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
7861 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
7862 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
7863 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
7864
7865 curbe->DW2.Lambda = m_fixedPointLambda;
7866
7867 curbe->DW3.ModeCost32x32 = 0;
7868
7869 curbe->DW4.EarlyExit = (uint32_t)-1;
7870
7871 uint32_t startIndex = 0;
7872 curbe->DW8.BTI_32x32PU_Output = bindingTable->dwBindingTableEntries[startIndex++];
7873 curbe->DW9.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++];
7874 startIndex++; // skip one BTI for Y and UV have the same BTI
7875 curbe->DW10.BTI_Src_Y2x = bindingTable->dwBindingTableEntries[startIndex++];
7876 curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++];
7877 curbe->DW12.BTI_Src_Y2x_VME = bindingTable->dwBindingTableEntries[startIndex++];
7878 curbe->DW13.BTI_Brc_Input = bindingTable->dwBindingTableEntries[startIndex++];
7879 curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startIndex++];
7880 curbe->DW15.BTI_Brc_Data = bindingTable->dwBindingTableEntries[startIndex++];
7881 curbe->DW16.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startIndex++];
7882 curbe->DW17.BTI_Kernel_Debug = bindingTable->dwBindingTableEntries[startIndex++];
7883
7884 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
7885
7886 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION;
7887 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7888 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
7889
7890 MOS_COMMAND_BUFFER cmdBuffer;
7891 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
7892 &cmdBuffer,
7893 kernelState,
7894 encFunctionType,
7895 nullptr));
7896
7897 //Add surface states
7898 startIndex = 0;
7899
7900 // 32x32 PU output
7901 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable =
7902 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
7903 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7904 kernelState,
7905 &cmdBuffer,
7906 SURFACE_32x32_PU_OUTPUT,
7907 &bindingTable->dwBindingTableEntries[startIndex++]));
7908
7909 // Source Y and UV
7910 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7911 kernelState,
7912 &cmdBuffer,
7913 SURFACE_RAW_Y_UV,
7914 &bindingTable->dwBindingTableEntries[startIndex++]));
7915 startIndex ++; // UV index
7916
7917 // Source Y2x
7918 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7919 kernelState,
7920 &cmdBuffer,
7921 SURFACE_Y_2X,
7922 &bindingTable->dwBindingTableEntries[startIndex++]));
7923
7924 // Slice map
7925 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7926 kernelState,
7927 &cmdBuffer,
7928 SURFACE_SLICE_MAP,
7929 &bindingTable->dwBindingTableEntries[startIndex++]));
7930
7931 // Source Y2x for VME
7932 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7933 kernelState,
7934 &cmdBuffer,
7935 SURFACE_Y_2X_VME,
7936 &bindingTable->dwBindingTableEntries[startIndex++]));
7937
7938 // BRC Input
7939 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7940 kernelState,
7941 &cmdBuffer,
7942 SURFACE_BRC_INPUT,
7943 &bindingTable->dwBindingTableEntries[startIndex++]));
7944
7945 // LCU Qp surface
7946 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7947 kernelState,
7948 &cmdBuffer,
7949 SURFACE_LCU_QP,
7950 &bindingTable->dwBindingTableEntries[startIndex++]));
7951
7952 // BRC data surface
7953 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
7954 kernelState,
7955 &cmdBuffer,
7956 SURFACE_BRC_DATA,
7957 &bindingTable->dwBindingTableEntries[startIndex++]));
7958
7959 if (!m_hwWalker)
7960 {
7961 eStatus = MOS_STATUS_UNKNOWN;
7962 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
7963 return eStatus;
7964 }
7965
7966 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
7967 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
7968 walkerCodecParams.WalkerMode = m_walkerMode;
7969 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; /* looping for Walker is needed at 8x8 block level */
7970 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
7971 walkerCodecParams.bNoDependency = true; /* Enforce no dependency dispatch order for 32x32 MD kernel */
7972
7973 MHW_WALKER_PARAMS walkerParams;
7974 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
7975 m_hwInterface,
7976 &walkerParams,
7977 &walkerCodecParams));
7978
7979 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
7980 &cmdBuffer,
7981 &walkerParams));
7982
7983 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
7984 encFunctionType,
7985 kernelState,
7986 &cmdBuffer));
7987
7988 return eStatus;
7989 }
7990
Encode32X32BIntraCheckKernel()7991 MOS_STATUS CodechalEncHevcStateG9::Encode32X32BIntraCheckKernel()
7992 {
7993 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7994
7995 CODECHAL_ENCODE_FUNCTION_ENTER;
7996
7997 PerfTagSetting perfTag;
7998 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC);
7999
8000 uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32INTRACHECK;
8001 auto kernelState = &m_mbEncKernelStates[krnIdx];
8002 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8003
8004 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8005 {
8006 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8007 }
8008
8009 // Setup DSH
8010 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8011 m_stateHeapInterface,
8012 kernelState,
8013 false,
8014 0,
8015 false,
8016 m_storeData));
8017
8018 // Setup CURBE
8019 if (m_pictureCodingType == P_TYPE)
8020 {
8021 CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR);
8022 }
8023 else
8024 {
8025 CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR);
8026 }
8027 int32_t sliceQp = CalSliceQp();
8028
8029 double lambdaScalingFactor = 1.0;
8030 double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
8031 double squaredQpLambda = qpLambda * qpLambda;
8032 m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
8033
8034 CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd;
8035 MOS_ZeroMemory(curbe, sizeof(*curbe));
8036 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8037 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8038
8039 curbe->DW1.EnableDebugDump = false;
8040 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8041 curbe->DW1.Flags = 0;
8042 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
8043 curbe->DW1.SliceType = m_hevcSliceParams->slice_type;
8044 curbe->DW1.HMEEnable = m_hmeEnabled;
8045 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8046
8047 curbe->DW2.QpMultiplier = 100;
8048 curbe->DW2.QpValue = 0; // MBZ
8049
8050 uint32_t startIndex = 0;
8051 curbe->DW8.BTI_Per32x32PuIntraCheck = bindingTable->dwBindingTableEntries[startIndex++];
8052 curbe->DW9.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++];
8053 startIndex++; // skip one BTI for Y and UV have the same BTI
8054 curbe->DW10.BTI_Src_Y2X = bindingTable->dwBindingTableEntries[startIndex++];
8055 curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++];
8056 curbe->DW12.BTI_VME_Y2X = bindingTable->dwBindingTableEntries[startIndex++];
8057 curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startIndex++];
8058 curbe->DW14.BTI_HME_MVPred = bindingTable->dwBindingTableEntries[startIndex++];
8059 curbe->DW15.BTI_HME_Dist = bindingTable->dwBindingTableEntries[startIndex++];
8060 curbe->DW16.BTI_LCU_Skip = bindingTable->dwBindingTableEntries[startIndex++];
8061 curbe->DW17.BTI_Debug = bindingTable->dwBindingTableEntries[startIndex++];
8062
8063 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
8064
8065 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK;
8066 CODECHAL_ENCODE_CHK_STATUS_RETURN(
8067 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8068
8069 MOS_COMMAND_BUFFER cmdBuffer;
8070 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8071 &cmdBuffer,
8072 kernelState,
8073 encFunctionType,
8074 nullptr));
8075
8076 //Add surface states
8077 startIndex = 0;
8078
8079 // 32x32 PU B Intra Check Output
8080 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable =
8081 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
8082 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8083 kernelState,
8084 &cmdBuffer,
8085 SURFACE_32x32_PU_OUTPUT,
8086 &bindingTable->dwBindingTableEntries[startIndex++]));
8087
8088 // Source Y and UV
8089 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8090 kernelState,
8091 &cmdBuffer,
8092 SURFACE_RAW_Y_UV,
8093 &bindingTable->dwBindingTableEntries[startIndex++]));
8094 startIndex++;
8095
8096 // Source Y2x
8097 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8098 kernelState,
8099 &cmdBuffer,
8100 SURFACE_Y_2X,
8101 &bindingTable->dwBindingTableEntries[startIndex++]));
8102
8103 // Slice map
8104 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8105 kernelState,
8106 &cmdBuffer,
8107 SURFACE_SLICE_MAP,
8108 &bindingTable->dwBindingTableEntries[startIndex++]));
8109
8110 // Source Y2x for VME
8111 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8112 kernelState,
8113 &cmdBuffer,
8114 SURFACE_Y_2X_VME,
8115 &bindingTable->dwBindingTableEntries[startIndex++]));
8116
8117 // Simplest Intra
8118 m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bIsWritable =
8119 m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bRenderTarget = true;
8120 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8121 kernelState,
8122 &cmdBuffer,
8123 SURFACE_SIMPLIFIED_INTRA,
8124 &bindingTable->dwBindingTableEntries[startIndex++]));
8125
8126 if(m_hmeSupported)
8127 {
8128 //MV predictor from HME
8129 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8130 kernelState,
8131 &cmdBuffer,
8132 SURFACE_HME_MVP,
8133 &bindingTable->dwBindingTableEntries[startIndex++]));
8134
8135 //distortion from HME
8136 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8137 kernelState,
8138 &cmdBuffer,
8139 SURFACE_HME_DIST,
8140 &bindingTable->dwBindingTableEntries[startIndex++]));
8141 }
8142 else
8143 {
8144 startIndex += 2;
8145 }
8146
8147 // LCU Qp/Skip surface
8148 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8149 kernelState,
8150 &cmdBuffer,
8151 SURFACE_LCU_QP,
8152 &bindingTable->dwBindingTableEntries[startIndex++]));
8153
8154 if (!m_hwWalker)
8155 {
8156 eStatus = MOS_STATUS_UNKNOWN;
8157 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8158 return eStatus;
8159 }
8160
8161 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8162 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8163 walkerCodecParams.WalkerMode = m_walkerMode;
8164 /* looping for Walker is needed at 8x8 block level */
8165 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
8166 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
8167 /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel */
8168 walkerCodecParams.bNoDependency = true;
8169
8170 MHW_WALKER_PARAMS walkerParams;
8171 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8172 m_hwInterface,
8173 &walkerParams,
8174 &walkerCodecParams));
8175
8176 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8177 &cmdBuffer,
8178 &walkerParams));
8179
8180 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8181 encFunctionType,
8182 kernelState,
8183 &cmdBuffer));
8184
8185 return eStatus;
8186 }
8187
Encode16x16SadPuComputationKernel()8188 MOS_STATUS CodechalEncHevcStateG9::Encode16x16SadPuComputationKernel()
8189 {
8190 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8191
8192 CODECHAL_ENCODE_FUNCTION_ENTER;
8193
8194 PerfTagSetting perfTag;
8195 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD);
8196
8197 uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16SAD;
8198 auto kernelState = &m_mbEncKernelStates[krnIdx];
8199 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8200 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8201 {
8202 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8203 }
8204
8205 //Setup DSH
8206 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8207 m_stateHeapInterface,
8208 kernelState,
8209 false,
8210 0,
8211 false,
8212 m_storeData));
8213
8214 // Setup CURBE
8215 CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd;
8216
8217 MOS_ZeroMemory(curbe, sizeof(*curbe));
8218 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8219 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8220
8221 curbe->DW1.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8222 curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
8223 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
8224 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8225
8226 curbe->DW2.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
8227 curbe->DW2.SimFlagForInter = false;
8228 if (m_hevcPicParams->CodingType != I_TYPE)
8229 {
8230 curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance;
8231 }
8232
8233 uint32_t startIndex = 0;
8234 curbe->DW8.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++];
8235 startIndex++; // skip UV BTI
8236 curbe->DW9.BTI_Sad_16x16_PU_Output = bindingTable->dwBindingTableEntries[startIndex++];
8237 curbe->DW10.BTI_32x32_Pu_ModeDecision = bindingTable->dwBindingTableEntries[startIndex++];
8238 curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++];
8239 curbe->DW12.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startIndex++];
8240 curbe->DW13.BTI_Debug = bindingTable->dwBindingTableEntries[startIndex++];
8241
8242 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
8243
8244 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD;
8245 CODECHAL_ENCODE_CHK_STATUS_RETURN(
8246 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8247
8248 MOS_COMMAND_BUFFER cmdBuffer;
8249 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8250 &cmdBuffer,
8251 kernelState,
8252 encFunctionType,
8253 nullptr));
8254
8255 //Add surface states
8256 startIndex = 0;
8257
8258 // Source Y and UV
8259 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8260 kernelState,
8261 &cmdBuffer,
8262 SURFACE_RAW_Y_UV,
8263 &bindingTable->dwBindingTableEntries[startIndex++]));
8264 startIndex++;
8265
8266 // 16x16 PU SAD output
8267 m_surfaceParams[SURFACE_16x16PU_SAD].bIsWritable =
8268 m_surfaceParams[SURFACE_16x16PU_SAD].bRenderTarget = true;
8269 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8270 kernelState,
8271 &cmdBuffer,
8272 SURFACE_16x16PU_SAD,
8273 &bindingTable->dwBindingTableEntries[startIndex++]));
8274
8275 // 32x32 PU MD data
8276 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8277 kernelState,
8278 &cmdBuffer,
8279 SURFACE_32x32_PU_OUTPUT,
8280 &bindingTable->dwBindingTableEntries[startIndex++]));
8281
8282 // Slice map
8283 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8284 kernelState,
8285 &cmdBuffer,
8286 SURFACE_SLICE_MAP,
8287 &bindingTable->dwBindingTableEntries[startIndex++]));
8288
8289 // Simplest Intra
8290 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8291 kernelState,
8292 &cmdBuffer,
8293 SURFACE_SIMPLIFIED_INTRA,
8294 &bindingTable->dwBindingTableEntries[startIndex++]));
8295
8296 if (!m_hwWalker)
8297 {
8298 eStatus = MOS_STATUS_UNKNOWN;
8299 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8300 return eStatus;
8301 }
8302
8303 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8304 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8305 walkerCodecParams.WalkerMode = m_walkerMode;
8306 /* looping for Walker is needed at 16x16 block level */
8307 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 16) >> 4;
8308 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 16) >> 4;
8309 /* Enforce no dependency dispatch order for the 16x16 SAD kernel */
8310 walkerCodecParams.bNoDependency = true;
8311
8312 MHW_WALKER_PARAMS walkerParams;
8313 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8314 m_hwInterface,
8315 &walkerParams,
8316 &walkerCodecParams));
8317
8318 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8319 &cmdBuffer,
8320 &walkerParams));
8321
8322 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8323 encFunctionType,
8324 kernelState,
8325 &cmdBuffer));
8326
8327 return eStatus;
8328 }
8329
Encode16x16PuModeDecisionKernel()8330 MOS_STATUS CodechalEncHevcStateG9::Encode16x16PuModeDecisionKernel()
8331 {
8332 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8333
8334 CODECHAL_ENCODE_FUNCTION_ENTER;
8335
8336 PerfTagSetting perfTag;
8337 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD);
8338
8339 uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16MD;
8340 auto kernelState = &m_mbEncKernelStates[krnIdx];
8341 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8342 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8343 {
8344 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8345 }
8346
8347 // Setup DSH
8348 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8349 m_stateHeapInterface,
8350 kernelState,
8351 false,
8352 0,
8353 false,
8354 m_storeData));
8355
8356 // Setup CURBE
8357 int32_t sliceQp = CalSliceQp();
8358 uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
8359
8360 double lambdaScaleFactor = 0.46 + sliceQp - 22;
8361 if (lambdaScaleFactor < 0)
8362 {
8363 lambdaScaleFactor = 0.46;
8364 }
8365
8366 if (lambdaScaleFactor > 15)
8367 {
8368 lambdaScaleFactor = 15;
8369 }
8370
8371 double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6);
8372 m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10));
8373
8374 double lambdaScalingFactor = 1.0;
8375 double qpLambda = m_qpLambdaMd[sliceType][sliceQp];
8376 double squaredQpLambda = qpLambda * qpLambda;
8377 m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
8378
8379 LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR);
8380
8381 CODECHAL_ENC_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd;
8382 MOS_ZeroMemory(curbe, sizeof(*curbe));
8383
8384 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8385 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8386 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8387
8388 curbe->DW1.Log2MaxCUSize = log2MaxCUSize;
8389 curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
8390 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
8391 curbe->DW1.SliceQp = sliceQp;
8392
8393 curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma;
8394
8395 curbe->DW3.LambdaScalingFactor = 1;
8396 curbe->DW3.SliceType = sliceType;
8397 curbe->DW3.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8398 curbe->DW3.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
8399 curbe->DW3.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
8400 curbe->DW3.ROIEnable = (m_hevcPicParams->NumROI > 0);
8401 curbe->DW3.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8402 curbe->DW3.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
8403 //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel
8404 curbe->DW3.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh;
8405 curbe->DW3.HalfUpdateMixedLCU = 0;
8406
8407 curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0;
8408 curbe->DW4.IntraComputeType = 1;
8409 curbe->DW4.AVCIntra8x8Mask = 0;
8410 curbe->DW4.IntraSadAdjust = 2;
8411
8412 double lambdaMd = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3));
8413 squredLambda = lambdaMd * lambdaMd;
8414 uint32_t newLambda = (uint32_t)(squredLambda*(1<<10));
8415 curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda;
8416
8417 curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent;
8418
8419 curbe->DW7.ModeCostIntraNonPred = m_modeCost[0];
8420 curbe->DW7.ModeCostIntra16x16 = m_modeCost[1];
8421 curbe->DW7.ModeCostIntra8x8 = m_modeCost[2];
8422 curbe->DW7.ModeCostIntra4x4 = m_modeCost[3];
8423
8424 curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma;
8425
8426 if (m_hevcPicParams->bEnableRollingIntraRefresh)
8427 {
8428 curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
8429 curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
8430 curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
8431 }
8432
8433 curbe->DW10.SimplifiedFlagForInter = 0;
8434 curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
8435
8436 uint32_t startBTI = 0;
8437 curbe->DW16.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++];
8438 startBTI++; // skip UV BTI
8439 curbe->DW17.BTI_Sad_16x16_PU = bindingTable->dwBindingTableEntries[startBTI++];
8440 curbe->DW18.BTI_PAK_Object = bindingTable->dwBindingTableEntries[startBTI++];
8441 curbe->DW19.BTI_SAD_32x32_PU_mode = bindingTable->dwBindingTableEntries[startBTI++];
8442 curbe->DW20.BTI_VME_Mode_8x8 = bindingTable->dwBindingTableEntries[startBTI++];
8443 curbe->DW21.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
8444 curbe->DW22.BTI_VME_Src = bindingTable->dwBindingTableEntries[startBTI++];
8445 curbe->DW23.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
8446 curbe->DW24.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
8447 curbe->DW25.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
8448 curbe->DW26.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
8449 curbe->DW27.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
8450
8451 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
8452
8453 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION;
8454 CODECHAL_ENCODE_CHK_STATUS_RETURN(
8455 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8456
8457 MOS_COMMAND_BUFFER cmdBuffer;
8458 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8459 &cmdBuffer,
8460 kernelState,
8461 encFunctionType,
8462 nullptr));
8463
8464 //Add surface states
8465 startBTI = 0;
8466
8467 // Source Y and UV:
8468 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8469 kernelState,
8470 &cmdBuffer,
8471 SURFACE_RAW_Y_UV,
8472 &bindingTable->dwBindingTableEntries[startBTI++]));
8473 startBTI++;
8474
8475 // 16x16 PU SAD output
8476 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8477 kernelState,
8478 &cmdBuffer,
8479 SURFACE_16x16PU_SAD,
8480 &bindingTable->dwBindingTableEntries[startBTI++]));
8481
8482 // PAK object output
8483 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8484 kernelState,
8485 &cmdBuffer,
8486 SURFACE_CU_RECORD,
8487 &bindingTable->dwBindingTableEntries[startBTI++]));
8488
8489 // 32x32 PU MD data
8490 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8491 kernelState,
8492 &cmdBuffer,
8493 SURFACE_32x32_PU_OUTPUT,
8494 &bindingTable->dwBindingTableEntries[startBTI++]));
8495
8496 // VME 8x8 mode
8497 m_surfaceParams[SURFACE_VME_8x8].bIsWritable =
8498 m_surfaceParams[SURFACE_VME_8x8].bRenderTarget = true;
8499 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8500 kernelState,
8501 &cmdBuffer,
8502 SURFACE_VME_8x8,
8503 &bindingTable->dwBindingTableEntries[startBTI++]));
8504
8505 // Slice map
8506 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8507 kernelState,
8508 &cmdBuffer,
8509 SURFACE_SLICE_MAP,
8510 &bindingTable->dwBindingTableEntries[startBTI++]));
8511
8512 // Source Y for VME
8513 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8514 kernelState,
8515 &cmdBuffer,
8516 SURFACE_RAW_VME,
8517 &bindingTable->dwBindingTableEntries[startBTI++]));
8518
8519 // BRC Input
8520 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8521 kernelState,
8522 &cmdBuffer,
8523 SURFACE_BRC_INPUT,
8524 &bindingTable->dwBindingTableEntries[startBTI++]));
8525
8526 // Simplest Intra
8527 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8528 kernelState,
8529 &cmdBuffer,
8530 SURFACE_SIMPLIFIED_INTRA,
8531 &bindingTable->dwBindingTableEntries[startBTI++]));
8532
8533 // LCU Qp surface
8534 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8535 kernelState,
8536 &cmdBuffer,
8537 SURFACE_LCU_QP,
8538 &bindingTable->dwBindingTableEntries[startBTI++]));
8539
8540 // BRC data surface
8541 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8542 kernelState,
8543 &cmdBuffer,
8544 SURFACE_BRC_DATA,
8545 &bindingTable->dwBindingTableEntries[startBTI++]));
8546
8547 if (!m_hwWalker)
8548 {
8549 eStatus = MOS_STATUS_UNKNOWN;
8550 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8551 return eStatus;
8552 }
8553
8554 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8555 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8556 walkerCodecParams.WalkerMode = m_walkerMode;
8557 /* looping for Walker is needed at 32x32 block level in OPT case*/
8558 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
8559 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
8560 walkerCodecParams.bNoDependency = true;
8561
8562 MHW_WALKER_PARAMS walkerParams;
8563 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8564 m_hwInterface,
8565 &walkerParams,
8566 &walkerCodecParams));
8567
8568 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8569 &cmdBuffer,
8570 &walkerParams));
8571
8572 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8573 encFunctionType,
8574 kernelState,
8575 &cmdBuffer));
8576
8577 return eStatus;
8578 }
8579
Encode8x8PUKernel()8580 MOS_STATUS CodechalEncHevcStateG9::Encode8x8PUKernel()
8581 {
8582 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8583
8584 CODECHAL_ENCODE_FUNCTION_ENTER;
8585
8586 PerfTagSetting perfTag;
8587 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
8588
8589 uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8PU;
8590 auto kernelState = &m_mbEncKernelStates[krnIdx];
8591 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8592 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8593 {
8594 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8595 }
8596
8597 // Setup DSH
8598 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8599 m_stateHeapInterface,
8600 kernelState,
8601 false,
8602 0,
8603 false,
8604 m_storeData));
8605
8606 // Setup CURBE
8607 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8608 CODECHAL_ENC_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd;
8609 MOS_ZeroMemory(curbe, sizeof(*curbe));
8610
8611 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8612 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8613
8614 curbe->DW1.SliceType = (m_hevcPicParams->CodingType == I_TYPE) ? CODECHAL_ENCODE_HEVC_I_SLICE : CODECHAL_ENCODE_HEVC_B_SLICE;
8615 curbe->DW1.PuType = 2; // 8x8
8616 curbe->DW1.DcFilterFlag = true;
8617 curbe->DW1.AngleRefineFlag = true;
8618 curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
8619 curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
8620 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8621 curbe->DW1.EnableDebugDump = false;
8622 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
8623 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
8624 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
8625 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8626 if (m_hevcPicParams->bEnableRollingIntraRefresh)
8627 {
8628 curbe->DW1.EnableRollingIntra = true;
8629 curbe->DW1.IntraRefreshEn = true;
8630 curbe->DW1.HalfUpdateMixedLCU = 0;
8631
8632 curbe->DW5.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
8633 curbe->DW5.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
8634 curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
8635
8636 int32_t qp = CalSliceQp();
8637 curbe->DW1.QPValue = (uint32_t)qp;
8638 }
8639
8640 curbe->DW2.LumaLambda = m_fixedPointLambdaForLuma;
8641
8642 curbe->DW3.ChromaLambda = m_fixedPointLambdaForChroma;
8643
8644 curbe->DW4.HaarTransformFlag = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
8645 curbe->DW4.SimplifiedFlagForInter = false;
8646
8647 uint32_t startBTI = 0;
8648 curbe->DW8.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++];
8649 startBTI++; // skip one BTI for Y and UV have the same BTI
8650 curbe->DW9.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
8651 curbe->DW10.BTI_VME_8x8_Mode = bindingTable->dwBindingTableEntries[startBTI++];
8652 curbe->DW11.BTI_Intra_Mode = bindingTable->dwBindingTableEntries[startBTI++];
8653 curbe->DW12.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
8654 curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
8655 curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
8656 curbe->DW15.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
8657 curbe->DW16.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
8658
8659 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
8660
8661 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU;
8662 CODECHAL_ENCODE_CHK_STATUS_RETURN(
8663 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8664
8665 MOS_COMMAND_BUFFER cmdBuffer;
8666 if(m_numMb8x8IntraKernelSplit == 0)
8667 {
8668 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
8669 kernelState,
8670 encFunctionType,
8671 nullptr));
8672 }
8673 else
8674 {
8675 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8676
8677 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
8678 MOS_ZeroMemory(&idParams, sizeof(idParams));
8679 idParams.pKernelState = kernelState;
8680 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
8681 m_stateHeapInterface,
8682 1,
8683 &idParams));
8684
8685 // Add binding table
8686 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
8687 m_stateHeapInterface,
8688 kernelState));
8689 }
8690
8691 //Add surface states
8692 startBTI = 0;
8693
8694 // Source Y and UV
8695 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8696 kernelState,
8697 &cmdBuffer,
8698 SURFACE_RAW_Y_UV,
8699 &bindingTable->dwBindingTableEntries[startBTI++]));
8700 startBTI++;
8701
8702 // Slice Map
8703 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8704 kernelState,
8705 &cmdBuffer,
8706 SURFACE_SLICE_MAP,
8707 &bindingTable->dwBindingTableEntries[startBTI++]));
8708
8709 // VME 8x8 mode
8710 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8711 kernelState,
8712 &cmdBuffer,
8713 SURFACE_VME_8x8,
8714 &bindingTable->dwBindingTableEntries[startBTI++]));
8715
8716 // Intra mode
8717 m_surfaceParams[SURFACE_INTRA_MODE].bIsWritable =
8718 m_surfaceParams[SURFACE_INTRA_MODE].bRenderTarget = true;
8719 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8720 kernelState,
8721 &cmdBuffer,
8722 SURFACE_INTRA_MODE,
8723 &bindingTable->dwBindingTableEntries[startBTI++]));
8724
8725 // BRC Input
8726 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8727 kernelState,
8728 &cmdBuffer,
8729 SURFACE_BRC_INPUT,
8730 &bindingTable->dwBindingTableEntries[startBTI++]));
8731
8732 // Simplest Intra
8733 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8734 kernelState,
8735 &cmdBuffer,
8736 SURFACE_SIMPLIFIED_INTRA,
8737 &bindingTable->dwBindingTableEntries[startBTI++]));
8738
8739 // LCU Qp surface
8740 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8741 kernelState,
8742 &cmdBuffer,
8743 SURFACE_LCU_QP,
8744 &bindingTable->dwBindingTableEntries[startBTI++]));
8745
8746 // BRC data surface
8747 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8748 kernelState,
8749 &cmdBuffer,
8750 SURFACE_BRC_DATA,
8751 &bindingTable->dwBindingTableEntries[startBTI++]));
8752
8753 if (!m_hwWalker)
8754 {
8755 eStatus = MOS_STATUS_UNKNOWN;
8756 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
8757 return eStatus;
8758 }
8759
8760 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
8761 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
8762 walkerCodecParams.WalkerMode = m_walkerMode;
8763 // each EU is based on one 8x8 block
8764 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) >> 3;
8765 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT) >> 3;
8766 /* Enforce no dependency dispatch order for 8x8 PU kernel */
8767 walkerCodecParams.bNoDependency = true;
8768
8769 if(m_numMb8x8IntraKernelSplit == 0)
8770 {
8771 MHW_WALKER_PARAMS walkerParams;
8772 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8773 m_hwInterface,
8774 &walkerParams,
8775 &walkerCodecParams));
8776
8777 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8778 &cmdBuffer,
8779 &walkerParams));
8780 }
8781 else
8782 {
8783 uint32_t numRowPerSplit = (walkerCodecParams.dwResolutionY + m_numMb8x8IntraKernelSplit - 1) / m_numMb8x8IntraKernelSplit;
8784 uint32_t currentNumRow = 0;
8785
8786 for(uint32_t i = 0; i < m_numMb8x8IntraKernelSplit; i++)
8787 {
8788 // Program render engine pipe commands
8789 SendKernelCmdsParams sendKernelCmdsParams;
8790 sendKernelCmdsParams = SendKernelCmdsParams();
8791 sendKernelCmdsParams.EncFunctionType = encFunctionType;
8792 sendKernelCmdsParams.pKernelState = kernelState;
8793 sendKernelCmdsParams.bEnableCustomScoreBoard= true;
8794 sendKernelCmdsParams.pCustomScoreBoard = &m_walkingPatternParam.ScoreBoard;
8795 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
8796
8797 MHW_WALKER_PARAMS walkerParams;
8798 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
8799 m_hwInterface,
8800 &walkerParams,
8801 &walkerCodecParams));
8802
8803 if(currentNumRow + numRowPerSplit >= walkerCodecParams.dwResolutionY)
8804 {
8805 // the last split may not have the same number of rows as previous splits
8806 numRowPerSplit = walkerCodecParams.dwResolutionY - currentNumRow;
8807 }
8808
8809 walkerParams.LocalStart.y = currentNumRow;
8810 walkerParams.dwLocalLoopExecCount = numRowPerSplit * walkerCodecParams.dwResolutionX;
8811
8812 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
8813 &cmdBuffer,
8814 &walkerParams));
8815
8816 currentNumRow += numRowPerSplit;
8817 if(currentNumRow >= walkerCodecParams.dwResolutionY)
8818 {
8819 break;
8820 }
8821 }
8822 }
8823
8824 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
8825 encFunctionType,
8826 kernelState,
8827 &cmdBuffer));
8828
8829 return eStatus;
8830 }
8831
Encode8x8PUFMODEKernel()8832 MOS_STATUS CodechalEncHevcStateG9::Encode8x8PUFMODEKernel()
8833 {
8834 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8835
8836 CODECHAL_ENCODE_FUNCTION_ENTER;
8837
8838 PerfTagSetting perfTag;
8839 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE);
8840
8841 uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8FMODE;
8842 auto kernelState = &m_mbEncKernelStates[krnIdx];
8843 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
8844 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
8845 {
8846 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
8847 }
8848
8849 // Setup DSH
8850 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
8851 m_stateHeapInterface,
8852 kernelState,
8853 false,
8854 0,
8855 false,
8856 m_storeData));
8857
8858 // Setup CURBE
8859 int32_t qp = CalSliceQp();
8860 uint32_t sliceQp = (uint32_t)qp;
8861 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
8862
8863 CODECHAL_ENC_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd;
8864 MOS_ZeroMemory(curbe, sizeof(*curbe));
8865 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
8866 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
8867
8868 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
8869 curbe->DW1.PuType = 2;
8870 curbe->DW1.PakReordingFlag = (m_hevcPicParams->CodingType == I_TYPE) ? true : false;
8871 curbe->DW1.LCUType = (log2MaxCUSize == 6)? 0 /*64x64*/: 1 /*32x32*/;
8872 curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
8873 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
8874 curbe->DW1.EnableDebugDump = false;
8875 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
8876 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
8877 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
8878 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
8879 curbe->DW1.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
8880 curbe->DW1.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh;
8881 curbe->DW1.HalfUpdateMixedLCU = 0;
8882 curbe->DW2.LambdaForLuma = m_fixedPointLambdaForLuma;
8883
8884 if (m_hevcPicParams->CodingType != I_TYPE)
8885 {
8886 float hadBias = 2.0f;
8887
8888 double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
8889 lambdaMd = lambdaMd * hadBias;
8890 curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10));
8891 }
8892 curbe->DW4.ModeCostFor8x8PU_TU8 = 0;
8893 curbe->DW5.ModeCostFor8x8PU_TU4 = 0;
8894 curbe->DW6.SATD16x16PuThreshold = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0);
8895 curbe->DW6.BiasFactorToward8x8 = (m_hevcPicParams->bScreenContent) ? 1024 : 1126 + 102;
8896 curbe->DW7.Qp = sliceQp;
8897 curbe->DW7.QpForInter = 0;
8898 curbe->DW8.SimplifiedFlagForInter = false;
8899 // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
8900 curbe->DW8.KBLControlFlag = UsePlatformControlFlag();
8901 curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
8902 curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
8903 curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
8904
8905 uint32_t startBTI = 0;
8906 curbe->DW16.BTI_PAK_Object = bindingTable->dwBindingTableEntries[startBTI++];
8907 curbe->DW17.BTI_VME_8x8_Mode = bindingTable->dwBindingTableEntries[startBTI++];
8908 curbe->DW18.BTI_Intra_Mode = bindingTable->dwBindingTableEntries[startBTI++];
8909 curbe->DW19.BTI_PAK_Command = bindingTable->dwBindingTableEntries[startBTI++];
8910 curbe->DW20.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
8911 curbe->DW21.BTI_IntraDist = bindingTable->dwBindingTableEntries[startBTI++];
8912 curbe->DW22.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
8913 curbe->DW23.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
8914 curbe->DW24.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
8915 curbe->DW25.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
8916 curbe->DW26.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++];
8917 curbe->DW27.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
8918 curbe->DW28.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
8919 curbe->DW29.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
8920
8921 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
8922
8923 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE;
8924 CODECHAL_ENCODE_CHK_STATUS_RETURN(
8925 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
8926
8927 MOS_COMMAND_BUFFER cmdBuffer;
8928 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
8929 &cmdBuffer,
8930 kernelState,
8931 encFunctionType,
8932 nullptr));
8933
8934 //Add surface states
8935 startBTI = 0;
8936
8937 // PAK object
8938 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8939 kernelState,
8940 &cmdBuffer,
8941 SURFACE_CU_RECORD,
8942 &bindingTable->dwBindingTableEntries[startBTI++]));
8943
8944 // VME 8x8 mode
8945 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8946 kernelState,
8947 &cmdBuffer,
8948 SURFACE_VME_8x8,
8949 &bindingTable->dwBindingTableEntries[startBTI++]));
8950
8951 // Intra mode
8952 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8953 kernelState,
8954 &cmdBuffer,
8955 SURFACE_INTRA_MODE,
8956 &bindingTable->dwBindingTableEntries[startBTI++]));
8957
8958 // PAK command
8959 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8960 kernelState,
8961 &cmdBuffer,
8962 SURFACE_HCP_PAK,
8963 &bindingTable->dwBindingTableEntries[startBTI++]));
8964
8965 // Slice Map
8966 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8967 kernelState,
8968 &cmdBuffer,
8969 SURFACE_SLICE_MAP,
8970 &bindingTable->dwBindingTableEntries[startBTI++]));
8971
8972 // Intra dist
8973 m_surfaceParams[SURFACE_INTRA_DIST].bIsWritable =
8974 m_surfaceParams[SURFACE_INTRA_DIST].bRenderTarget = true;
8975 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8976 kernelState,
8977 &cmdBuffer,
8978 SURFACE_INTRA_DIST,
8979 &bindingTable->dwBindingTableEntries[startBTI++]));
8980
8981 // BRC Input
8982 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8983 kernelState,
8984 &cmdBuffer,
8985 SURFACE_BRC_INPUT,
8986 &bindingTable->dwBindingTableEntries[startBTI++]));
8987
8988 // Simplest Intra
8989 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8990 kernelState,
8991 &cmdBuffer,
8992 SURFACE_SIMPLIFIED_INTRA,
8993 &bindingTable->dwBindingTableEntries[startBTI++]));
8994
8995 // LCU Qp surface
8996 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
8997 kernelState,
8998 &cmdBuffer,
8999 SURFACE_LCU_QP,
9000 &bindingTable->dwBindingTableEntries[startBTI++]));
9001
9002 // BRC data surface
9003 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9004 kernelState,
9005 &cmdBuffer,
9006 SURFACE_BRC_DATA,
9007 &bindingTable->dwBindingTableEntries[startBTI++]));
9008
9009 if (!m_hwWalker)
9010 {
9011 eStatus = MOS_STATUS_UNKNOWN;
9012 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
9013 return eStatus;
9014 }
9015
9016 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
9017 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
9018 walkerCodecParams.WalkerMode = m_walkerMode;
9019 // each EU is based on one LCU
9020 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, (1<<log2MaxCUSize)) >> log2MaxCUSize;
9021 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, (1<<log2MaxCUSize)) >> log2MaxCUSize;
9022 /* Enforce no dependency dispatch order for 8x8 PU FMODE kernel */
9023 walkerCodecParams.bNoDependency = true;
9024
9025 MHW_WALKER_PARAMS walkerParams;
9026 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
9027 m_hwInterface,
9028 &walkerParams,
9029 &walkerCodecParams));
9030
9031 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
9032 &cmdBuffer,
9033 &walkerParams));
9034
9035 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
9036 encFunctionType,
9037 kernelState,
9038 &cmdBuffer));
9039
9040 return eStatus;
9041 }
9042
EncodeDSCombinedKernel(DsStage downScaleStage,uint32_t index,uint32_t refListIdx)9043 MOS_STATUS CodechalEncHevcStateG9::EncodeDSCombinedKernel(
9044 DsStage downScaleStage,
9045 uint32_t index,
9046 uint32_t refListIdx)
9047 {
9048 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9049
9050 CODECHAL_ENCODE_FUNCTION_ENTER;
9051
9052 if (m_scalingEnabled)
9053 {
9054 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_trackedBuf->AllocateSurfaceDS());
9055 }
9056
9057 PerfTagSetting perfTag;
9058 perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL :
9059 CODECHAL_ENCODE_PERFTAG_CALL_DS_CONVERSION_KERNEL;
9060 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, perfTag.CallType);
9061
9062 uint32_t krnIdx = CODECHAL_HEVC_MBENC_DS_COMBINED;
9063 auto kernelState = &m_mbEncKernelStates[krnIdx];
9064 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
9065 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
9066 {
9067 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
9068 }
9069
9070 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
9071 m_osInterface,
9072 &m_scaled2xSurface));
9073
9074 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
9075 m_osInterface,
9076 &m_formatConvertedSurface[index]));
9077
9078 //Setup DSH
9079 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
9080 m_stateHeapInterface,
9081 kernelState,
9082 false,
9083 0,
9084 false,
9085 m_storeData));
9086
9087 //Setup Scaling CURBE
9088 CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9 cmd, *curbe = &cmd;
9089
9090 MOS_ZeroMemory(curbe, sizeof(*curbe));
9091 curbe->DW0.Pak_BitDepth_Chroma = 10;
9092 curbe->DW0.Pak_BitDepth_Luma = 10;
9093 curbe->DW0.Enc_BitDepth_Chroma = 8;
9094 curbe->DW0.Enc_BitDepth_Luma = 8;
9095 curbe->DW0.Rounding_Value = 1;
9096
9097 curbe->DW1.PicFormat = 0;
9098 curbe->DW1.PicConvertFlag = 1;
9099 curbe->DW1.PicDownscale = downScaleStage;//Downscale stage
9100 curbe->DW1.PicMBStatOutputCntrl = 0;
9101
9102 curbe->DW2.OrigPicWidth = m_frameWidth;
9103 curbe->DW2.OrigPicHeight = m_frameHeight;
9104
9105 uint32_t startBTI = 0;
9106 curbe->DW3.BTI_Surface_P010 = bindingTable->dwBindingTableEntries[startBTI];
9107 startBTI += 2; // increment by no of planes
9108 curbe->DW4.BTI_Surface_NV12 = bindingTable->dwBindingTableEntries[startBTI];
9109 startBTI += 2; // increment by no of planes
9110 curbe->DW5.BTI_Src_Y_4xDownScaled = bindingTable->dwBindingTableEntries[startBTI++];
9111 curbe->DW6.BTI_Surf_MBState = bindingTable->dwBindingTableEntries[startBTI++];
9112 curbe->DW7.BTI_Src_Y_2xDownScaled = bindingTable->dwBindingTableEntries[startBTI++];
9113
9114 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
9115 CODECHAL_ENCODE_CHK_STATUS_RETURN(
9116 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))
9117 );
9118
9119 MOS_COMMAND_BUFFER cmdBuffer;
9120 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
9121 &cmdBuffer,
9122 kernelState,
9123 encFunctionType,
9124 nullptr));
9125
9126 // Add surface states, 2X scaling uses U32Norm surface format for destination
9127 startBTI = 0;
9128
9129 if (index == 0)
9130 {
9131 // Source surface/s -- 10 bit YUV
9132 m_surfaceParams[SURFACE_RAW_10bit_Y_UV].bUseUVPlane = true;
9133 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9134 kernelState,
9135 &cmdBuffer,
9136 SURFACE_RAW_10bit_Y_UV,
9137 &bindingTable->dwBindingTableEntries[startBTI],
9138 m_rawSurfaceToEnc
9139 ));
9140 }
9141 else
9142 {
9143 // Source surface/s -- 10 bit YUV
9144 m_surfaceParams[SURFACE_RAW_10bit_Y_UV].bUseUVPlane = true;
9145 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9146 kernelState,
9147 &cmdBuffer,
9148 SURFACE_RAW_10bit_Y_UV,
9149 &bindingTable->dwBindingTableEntries[startBTI],
9150 &(m_refList[refListIdx]->sRefReconBuffer)));
9151 }
9152 startBTI += 2; // advance binding table pointer to next surface setting
9153
9154 // Destination surface/s -- 8 bit Format converted surface
9155 m_formatConvertedSurface[index].dwWidth = m_frameWidth;
9156 m_formatConvertedSurface[index].dwHeight = m_frameHeight;
9157 m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bUse32UnormSurfaceFormat = false;
9158 m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bUse16UnormSurfaceFormat = false;
9159 m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bUseUVPlane = true;
9160 m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bIsWritable =
9161 m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV].bRenderTarget = true;
9162 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9163 kernelState,
9164 &cmdBuffer,
9165 SURFACE_RAW_FC_8bit_Y_UV,
9166 &bindingTable->dwBindingTableEntries[startBTI],
9167 &m_formatConvertedSurface[index]));
9168
9169 startBTI += 2;
9170
9171 // Destination surface/s -- 4x downscaled luma only
9172 m_surfaceParams[SURFACE_Y_4X].bUse32UnormSurfaceFormat =
9173 m_surfaceParams[SURFACE_Y_4X].bIsWritable =
9174 m_surfaceParams[SURFACE_Y_4X].bRenderTarget = true;
9175
9176 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9177 kernelState,
9178 &cmdBuffer,
9179 SURFACE_Y_4X,
9180 &bindingTable->dwBindingTableEntries[startBTI],
9181 m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER)));
9182
9183 startBTI++;
9184
9185 //Destination Surface -- MB Stat surface 1D buffer
9186 m_surfaceParams[SURFACE_RAW_MBSTAT].bUse32UnormSurfaceFormat = false;
9187 m_surfaceParams[SURFACE_RAW_MBSTAT].bUse16UnormSurfaceFormat = false;
9188 m_surfaceParams[SURFACE_RAW_MBSTAT].bIsWritable =
9189 m_surfaceParams[SURFACE_RAW_MBSTAT].bRenderTarget = true;
9190 m_surfaceParams[SURFACE_RAW_MBSTAT].dwSize = m_resMbStatisticsSurface.dwSize;
9191 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9192 kernelState,
9193 &cmdBuffer,
9194 SURFACE_RAW_MBSTAT,
9195 &bindingTable->dwBindingTableEntries[startBTI],
9196 &m_resMbStatisticsSurface.sResource));
9197
9198 startBTI++;
9199
9200 // Destination surface/s -- 2x downscaled luma only
9201 m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_WIDTH * 2));
9202 m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_HEIGHT * 2));
9203
9204 m_surfaceParams[SURFACE_Y_2X].bUse32UnormSurfaceFormat =
9205 m_surfaceParams[SURFACE_Y_2X].bIsWritable =
9206 m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
9207
9208 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
9209 kernelState,
9210 &cmdBuffer,
9211 SURFACE_Y_2X,
9212 &bindingTable->dwBindingTableEntries[startBTI]
9213 ));
9214
9215 //move back to 16 aligned..
9216 m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_WIDTH));
9217 m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), (CODECHAL_MACROBLOCK_HEIGHT));
9218
9219 m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat =
9220 m_surfaceParams[SURFACE_Y_2X].bIsWritable =
9221 m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
9222
9223 if (!m_hwWalker)
9224 {
9225 eStatus = MOS_STATUS_UNKNOWN;
9226 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
9227 return eStatus;
9228 }
9229
9230 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
9231 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
9232
9233 /* first stage of the downscale and convert kernel can do conversion + 4x + 2x */
9234 walkerCodecParams.WalkerMode = m_walkerMode;
9235 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL((m_frameWidth >> 2), 32) >> 3;
9236 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL((m_frameHeight >> 2), 32) >> 3;
9237
9238 /* Enforce no dependency dispatch order for Scaling kernel, */
9239 walkerCodecParams.bNoDependency = true;
9240
9241 MHW_WALKER_PARAMS walkerParams;
9242 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
9243 m_hwInterface,
9244 &walkerParams,
9245 &walkerCodecParams));
9246
9247 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
9248 &cmdBuffer,
9249 &walkerParams));
9250
9251 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
9252 encFunctionType,
9253 kernelState,
9254 &cmdBuffer));
9255
9256 return eStatus;
9257 }
9258
EncodeDSKernel()9259 MOS_STATUS CodechalEncHevcStateG9::EncodeDSKernel()
9260 {
9261 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9262
9263 CODECHAL_ENCODE_FUNCTION_ENTER;
9264
9265 // Walker must be used for HME call and scaling one
9266 CODECHAL_ENCODE_ASSERT(m_hwWalker);
9267
9268 //perform 4x down-scaling
9269 if (MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrEncodeHEVC10bit) && (m_hevcSeqParams->bit_depth_luma_minus8) && m_scalingEnabled)
9270 {
9271 m_lastTaskInPhase = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
9272 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsStage2x4x, 0, 0));
9273
9274 //Dump format converted input surface
9275 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9276 &m_formatConvertedSurface[0],
9277 CodechalDbgAttr::attrEncodeRawInputSurface,
9278 "SrcSurf")));
9279
9280 //Scaled surface
9281 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9282 m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
9283 CodechalDbgAttr::attrEncodeRawInputSurface,
9284 "SrcSurf")));
9285
9286 //Scaled surface
9287 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9288 &m_scaled2xSurface,
9289 CodechalDbgAttr::attrEncodeRawInputSurface,
9290 "SrcSurf")));
9291
9292 // call 16x/32x DS
9293 if (m_16xMeSupported)
9294 {
9295 m_lastTaskInPhase = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
9296
9297 // 4x downscaled images used as the input for 16x downscaling
9298 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
9299 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
9300 cscScalingKernelParams.b16xScalingInUse = true;
9301 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->DsKernel(&cscScalingKernelParams));
9302
9303 if (m_32xMeSupported)
9304 {
9305 m_lastTaskInPhase = !(m_hmeEnabled || m_brcEnabled);
9306
9307 // 16x downscaled images used as the input for 32x downscaling
9308 cscScalingKernelParams.b32xScalingInUse = true;
9309 cscScalingKernelParams.b16xScalingInUse = false;
9310 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->DsKernel(&cscScalingKernelParams));
9311 }
9312 }
9313 }
9314 else
9315 {
9316 // Csc, Downscaling, and/or 10-bit to 8-bit conversion
9317 CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState);
9318
9319 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
9320 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
9321 cscScalingKernelParams.bLastTaskInPhaseCSC =
9322 cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
9323 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
9324 cscScalingKernelParams.bLastTaskInPhase32xDS = !(m_hmeEnabled || m_brcEnabled);
9325
9326 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
9327 }
9328
9329 // wait on the current MbCode object if needed
9330 if (m_hevcPicParams->bUsedAsRef || (m_brcEnabled && !m_hevcSeqParams->ParallelBRC))
9331 {
9332 m_currRefSync = &m_refSync[m_currMbCodeIdx];
9333
9334 // Check if the signal obj has been used before
9335 if (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed)
9336 {
9337 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
9338 syncParams.GpuContext = m_renderContext;
9339 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
9340 syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount;
9341
9342 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
9343 m_currRefSync->uiSemaphoreObjCount = 0;
9344 m_currRefSync->bInUsed = false;
9345 }
9346 }
9347 else
9348 {
9349 m_currRefSync = nullptr;
9350 }
9351
9352 return eStatus;
9353 }
9354
EncodeKernelFunctions()9355 MOS_STATUS CodechalEncHevcStateG9::EncodeKernelFunctions()
9356 {
9357 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9358
9359 CODECHAL_ENCODE_FUNCTION_ENTER;
9360
9361 CODECHAL_DEBUG_TOOL(
9362 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9363 m_rawSurfaceToEnc,
9364 CodechalDbgAttr::attrEncodeRawInputSurface,
9365 "SrcSurf")));
9366 )
9367
9368 if (m_pakOnlyTest)
9369 {
9370 // Skip all ENC kernel operations for now it is in the PAK only test mode.
9371 // PAK and CU records will be passed via the app
9372 return eStatus;
9373 }
9374
9375 UpdateSSDSliceCount();
9376
9377 // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
9378 if (m_brcEnabled && (m_brcInit || m_brcReset))
9379 {
9380 m_firstTaskInPhase = m_lastTaskInPhase = true;
9381 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcInitResetKernel());
9382 m_brcInit = m_brcReset = false;
9383 }
9384
9385 // Scaled surfaces are required to run both HME and IFrameDist
9386 bool scalingEnabled = (m_hmeSupported || m_brcEnabled);
9387 if (scalingEnabled || m_cscDsState->RequireCsc())
9388 {
9389 //Use a different performance tag ID for scaling and HME
9390 m_osInterface->pfnResetPerfBufferID(m_osInterface);
9391
9392 m_firstTaskInPhase = true;
9393 m_lastTaskInPhase = false;
9394
9395 if(m_hevcSeqParams->GopPicSize != 1 || m_brcEnabled || m_cscDsState->RequireCsc())
9396 {
9397 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSKernel());
9398 }
9399
9400 if (m_brcEnabled)
9401 {
9402 // LCU-based BRC update kernel needs both intra and inter (from HME) distortion
9403 m_lastTaskInPhase = (m_pictureCodingType == I_TYPE);
9404 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeCoarseIntra16x16Kernel());
9405 }
9406
9407 // only need to call HME kernel when HME enabled and NOT I-frame
9408 if (m_hmeEnabled)
9409 {
9410 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel());
9411 }
9412 }
9413
9414 if(m_osInterface->bSimIsActive)
9415 {
9416 // Clean MB code buffer to ensure there is no previous CU record and PAK command
9417 MOS_LOCK_PARAMS lockFlags;
9418 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
9419 lockFlags.WriteOnly = 1;
9420
9421 uint8_t* data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &m_resMbCodeSurface, &lockFlags);
9422 if (data)
9423 {
9424 MOS_ZeroMemory(data, m_mbCodeSize);
9425 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
9426 }
9427 }
9428
9429 // Generate slice map for kernel
9430 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSliceMap());
9431
9432 //Reset to use a different performance tag ID for I kernels. Each kernel has a different buffer ID
9433 m_osInterface->pfnResetPerfBufferID(m_osInterface);
9434
9435 m_firstTaskInPhase = true;
9436 m_lastTaskInPhase = false;
9437
9438 // ROI uses the BRC LCU update kernel, even in CQP. So we will call it
9439 // first if in CQP. It has no other kernel execution dependencies, even
9440 // that brc is not initialized is not a dependency
9441 if (m_hevcPicParams->NumROI && !m_brcEnabled)
9442 {
9443 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(nullptr));
9444 }
9445
9446 /* When TU=7, fast encoding mode is ON, and I kernels are not needed.
9447 Instead, MB ENC B kernel is used to replace I kernels.
9448 */
9449 bool fastEncodingFlag = (m_hevcSeqParams->TargetUsage == 0x7);
9450 bool brcUpdateComplete = false;
9451
9452 if(fastEncodingFlag)
9453 {
9454 if (m_hevcPicParams->CodingType == I_TYPE)
9455 {
9456 // BRC and MbEnc are included in the same task phase
9457 if (m_brcEnabled && !brcUpdateComplete)
9458 {
9459 // BRC needs previous PAK result if not running in the parallel BRC mode
9460 // If yes, BRC is using the PAk result of the frame before the previous one
9461 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
9462
9463 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateKernel());
9464
9465 // Reset buffer ID used for BRC kernel performance reports
9466 m_osInterface->pfnResetPerfBufferID(m_osInterface);
9467 brcUpdateComplete = true;
9468 }
9469 else if (!m_brcEnabled)
9470 {
9471 if (m_encodeParams.bMbQpDataEnabled && m_encodeParams.psMbQpDataSurface)
9472 {
9473 auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
9474 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
9475 Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_brcBuffers.sBrcMbQpBuffer);
9476 }
9477 }
9478
9479 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
9480 }
9481 }
9482 else
9483 {
9484 // BRC and MbEnc are included in the same task phase
9485 if (m_brcEnabled && !brcUpdateComplete)
9486 {
9487 // BRC needs previous PAK result
9488 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
9489
9490 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateKernel());
9491
9492 // Reset buffer ID used for BRC kernel performance reports
9493 m_osInterface->pfnResetPerfBufferID(m_osInterface);
9494 brcUpdateComplete = true;
9495 }
9496 else if (!m_brcEnabled)
9497 {
9498 if (m_encodeParams.bMbQpDataEnabled && m_encodeParams.psMbQpDataSurface)
9499 {
9500 auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
9501 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
9502 Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_brcBuffers.sBrcMbQpBuffer);
9503 }
9504 }
9505
9506 //Step 1: perform 2:1 down-scaling
9507 if (m_hevcSeqParams->bit_depth_luma_minus8 == 0) // use this for 8 bit only case.
9508 {
9509 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode2xScalingKernel());
9510 }
9511
9512 //Step 2: 32x32 PU Mode Decision or 32x32 PU Intra check kernel
9513 if (m_hevcPicParams->CodingType == I_TYPE)
9514 {
9515 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32x32PuModeDecisionKernel());
9516 }
9517 else
9518 {
9519 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32X32BIntraCheckKernel());
9520 }
9521
9522 //Step 3: 16x16 SAD Computation
9523 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16SadPuComputationKernel());
9524
9525 CODECHAL_DEBUG_TOOL(
9526 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9527 &m_sad16x16Pu.sResource,
9528 CodechalDbgAttr::attrOutput,
9529 "HEVC_16x16_PU_SAD_Out",
9530 m_sad16x16Pu.dwSize,
9531 0,
9532 CODECHAL_MEDIA_STATE_16x16_PU_SAD));
9533 )
9534
9535 //Step 4: 16x16 PU Mode Decision
9536 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16PuModeDecisionKernel());
9537
9538 CODECHAL_DEBUG_TOOL(
9539 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9540 &m_vme8x8Mode.sResource,
9541 CodechalDbgAttr::attrOutput,
9542 "HEVC_16x16_PU_MD_Out",
9543 m_vme8x8Mode.dwSize,
9544 0,
9545 CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION));
9546 )
9547
9548 //Step 5: 8x8 PU
9549 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUKernel());
9550
9551 //Step 6: 8x8 PU FMODE
9552 m_lastTaskInPhase = true;
9553 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUFMODEKernel());
9554
9555 CODECHAL_DEBUG_TOOL(
9556 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
9557 &m_scaled2xSurface,
9558 CodechalDbgAttr::attrReferenceSurfaces,
9559 "2xScaledSurf"));
9560
9561 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
9562 &m_simplestIntraSurface,
9563 CodechalDbgAttr::attrOutput,
9564 "HEVC_32x32_SIF_Out",
9565 CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK));
9566
9567 if (m_pictureCodingType == I_TYPE)
9568 {
9569 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9570 &m_32x32PuOutputData.sResource,
9571 CodechalDbgAttr::attrOutput,
9572 "HEVC_32x32_PU_MD_Out",
9573 m_32x32PuOutputData.dwSize,
9574 0,
9575 CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
9576 }
9577 else
9578 {
9579 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9580 &m_32x32PuOutputData.sResource,
9581 CodechalDbgAttr::attrOutput,
9582 "HEVC_32x32_B_INTRA_CHECK_Out",
9583 m_32x32PuOutputData.dwSize,
9584 0,
9585 CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
9586
9587 }
9588
9589 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9590 &m_intraMode.sResource,
9591 CodechalDbgAttr::attrOutput,
9592 "HEVC_8x8_PU_MD_Out",
9593 m_intraMode.dwSize,
9594 0,
9595 CODECHAL_MEDIA_STATE_8x8_PU));
9596
9597 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9598 &m_intraDist.sResource,
9599 CodechalDbgAttr::attrOutput,
9600 "HEVC_8x8_PU_FMOD_Out",
9601 m_intraDist.dwSize,
9602 0,
9603 CODECHAL_MEDIA_STATE_8x8_PU_FMODE));
9604 )
9605 }
9606
9607 // Sync-wait can be executed after I-kernel is submitted before there is no dependency for I to wait for PAK to be ready
9608 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
9609
9610 //Step 7: B MB ENC kernel for B picture only
9611 if (m_hevcPicParams->CodingType != I_TYPE)
9612 {
9613 m_firstTaskInPhase = true;
9614 m_lastTaskInPhase = false;
9615
9616 // BRC and MbEnc are included in the same task phase
9617 if (m_brcEnabled && !brcUpdateComplete)
9618 {
9619 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateKernel());
9620
9621 // Reset buffer ID used for BRC kernel performance reports
9622 m_osInterface->pfnResetPerfBufferID(m_osInterface);
9623 brcUpdateComplete = true;
9624 }
9625 else if (!m_brcEnabled)
9626 {
9627 if (m_encodeParams.bMbQpDataEnabled && m_encodeParams.psMbQpDataSurface)
9628 {
9629 auto brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
9630 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
9631 Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_brcBuffers.sBrcMbQpBuffer);
9632 }
9633 }
9634
9635 if ((m_hevcSeqParams->bit_depth_luma_minus8))
9636 {
9637 bool formatConversionDone[NUM_FORMAT_CONV_FRAMES] = { false };
9638 formatConversionDone[0] = true; // always true since its for the input surface.
9639
9640 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
9641 {
9642 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
9643 {
9644 continue;
9645 }
9646
9647 uint8_t picIdx = m_picIdx[i].ucPicIdx;
9648 CODECHAL_ENCODE_ASSERT(picIdx < 127);
9649
9650 uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i];
9651
9652 if (frameStoreId >= CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC)
9653 {
9654 CODECHAL_ENCODE_ASSERT(false);
9655 eStatus = MOS_STATUS_INVALID_PARAMETER;
9656 return eStatus;
9657 }
9658
9659 if (formatConversionDone[frameStoreId + 1] != true)
9660 {
9661 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsDisabled, (frameStoreId + 1), picIdx));
9662 formatConversionDone[frameStoreId + 1] = true;
9663 m_refList[picIdx]->sRefBuffer = m_formatConvertedSurface[frameStoreId + 1];
9664 }
9665 }
9666 }
9667
9668 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
9669 }
9670
9671 // Notify PAK engine once ENC is done
9672 if (!m_pakOnlyTest && !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
9673 {
9674 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
9675 syncParams.GpuContext = m_renderContext;
9676 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
9677
9678 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
9679 }
9680
9681 if (m_brcEnabled && m_hevcSeqParams->ParallelBRC)
9682 {
9683 m_brcBuffers.uiCurrBrcPakStasIdxForRead = (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
9684 }
9685
9686 return eStatus;
9687 }
9688
CheckBrcPakStasBuffer(PMOS_COMMAND_BUFFER cmdBuffer)9689 MOS_STATUS CodechalEncHevcStateG9::CheckBrcPakStasBuffer(
9690 PMOS_COMMAND_BUFFER cmdBuffer)
9691 {
9692 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9693
9694 CODECHAL_ENCODE_FUNCTION_ENTER;
9695
9696 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9697
9698 auto brcPakStas = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead];
9699
9700 /*
9701 1. The following assembly code is used to implement the following C statements.
9702
9703 if( ((MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL))->hcpCumulativeFrameDeltaQp <
9704 ((MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS))->hcpCumulativeFrameDeltaQp)
9705 {
9706 (MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL))->hcpCumulativeFrameDeltaQp =
9707 MHW_VDBOX_IMAGE_STATUS_CONTROL*)&(p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS))->hcpCumulativeFrameDeltaQp;
9708 }
9709
9710 2. The if statement can be replaced by and-or statements. That is,
9711 (a) a = (a < b) ? b : a;
9712 (b) mask = (a - b) >> 32; a = (b & mask) | (a & !mask);
9713 where (a) and (b) are identical and each variable is assumed to be a 64-bit unsigned integer
9714
9715 3. Totally there are 71 DWs
9716 */
9717 if(cmdBuffer->iRemaining < 71 * sizeof(uint32_t))
9718 {
9719 eStatus = MOS_STATUS_NO_SPACE;
9720 return eStatus;
9721 }
9722
9723 // reg0 = p->HCP_IMAGE_STATUS_CONTROL
9724 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
9725 miLoadRegMemParams.presStoreBuffer = brcPakStas;
9726 miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
9727 miLoadRegMemParams.dwRegister = CS_GPR_REGISTER_INDEX(0);
9728 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
9729
9730 MHW_MI_LOAD_REGISTER_IMM_PARAMS miLoadRegImmParams;
9731 miLoadRegImmParams.dwData = 0;
9732 miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(0) + 4);
9733 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9734
9735 // reg1 = p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS
9736 miLoadRegMemParams.presStoreBuffer = brcPakStas;
9737 miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
9738 miLoadRegMemParams.dwRegister = CS_GPR_REGISTER_INDEX(1);
9739 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
9740 miLoadRegImmParams.dwData = 0;
9741 miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(1) + 4);
9742 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9743
9744 // reg2 = 0xFF000000
9745 miLoadRegImmParams.dwData = 0xFF000000;
9746 miLoadRegImmParams.dwRegister = CS_GPR_REGISTER_INDEX(2);
9747 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9748 miLoadRegImmParams.dwData = 0;
9749 miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(2) + 4);
9750 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9751
9752 // reg3 = reg0 & 0xFF000000
9753 uint32_t csALUCmdNum = 0;
9754 MHW_MI_ALU_PARAMS miAluParams[64] = { 0 };
9755
9756 // reg3 = reg0 & 0xFF000000
9757 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 0); // load srcA, reg0
9758 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 2); // load srcB, reg2
9759 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND; // and srcA, srcB
9760 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(3); // store reg3, alu
9761
9762 // reg4 = reg1 & 0xFF000000
9763 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 1); // load srcA, reg1
9764 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 2); // load srcB, reg2
9765 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND; // and srcA, srcB
9766 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(4); // store reg4, alu
9767
9768 // reg5 = reg3 - reg4
9769 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 3); // load srcA, reg3
9770 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 4); // load srcB, reg4
9771 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_SUB; // sub srcA, srcB
9772 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(5); // store reg5, alu
9773
9774 if (csALUCmdNum >= sizeof(miAluParams) / sizeof(miAluParams[0]))
9775 {
9776 eStatus = MOS_STATUS_NO_SPACE;
9777 return eStatus;
9778 }
9779
9780 MHW_MI_MATH_PARAMS miMathParams;
9781 miMathParams.dwNumAluParams = csALUCmdNum;
9782 miMathParams.pAluPayload = miAluParams;
9783 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(cmdBuffer, &miMathParams));
9784
9785 // reg5 = reg5 >> 32;
9786 MHW_MI_LOAD_REGISTER_REG_PARAMS miLoadRegRegParams;
9787 MOS_ZeroMemory(&miLoadRegRegParams, sizeof(miLoadRegRegParams));
9788 miLoadRegRegParams.dwSrcRegister = CS_GPR_REGISTER_INDEX(5) + 4;
9789 miLoadRegRegParams.dwDstRegister = CS_GPR_REGISTER_INDEX(5) + 0;
9790 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterRegCmd(cmdBuffer, &miLoadRegRegParams));
9791 miLoadRegImmParams.dwData = 0;
9792 miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(5) + 4);
9793 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9794
9795 // reg6 = 0x00000000FFFFFFFF;
9796 miLoadRegImmParams.dwData = 0xFFFFFFFF;
9797 miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(6));
9798 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9799 miLoadRegImmParams.dwData = 0;
9800 miLoadRegImmParams.dwRegister = (CS_GPR_REGISTER_INDEX(6) + 4);
9801 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &miLoadRegImmParams));
9802
9803 csALUCmdNum = 0;
9804 MOS_ZeroMemory(miAluParams, sizeof(miAluParams));
9805
9806 // reg6 = reg5 ^ 0x00000000FFFFFFFF;
9807 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 5); // load srcA, reg5
9808 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 6); // load srcB, reg6
9809 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_XOR; // xor srcA, srcB
9810 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(6); // store reg6, alu
9811
9812 // reg1 = reg1 & reg5
9813 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 1); // load srcA, reg1
9814 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 5); // load srcB, reg5
9815 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND; // and srcA, srcB
9816 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(1); // store reg1, alu
9817
9818 // reg0 = reg0 & reg6
9819 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 0); // load srcA, reg0
9820 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 6); // load srcB, reg6
9821 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_AND; // and srcA, srcB
9822 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(0); // store reg0, alu
9823
9824 // reg0 = reg0 | reg1
9825 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(1, 0); // load srcA, reg0
9826 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_LOAD(0, 1); // load srcB, reg1
9827 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_OR; // or srcA, srcB
9828 miAluParams[csALUCmdNum++].Value = CS_ALU_COMMAND_STORE_ACCU(0); // store reg0, alu
9829
9830 if (csALUCmdNum >= sizeof(miAluParams) / sizeof(miAluParams[0]))
9831 {
9832 eStatus = MOS_STATUS_NO_SPACE;
9833 return eStatus;
9834 }
9835
9836 miMathParams.dwNumAluParams = csALUCmdNum;
9837 miMathParams.pAluPayload = miAluParams;
9838 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(cmdBuffer, &miMathParams));
9839
9840 // p->HCP_IMAGE_STATUS_CONTROL = reg0
9841 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
9842 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
9843 miStoreRegMemParams.presStoreBuffer = brcPakStas;
9844 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
9845 miStoreRegMemParams.dwRegister = CS_GPR_REGISTER_INDEX(0);
9846 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
9847
9848 // p->HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS = 0
9849 MHW_MI_STORE_DATA_PARAMS miStoreDataImmParams;
9850 miStoreDataImmParams.pOsResource = brcPakStas;
9851 miStoreDataImmParams.dwResourceOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
9852 miStoreDataImmParams.dwValue = 0;
9853 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &miStoreDataImmParams));
9854
9855 return eStatus;
9856 }
9857
CodechalEncHevcStateG9(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)9858 CodechalEncHevcStateG9::CodechalEncHevcStateG9(
9859 CodechalHwInterface* hwInterface,
9860 CodechalDebugInterface* debugInterface,
9861 PCODECHAL_STANDARD_INFO standardInfo)
9862 :CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
9863 {
9864 m_fieldScalingOutputInterleaved = false;
9865 m_brcHistoryBufferSize = BRC_HISTORY_BUFFER_SIZE;
9866 m_kuid = IDR_CODEC_HEVC_COMBINED_KENREL_INTEL;
9867 m_kernelBase = (uint8_t*)IGCODECKRN_G9;
9868
9869 MOS_ZeroMemory(&m_scaled2xSurface, sizeof(m_scaled2xSurface));
9870 MOS_ZeroMemory(&m_sliceMapSurface, sizeof(m_sliceMapSurface));
9871 MOS_ZeroMemory(&m_32x32PuOutputData, sizeof(m_32x32PuOutputData));
9872 MOS_ZeroMemory(&m_sad16x16Pu, sizeof(m_sad16x16Pu));
9873 MOS_ZeroMemory(&m_vme8x8Mode, sizeof(m_vme8x8Mode));
9874 MOS_ZeroMemory(&m_intraMode, sizeof(m_intraMode));
9875 MOS_ZeroMemory(&m_intraDist, sizeof(m_intraDist));
9876 MOS_ZeroMemory(&m_simplestIntraSurface, sizeof(m_simplestIntraSurface));
9877 MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface));
9878 MOS_ZeroMemory(&m_concurrentThreadSurface, sizeof(m_concurrentThreadSurface));
9879 MOS_ZeroMemory(&m_walkingPatternParam, sizeof(m_walkingPatternParam));
9880 MOS_ZeroMemory(&m_minDistortion, sizeof(m_minDistortion));
9881 MOS_ZeroMemory(&m_vmeSavedUniSic, sizeof(m_vmeSavedUniSic));
9882 MOS_ZeroMemory(&m_mvIndex, sizeof(m_mvIndex));
9883 MOS_ZeroMemory(&m_mvpIndex, sizeof(m_mvpIndex));
9884
9885 m_numRegionsInSlice = 4;
9886 }
9887
InitMhw()9888 MOS_STATUS CodechalEncHevcStateG9::InitMhw()
9889 {
9890 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9891
9892 // MHW set-up
9893 m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
9894 m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
9895
9896 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
9897 m_kernelBase,
9898 m_kuid,
9899 &m_kernelBinary,
9900 &m_combinedKernelSize));
9901
9902 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
9903 MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
9904
9905 return eStatus;
9906 }
9907
UserFeatureKeyReport()9908 MOS_STATUS CodechalEncHevcStateG9::UserFeatureKeyReport()
9909 {
9910 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9911
9912 CODECHAL_ENCODE_FUNCTION_ENTER;
9913
9914 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport());
9915
9916 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING, m_powerSavingEnabled, m_osInterface->pOsContext);
9917 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT, m_numMbBKernelSplit, m_osInterface->pOsContext);
9918 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT, m_numMb8x8IntraKernelSplit, m_osInterface->pOsContext);
9919
9920 return eStatus;
9921 }
9922
Initialize(CodechalSetting * settings)9923 MOS_STATUS CodechalEncHevcStateG9::Initialize(CodechalSetting * settings)
9924 {
9925 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9926
9927 CODECHAL_ENCODE_FUNCTION_ENTER;
9928
9929 // common initilization
9930 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
9931
9932 m_bmeMethodTable = (uint8_t *)m_meMethod;
9933 m_meMethodTable = (uint8_t *)m_meMethod;
9934
9935 m_brcBuffers.dwBrcConstantSurfaceWidth = BRC_CONSTANT_SURFACE_WIDTH;
9936 m_brcBuffers.dwBrcConstantSurfaceHeight = BRC_CONSTANT_SURFACE_HEIGHT;
9937
9938 // LCU size is 32x32 in Gen9
9939 m_widthAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameWidth, 32);
9940 m_heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameHeight, 32);
9941
9942 // user feature key setup
9943 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
9944 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9945 MOS_UserFeature_ReadValue_ID(
9946 nullptr,
9947 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
9948 &userFeatureData,
9949 m_osInterface->pOsContext);
9950 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
9951
9952 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9953 MOS_UserFeature_ReadValue_ID(
9954 nullptr,
9955 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
9956 &userFeatureData,
9957 m_osInterface->pOsContext);
9958 m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
9959
9960 if (m_codecFunction != CODECHAL_FUNCTION_PAK)
9961 {
9962 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9963 MOS_UserFeature_ReadValue_ID(
9964 nullptr,
9965 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
9966 &userFeatureData,
9967 m_osInterface->pOsContext);
9968 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
9969
9970 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9971 MOS_UserFeature_ReadValue_ID(
9972 nullptr,
9973 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
9974 &userFeatureData,
9975 m_osInterface->pOsContext);
9976 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
9977
9978 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9979 MOS_UserFeature_ReadValue_ID(
9980 nullptr,
9981 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID,
9982 &userFeatureData,
9983 m_osInterface->pOsContext);
9984
9985 if (userFeatureData.i32Data == 0 || userFeatureData.i32Data == 1)
9986 {
9987 m_32xMeUserfeatureControl = true;
9988 m_32xMeSupported = (userFeatureData.i32Data) ? true : false;
9989 }
9990 else
9991 {
9992 m_32xMeUserfeatureControl = false;
9993 m_32xMeSupported = true;
9994 }
9995 }
9996
9997 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
9998 eStatus = MOS_UserFeature_ReadValue_ID(
9999 nullptr,
10000 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
10001 &userFeatureData,
10002 m_osInterface->pOsContext);
10003
10004 if (eStatus == MOS_STATUS_SUCCESS)
10005 {
10006 // Region number must be greater than 1
10007 m_numRegionsInSlice = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
10008 }
10009 else
10010 {
10011 // Reset the status to success if user feature key is not set
10012 eStatus = MOS_STATUS_SUCCESS;
10013 }
10014
10015 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10016 MOS_UserFeature_ReadValue_ID(
10017 nullptr,
10018 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT,
10019 &userFeatureData,
10020 m_osInterface->pOsContext);
10021 m_numMb8x8IntraKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
10022
10023 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10024 MOS_UserFeature_ReadValue_ID(
10025 nullptr,
10026 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT,
10027 &userFeatureData,
10028 m_osInterface->pOsContext);
10029 m_numMbBKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
10030
10031 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10032 MOS_UserFeature_ReadValue_ID(
10033 nullptr,
10034 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING,
10035 &userFeatureData,
10036 m_osInterface->pOsContext);
10037 m_powerSavingEnabled = (userFeatureData.i32Data) ? true : false;
10038
10039 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
10040 {
10041 /* Make the width aligned to a multiple of 32 and then get the no of macroblocks.*/
10042 /* This is done to facilitate the use of format conversion kernel for downscaling to 4x and 2x along with formatconversion of 10 bit data to 8 bit data.
10043 Refer format conversion kernel for further details .
10044 We will use only 4x downscale for HME, Super and ultra HME use the traditional scaling kernels.
10045 */
10046 uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2));
10047 m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x);
10048
10049 }
10050
10051 return eStatus;
10052 }
10053
InitKernelState()10054 MOS_STATUS CodechalEncHevcStateG9::InitKernelState()
10055 {
10056 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10057
10058 CODECHAL_ENCODE_FUNCTION_ENTER;
10059
10060 // Init kernel state
10061 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
10062 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
10063
10064 // Create Hme kernel
10065 m_hmeKernel = MOS_New(CodechalKernelHmeG9, this);
10066 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
10067 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
10068 pfnGetKernelHeaderAndSize,
10069 m_kernelBase,
10070 m_kuid));
10071
10072 return eStatus;
10073 }
10074