xref: /aosp_15_r20/external/intel-media-driver/media_common/agnostic/common/vp/kdll/hal_kerneldll_next.h (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      hal_kerneldll_next.h
24 //! \brief         Fast Compositing dynamic kernel linking/loading definitions
25 //!
26 #ifndef __HAL_KERNELDLL_NEXT_H__
27 #define __HAL_KERNELDLL_NEXT_H__
28 
29 #include "vp_common.h"
30 // Kernel IDs and Kernel Names
31 #include "vpkrnheader.h"  // IDR_VP_TOTAL_NUM_KERNELS
32 #include "cm_fc_ld.h"
33 
#if EMUL

#include "support.h"

// Search callback reason codes (only available when EMUL is non-zero).
// Negative replacement lists are parenthesized so the macros expand as a
// single operand inside any surrounding expression.
#define CB_REASON_SEARCH_FAILED (-1)
#define CB_REASON_UPDATE_FAILED (-2)
#define CB_REASON_BEGIN_SEARCH 0
#define CB_REASON_BEGIN_UPDATE 1
#define CB_REASON_END_SEARCH 2

#endif  // EMUL
48 
// Multiplies n by factor and adds a half-unit bias: +0.5f when n > 0.0f,
// -0.5f otherwise (including n == 0.0f).
// NOTE: n is evaluated twice - do not pass an expression with side effects.
#define ROUND_FLOAT(n, factor) ((n) * (factor) + (((n) > 0.0f) ? 0.5f : -0.5f))

// Clamp bounds used by FLOAT_TO_SHORT (parenthesized because one is negative).
#define MIN_SHORT (-32768.0f)
#define MAX_SHORT (32767.0f)

// Clamps n to [MIN_SHORT, MAX_SHORT] and converts the result to short.
// MOS_MIN / MOS_MAX are not defined in this header; they must be provided by
// an included header. The argument and the whole expansion are parenthesized
// so the macro behaves as a single operand.
#define FLOAT_TO_SHORT(n) ((short)(MOS_MIN(MOS_MAX(MIN_SHORT, (n)), MAX_SHORT)))
54 
/* Search filter: max number of entries to describe a compositing filter */
#define DL_MAX_SEARCH_FILTER_SIZE 10

/* Component kernels and patching */
#define DL_MAX_KERNELS 256        /* max component kernels to combine */
#define DL_MAX_PATCH_DATA_SIZE 64 /* max size of a patch block */
#define DL_MAX_PATCH_BLOCKS 8     /* max number of blocks to patch per patch data */
#define DL_MAX_PATCHES 8          /* max patches to use */
#define DL_MAX_EXPORT_COUNT 64    /* size of the symbol export table */

/* Combined kernels */
#define DL_MAX_COMBINED_KERNELS 64      /* max number of kernels in cache */
#define DL_MAX_SYMBOLS 100              /* max number of import/export symbols in a combined kernel */
#define DL_MAX_KERNEL_SIZE (160 * 1024) /* max output kernel size */

/* CSC and search tree */
#define DL_CSC_MAX 8                     /* 8 CSC matrices max */
#define DL_MAX_SEARCH_NODES_PER_KERNEL 6 /* max number of search nodes for a component kernel (max tree depth) */
#define DL_MAX_COMPONENT_KERNELS 25      /* max number of component kernels that can be combined */
70 
// Combined kernel cache sizing
#define DL_DEFAULT_COMBINED_KERNELS 4  // Default number of kernels in cache
#define DL_NEW_COMBINED_KERNELS 4      // The increased number of kernels in cache each time
#define DL_CACHE_BLOCK_SIZE (160 * 1024)  // Kernel allocation block size
// Combined kernel size: one cache block per newly added kernel
#define DL_COMBINED_KERNEL_CACHE_SIZE (DL_CACHE_BLOCK_SIZE * DL_NEW_COMBINED_KERNELS)

// "Disabled" sentinels and per-feature limits.
// The -1 sentinels are parenthesized so they expand as a single operand.
#define DL_PROCAMP_DISABLED (-1)  // procamp is disabled
#define DL_PROCAMP_MAX 1          // 1 Procamp entry

#define DL_CSC_DISABLED (-1)  // CSC is disabled

#define DL_CSC_MAX_G5 2  // 2 CSC matrices max for Gen5

#define DL_CHROMASITING_DISABLE (-1)  // Chromasiting is disabled
84 
85 #ifdef __cplusplus
86 extern "C" {
87 #endif  // __cplusplus
88 
// Kdll alias for enum _MEDIA_CSPACE (the enum itself is not defined in this header).
typedef enum _MEDIA_CSPACE Kdll_CSpace;

// Parameters for RID_Op_NewEntry
#define RULE_DEFAULT 0
#define RULE_CUSTOM 1
#define RULE_NO_OVERRIDE 255

// Group identifiers - same values as the corresponding RULE_* parameters
#define GROUP_DEFAULT RULE_DEFAULT
#define GROUP_CUSTOM RULE_CUSTOM
#define GROUP_NO_OVERRIDE RULE_NO_OVERRIDE

// ColorFill values (negative value parenthesized so it expands as one operand)
#define ColorFill_Source (-1)
#define ColorFill_False 0
#define ColorFill_True 1

// LumaKey values
#define LumaKey_Source (-1)
#define LumaKey_False 0
#define LumaKey_True 1

// Procamp values
#define Procamp_Source (-1)
111 
// Dynamic Linking rule definitions.
// Each macro masks the rule ID and compares the result against a fixed prefix:
//   RID_IS_MATCH    - (rid & 0xFE00) == 0x0000
//   RID_IS_SET      - (rid & 0xFE00) == 0x0200
//   RID_IS_EXTENDED - (rid & 0xFD00) == 0x0100 (bit 9 is ignored by the mask)
// The argument is parenthesized so a compound expression such as (a | b) is
// masked as a whole instead of binding '&' to its last operand only.
#define RID_IS_MATCH(rid) (((rid) & 0xFE00) == 0x0000)
#define RID_IS_SET(rid) (((rid) & 0xFE00) == 0x0200)
#define RID_IS_EXTENDED(rid) (((rid) & 0xFD00) == 0x0100)
116 
// Rotation mode selector (single enumerator).
typedef enum tagKdll_Rotation
{
    Rotate_Source = 0
} Kdll_Rotation;
122 
// Kinds of kernel patch
typedef enum tagKdll_PatchKind
{
    PatchKind_None = 0,        // 0
    PatchKind_CSC_Coeff_Src0,  // 1
    PatchKind_CSC_Coeff_Src1   // 2
} Kdll_PatchKind;
130 
// Patch rule entry (rule extension): three bit-fields totalling 32 bits.
typedef struct tagKdll_PatchRuleEntry
{
    // Patch destination in bytes (LSB)
    uint32_t Dest : 16;
    // Patch data source in bytes
    uint32_t Source : 8;
    // Patch size in bytes (MSB)
    uint32_t Size : 8;
} Kdll_PatchRuleEntry;
138 
// Table of C strings; declared here, defined in another translation unit.
extern const char *g_cInit_ComponentNames[];
140 
//------------------------------------------------------------
// KERNEL CACHE / LINK
//------------------------------------------------------------

// Import/export header as read from the kernel binary file.
// Declared under 4-byte packing; the default packing is restored afterwards.
#pragma pack(4)
typedef struct tagKdll_LinkFileHeader
{
    uint32_t dwVersion;
    uint32_t dwSize;
    uint32_t dwImports;
    uint32_t dwExports;
} Kdll_LinkFileHeader;
#pragma pack()
154 
// sRGB to stRGB conversion, 3 rows of 4 coefficients:
//   out = C[4r+0] * sR + C[4r+1] * sG + C[4r+2] * sB + C[4r+3]
const float g_cCSC_sRGB_stRGB[12] = {
    0.858824f, 0.000000f, 0.000000f, 16.000000f,  // stR (C0..C3)
    0.000000f, 0.858824f, 0.000000f, 16.000000f,  // stG (C4..C7)
    0.000000f, 0.000000f, 0.858824f, 16.000000f   // stB (C8..C11)
};
167 
// stRGB to sRGB conversion, 3 rows of 4 coefficients:
//   out = C[4r+0] * stR + C[4r+1] * stG + C[4r+2] * stB + C[4r+3]
const float g_cCSC_stRGB_sRGB[12] = {
    1.164384f, 0.000000f, 0.000000f, -18.630137f,  // sR (C0..C3)
    0.000000f, 1.164384f, 0.000000f, -18.630137f,  // sG (C4..C7)
    0.000000f, 0.000000f, 1.164384f, -18.630137f   // sB (C8..C11)
};
180 
// BT2020_RGB to BT2020_limited_RGB conversion, 3 rows of 4 coefficients:
//   out = C[4r+0] * R + C[4r+1] * G + C[4r+2] * B + C[4r+3]
const float g_cCSC_BT2020RGB_BT2020stRGB[12] = {
    0.8563050f, 0.000000f, 0.000000f, 64.000000f,  // sR (C0..C3)
    0.000000f, 0.8563050f, 0.000000f, 64.000000f,  // sG (C4..C7)
    0.000000f, 0.000000f, 0.8563050f, 64.000000f   // sB (C8..C11)
};
194 
// BT2020_limited_RGB to BT2020_RGB conversion, 3 rows of 4 coefficients:
//   out = C[4r+0] * sR + C[4r+1] * sG + C[4r+2] * sB + C[4r+3]
const float g_cCSC_BT2020stRGB_BT2020RGB[12] = {
    1.1678082f, 0.000000f, 0.000000f, -74.739726f,  // R (C0..C3)
    0.000000f, 1.1678082f, 0.000000f, -74.739726f,  // G (C4..C7)
    0.000000f, 0.000000f, 1.1678082f, -74.739726f   // B (C8..C11)
};
208 
// Identity conversion: unit 3x3 part, zero fourth column.
const float g_cCSC_Identity[12] = {
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f
};
212 
// Generic RGB to YUV conversion matrix from BT.601 standard (3x3, one output row per line)
const float g_cCSC_BT601_RGB_YUV[9] = {
    0.299000f, 0.587000f, 0.114000f,
    -0.168736f, -0.331264f, 0.500000f,
    0.500000f, -0.418688f, -0.081312f
};
217 
// Generic RGB to YUV conversion matrix from BT.709 standard (3x3, one output row per line)
const float g_cCSC_BT709_RGB_YUV[9] = {
    0.212600f, 0.715200f, 0.072200f,
    -0.114572f, -0.385428f, 0.500000f,
    0.500000f, -0.454153f, -0.045847f
};
222 
// Generic YUV to RGB conversion matrix from BT.601 standard (3x3, one output row per line)
const float g_cCSC_BT601_YUV_RGB[9] = {
    1.000000f, 0.000000f, 1.402000f,
    1.000000f, -0.344136f, -0.714136f,
    1.000000f, 1.772000f, 0.000000f
};
227 
// Generic YUV to RGB conversion matrix from BT.709 standard (3x3, one output row per line)
const float g_cCSC_BT709_YUV_RGB[9] = {
    1.000000f, 0.000000f, 1.574800f,
    1.000000f, -0.187324f, -0.468124f,
    1.000000f, 1.855600f, 0.000000f
};
// BT2020 RGB to Non-constant YUV conversion matrix from R-REC-BT.2020-1-201406-I!!PDF-E.pdf
const float g_cCSC_BT2020_RGB_YUV[9] = {
    0.262700f, 0.678000f, 0.059300f,    // Y
    -0.139630f, -0.360370f, 0.500000f,  // U
    0.500000f, -0.459786f, -0.040214f   // V
};
243 
// BT2020 Non-constant YUV to RGB conversion matrix from R-REC-BT.2020-1-201406-I!!PDF-E.pdf
const float g_cCSC_BT2020_YUV_RGB[9] = {
    1.000000f, 0.000000f, 1.474600f,    // R
    1.000000f, -0.164553f, -0.571353f,  // G
    1.000000f, 1.881400f, 0.000000f     // B
};
255 
// BT2020 YUV limited range to BT2020 RGB full range conversion matrix
const float g_cCSC_BT2020_LimitedYUV_RGB[9] =
    {
        /* R */ 1.164383f, 0.000000f, 1.678680f,
        /* G */ 1.164383f, -0.187332f, -0.650421f,
        /* B */ 1.164383f, 2.141769f, 0.000000f};
263 
// BT2020 RGB full range to BT2020 YUV limited range conversion matrix
const float g_cCSC_BT2020_RGB_LimitedYUV[9] =
    {
        /* Y */ 0.225617f, 0.582275f, 0.050934f,
        /* U */ -0.122650f, -0.316559f, 0.439209f,
        /* V */ 0.439209f, -0.403885f, -0.035324f};
271 
// Layer definition
typedef enum _KDLL_LAYER
{
    Layer_Invalid    = -2,  // Invalid
    Layer_None       = -1,  // None
    Layer_Background = 0,   // Background      (0)
    Layer_MainVideo,        // Main video      (1)
    Layer_SubVideo,         // Secondary video (2)
    Layer_SubPicture1,      // Sub-picture 1   (3)
    Layer_SubPicture2,      // Sub-picture 2   (4)
    Layer_SubPicture3,      // Sub-picture 3   (5) [Gen6]
    Layer_SubPicture4,      // Sub-picture 4   (6) [Gen6]
    Layer_Graphics = 14,    // Graphics - enumerator value is 14 (the earlier comment annotated it as "(7)")
    Layer_RenderTarget      // Render Target   (15)
} Kdll_Layer;
287 
// Sampling mode.
// Do not reorder: every enumerator after Sample_Any takes its value from its position.
typedef enum tagKdll_Sampling
{
    Sample_None          = -2,
    Sample_Source        = -1,  // Current source sampling
    Sample_Any           = 0,
    Sample_Scaling_Any   = 1,   // Scaling (any scaling factor)
    Sample_Scaling       = 2,   // Scaling (above or equal 0.34x)
    Sample_Scaling_034x  = 3,   // Scaling 0.34x (below 0.34x)
    Sample_iScaling      = 4,   // iScaling (above or equal 0.34x)
    Sample_iScaling_034x = 5,   // iScaling 0.34x (below 0.34x)
    Sample_iScaling_AVS  = 6,   // AVS Interlace Scaling on g75+
    Sample_Scaling_AVS   = 7    // AVS Scaling ("g575+" in the earlier comment - possibly g75+, confirm)
} Kdll_Sampling;
303 
// Scaling ratio mode
typedef enum tagKdll_scalingratio
{
    Scalingratio_Any       = 0,  // By default, or scaling ratio <= 1/8
    Scalingratio_over1     = 1,  // Scaling ratio > 1 + 1/6
    Scalingratio_b1p2to1   = 2,  // Scaling ratio (1/2, 1 + 1/6]; NV12 needs 1 + 1/6 support by kernel
    Scalingratio_b1p4to1p2 = 3,  // Scaling ratio (1/4, 1/2]
    Scalingratio_b1p8to1p4 = 4   // Scaling ratio (1/8, 1/4]
} Kdll_Scalingratio;
313 
// Gen7+ : Shuffle data returned by Sample_8x8 message
typedef enum tagKdll_Shuffling
{
    Shuffle_None          = -1,
    Shuffle_Any           = 0,
    Shuffle_All_8x8_Layer = 1,  // Mix of Sample_8x8 and others. Shuffle all 8x8 layer.
    Shuffle_RenderTarget  = 2   // All layers are Sample_8x8. Shuffle only in RenderTarget
} Kdll_Shuffling;
322 
// Gen7+ : Rendering method
typedef enum tagKdll_RenderMethod
{
    RenderMethod_MediaObject = 0,   // 0
    RenderMethod_MediaObjectWalker  // 1
} Kdll_RenderMethod;
329 
// Gen7+ : Set CSC Coefficients method
typedef enum tagKdll_SetCSCCoeffMethod
{
    SetCSCCoeffMethod_Curbe = 0,  // 0
    SetCSCCoeffMethod_Patch       // 1
} Kdll_SetCSCCoeffMethod;
336 
// Processing mode
typedef enum tagKdll_Processing
{
    Process_None         = -2,
    Process_Source       = -1,  // Current source processing
    Process_Any          = 0,
    Process_Composite    = 1,   // Composite 2 layers
    Process_XORComposite = 2,   // XOR mono composite.
    Process_PBlend       = 3,   // Partial Blend 2 layers  - 8-bits alpha
    Process_CBlend       = 4,   // Constant Blend 2 layers - 8-bits alpha
    Process_SBlend       = 5,   // Source Blend 2 layers   - 8-bits alpha
    Process_SBlend_4bits = 6,   // Source Blend 2 layers   - 4-bits alpha
    Process_CSBlend      = 7,   // Constant multiply Source Blend 2 layers - 8-bits alpha (Source is NOT premultiplied)
    Process_CPBlend      = 8,   // Constant multiply Source Blend 2 layers - 8-bits alpha (Source is Premultiplied)
    Process_DI           = 9,   // Deinterlacing
    Process_DN           = 10,  // Denoise
    Process_DNDI         = 11   // DNDI
} Kdll_Processing;
355 
// Kind of color space conversion (source family to destination family)
typedef enum tagKdll_CSCType
{
    CSC_YUV_RGB = 0,  // YUV to RGB color space conversion (0)
    CSC_RGB_YUV,      // RGB to YUV color space conversion (1)
    CSC_YUV_YUV,      // YUV to YUV color space conversion (2)
    CSC_RGB_RGB       // RGB to RGB color space conversion (3)
} Kdll_CSCType;
363 
// CSC Coefficients ID
// Worst case we could have as many CSCs as there are layers (xvYcc case),
// so 6 CoeffIDs are defined to support 6 layers max.
typedef enum tagKdll_CoeffID
{
    // Symbolic selectors (negative values)
    CoeffID_Src0   = -5,  // Src0 coefficients
    CoeffID_Src1   = -4,  // Src1 coefficients
    CoeffID_Source = -3,  // Current source coefficients
    CoeffID_Any    = -2,  // Any matrix
    CoeffID_None   = -1,  // No matrix -> no CSC

    // Coefficient set indices
    CoeffID_0 = 0,
    CoeffID_1,
    CoeffID_2,
    CoeffID_3,
    CoeffID_4,
    CoeffID_5
} Kdll_CoeffID;
380 
// DL Parser State.
// Values after Parser_Begin are assigned by position; Parser_Count must stay
// last so that it equals the number of non-negative states.
typedef enum tagKdll_ParserState
{
    Parser_Invalid = -1,  // invalid state

    // Start and setup
    Parser_Begin = 0,         // start dynamic linking
    Parser_SetRenderMethod,   // MEDIA_OBJECT or MEDIA_OBJECT_WALKER rendering
    Parser_SetupLayer0,       // setup layer 0
    Parser_SetupLayer1,       // setup layer 1
    Parser_SetParamsLayer0,   // setup parameters for layer 0
    Parser_SetParamsLayer1,   // setup parameters for layer 1
    Parser_SetParamsTarget,   // setup parameters for render target

    // Sampling
    Parser_SampleLayer0,           // sample layer 0
    Parser_SampleLayer0Mix,        // sample layer 0 need inter mix (interlaced scaling)
    Parser_SampleLayer0ColorFill,  // sample layer 0 colorfill
    Parser_RotateLayer0Check,      // check if layer 0 needs preComp rotation
    Parser_RotateLayer0,           // PreComp layer 0 rotate
    Parser_SampleLayer0Done,       // sample layer 0 is complete
    Parser_ShuffleLayer0,          // shuffle layer 0
    Parser_SampleLayer1,           // sample layer 1
    Parser_SampleLayer1Done,       // sample layer 1 is complete
    Parser_ShuffleLayer1,          // shuffle layer 1

    // Color space conversion
    Parser_SampleLayer0SelectCSC,  // decide whether CSC is selected for Mix or jump to Parser_SampleLayer0Mix directly
    Parser_SetupCSC0,              // CSC parameters setup for layer 0
    Parser_ExecuteCSC0,            // CSC on layer 0
    Parser_ExecuteCSC0Done,        // CSC on layer 0 is complete
    Parser_SetupCSC1,              // CSC parameter setup for layer 1
    Parser_ExecuteCSC1,            // CSC on layer 1
    Parser_ExecuteCSC1Done,        // CSC on layer 1 is complete

    // Processing and output
    Parser_Lumakey,           // lumakey
    Parser_ProcessLayer,      // process layer 0 + layer 1
    Parser_ProcessLayerDone,  // processing is complete
    Parser_DualOutput,        // dual output
    Parser_Rotation,          // apply post composition rotation
    Parser_DestSurfIndex,     // destination surface index
    Parser_Colorfill,         // applying colorfill
    Parser_WriteOutput,       // write output
    Parser_End,               // end dynamic linking

    // This is used for custom states
    Parser_Custom,  // Custom state

    // Number of states - keep this at the end
    Parser_Count
} Kdll_ParserState;
425 
// Rule identifiers. The numeric range of an ID determines its class:
// match rules start at 0x0000, set rules at 0x0200, extended set rules at 0x0300.
typedef enum tagKdll_RuleID
{
    // Parser/Patch Control
    RID_Op_EOF      = -2,  // End of search table
    RID_Op_NewEntry = -1,  // New search entry

    // Simple Match Rules - 0x0000 to 0x0100
    RID_IsTargetCspace = 0x0000,  // Match target color space
    RID_IsLayerID,                // Match Layer ID
    RID_IsLayerFormat,            // Match Layer Format
    RID_IsParserState,            // Match Parser State
    RID_IsRenderMethod,           // Match rendering mode, media object or media walker
    RID_IsShuffling,              // Match Shuffling
    RID_IsDualOutput,             // Match Dual Output
    RID_IsLayerRotation,          // Match Rotation
    RID_IsRTRotate,               // Match if RT rotates
    RID_IsSrc0Format,             // Current Src0 source (surface) format
    RID_IsSrc0Sampling,           // Current Src0 sampling mode
    RID_IsSrc0Rotation,           // Match Layer0 Rotation
    RID_IsSrc0ColorFill,          // Current Src0 Colorfill flag
    RID_IsSrc0LumaKey,            // Current Src0 LumaKey flag
    RID_IsSrc0Procamp,            // Match Src0 Procamp flag
    RID_IsSrc0Coeff,              // Current Src0 CSC coefficients
    RID_IsSrc0Processing,         // Current Src0 processing mode
    RID_IsSrc0Chromasiting,       // Current Src0 Chromasiting mode
    RID_IsSrc1Format,             // Current Src1 source (surface) format
    RID_IsSrc1Sampling,           // Current Src1 sampling mode
    RID_IsSrc1LumaKey,            // Current Src1 LumaKey flag
    RID_IsSrc1SamplerLumaKey,     // Current Src1 Sampler LumaKey flag
    RID_IsSrc1Procamp,            // Match Src1 Procamp flag
    RID_IsSrc1Coeff,              // Current Src1 CSC coefficients
    RID_IsSrc1Processing,         // Current Src1 processing mode
    RID_IsSrc1Chromasiting,       // Current Src1 Chromasiting mode
    RID_IsLayerNumber,            // Current Layer number
    RID_IsQuadrant,               // Current Quadrant
    RID_IsCSCBeforeMix,           // CSC needed before Mix
    RID_IsTargetFormat,           // Render Target Format
    RID_Is64BSaveEnabled,         // Indicate whether 64B save kernel could be used
    RID_IsTargetTileType,         // Render Target Tile Type
    RID_IsProcampEnabled,         // Match Procamp
    RID_IsSetCoeffMode,           // Set CSC coefficients mode
    RID_IsConstOutAlpha,          // Match alpha fill mode
    RID_IsDitherNeeded,           // Whether dithering needed
    RID_IsScalingRatio,           // Current scaling ratio

    // Extended Match Rules - 0x0100 to 0x01ff (none defined)

    // Simple Set Rules - 0x0200 to 0x02ff
    RID_SetTargetCspace = 0x0200,  // Set target color space
    RID_SetParserState,            // Set Parser State
    RID_SetSrc0Format,             // Set Src0 source format
    RID_SetSrc0Sampling,           // Set Src0 sampling mode
    RID_SetSrc0Rotation,           // Set Src0 rotation
    RID_SetSrc0ColorFill,          // Set Src0 Colorfill
    RID_SetSrc0LumaKey,            // Set Src0 LumaKey
    RID_SetSrc0Procamp,            // Set Src0 Procamp flag
    RID_SetSrc0Coeff,              // Set Src0 CSC coefficients
    RID_SetSrc0Processing,         // Set Src0 Processing mode
    RID_SetSrc1Format,             // Set Src1 source format
    RID_SetSrc1Sampling,           // Set Src1 sampling mode
    RID_SetSrc1Rotation,           // Set Src1 rotation
    RID_SetSrc1LumaKey,            // Set Src1 LumaKey
    RID_SetSrc1SamplerLumaKey,     // Set Src1 Sampler LumaKey
    RID_SetSrc1Procamp,            // Set Src1 Procamp flag
    RID_SetSrc1Coeff,              // Set Src1 CSC coefficients
    RID_SetSrc1Processing,         // Set Src1 Processing mode
    RID_SetKernel,                 // Set Kernel
    RID_SetNextLayer,              // Set Next Layer
    RID_SetPatchData,              // Set Patch Data to use
    RID_SetQuadrant,               // Set Quadrant
    RID_SetCSCBeforeMix,           // Set CSC flag before Mix

    // Extended Set Rules - 0x0300 to 0x03ff
    RID_SetPatch = 0x0300,  // Set Patch block
} Kdll_RuleID;
500 
// Logic selector stored in a rule entry
typedef enum tagKdll_Logic
{
    Kdll_None = 0,
    Kdll_Or   = 1,
    Kdll_Not  = 2,
} Kdll_Logic;
507 
508 // Dynamic linking rule entry
509 typedef struct tagKdll_RuleEntry
510 {
511     Kdll_RuleID id : 16;     // LSB
512     int         value : 16;  // MSB
513     Kdll_Logic  logic;
514 } Kdll_RuleEntry;
515 
516 typedef struct tagKdll_RuleEntrySet
517 {
518     const Kdll_RuleEntry *pRuleEntry;        // Pointer to the first meaningful rule of the set
519     uint32_t              iGroup : 8;        // Group (default, custom, non-overridable)
520     uint32_t              iMatchCount : 12;  // Size of Match Rules (including variable length rules)
521     uint32_t              iSetCount : 12;    // Size of Set Rules (including variable length rules)
522 } Kdll_RuleEntrySet;
523 
// Structure that defines a set of procamp parameters
typedef struct tagKdll_Procamp
{
    // Procamp Enabled/Disabled
    bool bEnabled;
    // Procamp parameters version
    int iProcampVersion;
    // Brightness : range = -100.0 - 100.0; default = 0.0; step = 0.1
    float fBrightness;
    // Contrast   : range =    0.0 -  10.0; default = 1.0; step = 0.01
    float fContrast;
    // Hue        : range = -180.0 - 180.0; default = 0.0; step = 0.1
    float fHue;
    // Saturation : range =    0.0 -  10.0; default = 1.0; step = 0.01
    float fSaturation;
} Kdll_Procamp;
534 
535 // Structure that defines CSC+PA conversion matrix
536 typedef struct tagKdll_CSC_Matrix
537 {
538     int bInUse : 1;  // Matrix is in use and valid (LSB)
539     int : 3;
540     Kdll_CoeffID iCoeffID : 4;     // Coeffient set
541     VPHAL_CSPACE SrcSpace : 8;     // Source Color Space
542     VPHAL_CSPACE DstSpace : 8;     // Destionation Color Space
543     int          iProcampID : 8;   // Procamp parameter set (-1 if no Procamp) (MSB)
544     int          iProcampVersion;  // Last procamp version (to recalculate matrix)
545     short        Coeff[12];        // CSC kernel coeff: [Y'/R']   [0  1  2]   [Y/R]   [ 3]
546                                    //                   [U'/G'] = [4  5  6] * [U/G] + [ 7]
547                                    //                   [V'/B']   [8  9 10]   [V/B]   [11]
548 } Kdll_CSC_Matrix;
549 
550 // Structure that defines a full set of CSC or CSC+PA parameters to be used by a combined kernel
551 typedef struct tagKdll_CSC_Params
552 {
553     VPHAL_CSPACE    ColorSpace;                 // Selected Color Space
554     Kdll_CSC_Matrix Matrix[DL_CSC_MAX];         // CSC conversion matrix (3x3 + 1x3)
555     uint8_t         MatrixID[DL_CSC_MAX];       // Coefficient allocation array
556     uint8_t         PatchMatrixID[DL_CSC_MAX];  // CSC Matrix ID
557     uint8_t         PatchMatrixNum;             // CSC Matrix Number
558 } Kdll_CSC_Params;
559 
560 // Structure that defines a compositing layer
561 typedef struct tagKdll_FilterEntry
562 {
563     // Current layer
564     Kdll_Layer      layer;            // source layer       (Layer identification - Bg, Main, Sub, Gfx, ...)
565     MOS_FORMAT      format;           // source format      (Pixel/Sampling Format - ARBG, NV12, YUY2, ...)
566     VPHAL_CSPACE    cspace;           // source color space (BT709, BT601, xvYCC709, xvYCC601, sRGB, ...)
567     Kdll_Sampling   sampler;          // sampling mode      (AVS, Scaling, ColorFill, Luma Keying, ...)
568     int32_t         colorfill : 16;   // colorfill          (true/false)
569     int32_t         lumakey : 16;     // Luma key           (true/false)
570     int32_t         samplerlumakey;   // Sampler Lumakey    (true/false)
571     Kdll_Processing process;          // processing mode    (Compositing, Constant Blending, Source Blending, ...)
572     int             procamp;          // index to procamp parameters (-1 of Procamp disabled)
573     int             matrix;           // index to CSC matrix entry   (-1 if CSC not required)
574     VPHAL_ROTATION  rotation;         // rotation angle
575     MOS_TILE_TYPE   tiletype;         // Tiling Type
576     bool            dualout;          // dual output mode
577     bool            bWaEnableDscale;  // enable DScale kernels for sampler-unrom issue
578     bool            bEnableDscale;    // always enable DScale Kernels
579     int32_t         chromasiting;     // chromasiting        (-1 if Chromasiting is disabled)
580 
581     // This flag is used to select between kernels:
582     // Save_RGB         or     Save_ARGB
583     // Save_R10G10B10   or     Save_R10G10B10A2
584     // Save_VUYA        or     Save_SrcVUYA
585     bool bFillOutputAlphaWithConstant;
586     bool bIsDitherNeeded;
587 
588     Kdll_Scalingratio      ScalingRatio;
589     Kdll_RenderMethod      RenderMethod;
590     Kdll_SetCSCCoeffMethod SetCSCCoeffMode;
591     bool                   forceToTargetColorSpace;
592 } Kdll_FilterEntry, *PKdll_FilterEntry;
593 
594 // Structure that defines a compositing filter
595 typedef Kdll_FilterEntry Kdll_FilterDesc[DL_MAX_SEARCH_FILTER_SIZE];
596 
// One patch block: three bit-fields totalling 32 bits
typedef struct tagKdll_PatchBlock
{
    uint32_t DstOffset : 16;  // 16-bit field
    uint32_t SrcOffset : 8;   // 8-bit field
    uint32_t BlockSize : 8;   // 8-bit field
} Kdll_PatchBlock;
603 
604 // Kernel Patching data
605 typedef struct tagKdll_PatchData
606 {
607     // Data for patching
608     int     iPatchDataSize;                // Size of Patch data block
609     uint8_t Data[DL_MAX_PATCH_DATA_SIZE];  // Patch data
610 
611     // Patches
612     int             nPatches;
613     Kdll_PatchBlock Patch[DL_MAX_PATCH_BLOCKS];
614 } Kdll_PatchData;
615 
// Import/export link record: two 32-bit groups of bit-fields
typedef struct tagKdll_LinkData
{
    // Kernel Unique ID
    uint32_t iKUID : 16;
    // Label ID
    uint32_t iLabelID : 16;
    // 0 - import; 1 - export;
    uint32_t bExport : 1;
    // 0 - unresolved; 1 - resolved;
    uint32_t bResolved : 1;
    // Offset in DWORDs
    uint32_t dwOffset : 20;
    // 0 - function; 1 - inline;
    uint32_t bInline : 1;
    // MBZ
    uint32_t : 9;
} Kdll_LinkData;
626 
627 typedef struct tagKdll_Symbol
628 {
629     uint32_t       dwSize;
630     uint32_t       dwCount;
631     Kdll_LinkData *pLink;
632 } Kdll_Symbol;
633 
634 typedef struct tagKdll_CacheEntry
635 {
636     // Kernel binary
637     uint8_t *pBinary;  // kernel binary
638     int      iSize;    // kernel size
639 
640     // Component kernel information
641     int            iKUID;   // kernel unique id (static kernel)
642     const char *   szName;  // kernel name
643     int            nLink;   // Number of imports/exports
644     Kdll_LinkData *pLink;   // Kernel imports/exports
645 
646     // Combined kernel information
647     uint16_t          wHashEntry;        // hash table entry
648     int               iFilterSize;       // kernel filter size
649     Kdll_FilterEntry *pFilter;           // kernel filter description
650     Kdll_CSC_Params * pCscParams;        // kernel CSC parameters
651     VPHAL_CSPACE      colorfill_cspace;  // intermediate color space for colorfill
652 
653     // Cache control
654     int      iKCID;      // kernel cache id (dynamically linked kernel)
655     uint32_t dwLoaded;   // kernel loaded flag
656     uint32_t dwRefresh;  // refresh counter (for expiration control)
657 
658     struct tagKdll_CacheEntry *pNextEntry;  // Next cache entry;
659 } Kdll_CacheEntry;
660 
661 typedef struct tagKdll_KernelCache
662 {
663     int              iCacheMaxEntries;  // Max number of entries
664     int              iCacheEntries;     // Current number of cache entries
665     int              iCacheSize;        // Cache buffer size
666     int              iCacheFree;        // Cache buffer free
667     int              iCacheID;          // Next kernel cache ID
668     Kdll_CacheEntry *pCacheEntries;     // Array of kernel cache entries
669     uint8_t *        pCache;            // Cache (binary data)
670     int              nExports;          // Exports count
671     Kdll_LinkData *  pExports;          // Exports table
672 } Kdll_KernelCache;
673 
674 //--------------------------------------------------------------
675 // Kernel Hash table
676 //--------------------------------------------------------------
677 typedef struct tagKdll_KernelHashEntry
678 {
679     uint16_t          next;         // Next entry with same 8-bit hash + 1 (0 is null)
680     uint32_t          dwHash;       // 32-bit hash value (FNV-1a hash)
681     int               iFilter;      // Filter size
682     Kdll_FilterEntry *pFilter;      // Filter for matching
683     Kdll_CacheEntry * pCacheEntry;  // Pointer to kernel cache entry
684 } Kdll_KernelHashEntry;
685 
686 typedef struct tagKdll_KernelHashTable
687 {
688     uint16_t             wHashTable[256];                     // 256 hashes (1 based index)
689     uint16_t             pool;                                // first in pool (1 based index)
690     uint16_t             last;                                // last in pool (for releasing)
691     Kdll_KernelHashEntry HashEntry[DL_MAX_COMBINED_KERNELS];  // Hash table entries
692 } Kdll_KernelHashTable;
693 
//--------------------------------------------------------------
// Dynamic linking state
//--------------------------------------------------------------

// Pointer aliases for the state structures, declared ahead of the structures
// so that function-pointer members can reference them.
typedef struct tagKdll_SearchState *PKdll_SearchState;
typedef struct tagKdll_State *PKdll_State;
699 
// Kernel DLL (dynamic linking) state.
// Aggregates, per the sections below: the default component kernel cache and rule table, the
// CMFC fcpatch cache, the custom kernel cache and rule table, the combined rule tables (sorted
// and per parser state), the output kernel cache with its hash table, Procamp parameters, the
// colorfill color space selected by Kdll, and function pointers for the search/build steps.
// The trailing callback members exist only when EMUL is non-zero, so the struct layout differs
// between EMUL and non-EMUL builds.
700 typedef struct tagKdll_State
701 {
702     int      iSize;        // Size of DL buffer
703     uint32_t dwRefresh;    // Refresh counter (for garbage collection)
704     bool     bEnableCMFC;  // Flag to enable CMFC
705 
706     // Default kernel component cache and rule table
707     Kdll_KernelCache      ComponentKernelCache;  // Component kernels cache
708     const Kdll_RuleEntry *pRuleTableDefault;     // Default Dll rules (internal)
709 
710     // CMFC kernel fcpatch cache
711     Kdll_KernelCache CmFcPatchCache;  // CMFC kernel fcpatch cache
712 
713     // Custom kernel component cache and rule table
714     Kdll_KernelCache *    pCustomKernelCache;  // Custom kernel cache
715     const Kdll_RuleEntry *pRuleTableCustom;    // Custom Dll rules (external)
716 
717     // Combined rule lookup table
718     Kdll_RuleEntrySet *pSortedRules;  // Sorted rule table
719 
720     Kdll_RuleEntrySet *pDllRuleTable[Parser_Count];  // Rule acceleration table (one entry for each Parser State)
721     int                iDllRuleCount[Parser_Count];  // Rule count (number of entries for each Parser State)
722 
723     // Combined kernel cache and hash table
724     Kdll_KernelCache     KernelCache;      // Output kernel cache
725     Kdll_KernelHashTable KernelHashTable;  // Hash table for resulting kernels
726 
727     Kdll_Procamp *pProcamp;      // Array of Procamp parameters
728     int32_t       iProcampSize;  // Size of the array of Procamp parameters
729 
730     // Colorfill
731     VPHAL_CSPACE colorfill_cspace;  // Selected colorfill Color Space by Kdll
732 
733     // Start kernel search (takes the input filter, its size, and the 64B-instruction enable flag)
734     void (*pfnStartKernelSearch)(PKdll_State pState,
735         PKdll_SearchState                    pSearchState,
736         Kdll_FilterEntry *                   pFilter,
737         int32_t                              iFilterSize,
738         uint32_t                             uiIs64BInstrEnabled);
739 
740     // Find best ColorSpace to use internally, allocate/calculate CSC matrices and arguments
741     bool (*pfnSetupCSC)(PKdll_State pState,
742         PKdll_SearchState           pSearchState);
743 
744     // Find rule that matches the current search state
745     bool (*pfnFindRule)(PKdll_State pState,
746         PKdll_SearchState           pSearchState);
747 
748     // Update state based on rule
749     bool (*pfnUpdateState)(PKdll_State pState,
750         PKdll_SearchState              pSearchState);
751 
752     // Search kernel for the given search state (NOTE(review): original comment was truncated -- confirm intent)
753     bool (*pfnSearchKernel)(PKdll_State pState,
754         PKdll_SearchState               pSearchState);
755 
756     // Build current best match kernel
757     bool (*pfnBuildKernel)(PKdll_State pState,
758         PKdll_SearchState              pSearchState);
759 
760     // Map matrix to kernel CSC (same parameter list as the KernelDll_MapCSCMatrix prototype)
761     bool (*pfnMapCSCMatrix)(Kdll_CSCType type,
762         const float *                    matrix,
763         short *                          coeff);
764 #if EMUL
765     // Token to be passed back in Callbacks
766     void *pToken;
767 
768     // Print Component Kernel (callback receives the token and a kernel name string)
769     void (*pfnCbListKernel)(void *token,
770         const char *              szKernel);
771 
772     // State update callback. The "Sate" spelling is part of the member name and must be kept; reason is presumably a CB_REASON_* code -- TODO confirm
773     void (*pfnCbSearchSate)(void *token,
774         int32_t                   reason,
775         PKdll_SearchState         pSearchState);
776 #endif
777 } Kdll_State;
778 
// Kernel search state.
// Holds a back-pointer to the Kernel DLL state, the output filter and CSC parameters, the
// matching rule set, the selected kernel/group/patch id arrays, the current parser position,
// per-source (Src0/Src1) and render-target attributes, and the dynamic-linking working data.
// The link array and output kernel buffer are embedded by value (sizes come from the
// DL_MAX_* constants defined elsewhere).
779 typedef struct tagKdll_SearchState
780 {
781     // Kernel DLL state
782     Kdll_State *pKdllState;  // Kernel DLL State
783 
784     // Filter and CSC parameters
785     Kdll_FilterDesc Filter;              // Output Filter, with CSC data
786     int             iFilterSize;         // Size of the filter
787     Kdll_CSC_Params CscParams;           // CSC parameters
788     bool            bCscBeforeMix;       // flag to identify if CSC needed before Mix
789     Kdll_Shuffling  ShuffleSamplerData;  // Gen7+ Shuffle sampler output
790 
791     // RT Rotate
792     bool bRTRotate;
793 
794     // Procamp
795     bool bProcamp;
796 
797     // Search output
798     Kdll_RuleEntrySet *pMatchingRuleSet;  // Pointer to the matching rule set
799 
800     // Kernels
801     int KernelCount;                // # of kernels
802     int KernelID[DL_MAX_KERNELS];   // Array of kernel ids
803     int KernelGrp[DL_MAX_KERNELS];  // Array of kernel groups
804     int PatchID[DL_MAX_KERNELS];    // Array of patches
805 
806     // Kernel patches
807     int            PatchCount;               // Number of patches
808     Kdll_PatchData Patches[DL_MAX_PATCHES];  // Kernel patches
809 
810     // Current state
811     Kdll_FilterEntry *pFilter;       // Current filter entry
812     Kdll_ParserState  state;         // Parser state
813     VPHAL_CSPACE      cspace;        // Destination color space
814     int               quadrant;      // Current quadrant
815     int               layer_number;  // Current layer number
816 
817     // Src0 state
818     MOS_FORMAT      src0_format;     // Src0 source format
819     Kdll_Sampling   src0_sampling;   // Src0 sampling mode
820     int32_t         src0_colorfill;  // Src0 colorfill flag
821     int32_t         src0_lumakey;    // Src0 luma key
822     int32_t         src0_procamp;    // Src0 procamp
823     Kdll_CoeffID    src0_coeff;      // Src0 CSC coefficients
824     Kdll_Processing src0_process;    // Src0 processing mode
825     VPHAL_ROTATION  src0_rotation;   // Src0 Rotate
826 
827     // Src1 state
828     MOS_FORMAT      src1_format;          // Src1 source format
829     Kdll_Sampling   src1_sampling;        // Src1 sampling mode
830     int32_t         src1_lumakey;         // Src1 luma key
831     int32_t         src1_samplerlumakey;  // Src1 sampler luma key
832     int32_t         src1_procamp;         // Src1 procamp
833     Kdll_CoeffID    src1_coeff;           // Src1 CSC coefficients
834     Kdll_Processing src1_process;         // Src1 processing mode
835     VPHAL_ROTATION  src1_rotation;        // Src1 Rotate
836 
837     // Render Target Format
838     MOS_FORMAT target_format;  // Render Target format
839 
840     bool          b64BSaveEnabled;  // Whether to use 64B save kernel
841     MOS_TILE_TYPE target_tiletype;  // Render Target Tile Type
842 
843     // Dynamic linking
844     int           KernelSize;                  // Kernel Size
845     int           KernelLeft;                  // Remaining size
846     Kdll_Symbol   KernelLink;                  // DL symbols for linking
847     Kdll_LinkData LinkArray[DL_MAX_SYMBOLS];   // Import/Export symbols for dynamic linking
848     uint8_t       Kernel[DL_MAX_KERNEL_SIZE];  // Output Kernel
849 } Kdll_SearchState;
850 
// Takes the Kernel DLL state and returns nothing. The signature matches the
// ModifyFunctionPointers callback parameter of KernelDll_AllocateStates; presumably it is
// passed there to adjust the state's pfn* members -- TODO confirm against the callers.
851 void KernelDll_ModifyFunctionPointers_Next(Kdll_State *pState);
852 
853 //---------------------------------
854 // Kernel DLL function prototypes
855 //---------------------------------
856 
// Format / color space predicates and translation. The exact rules live in the implementation.

// Boolean test on a MOS_FORMAT (presumably true when the format is a YUV format -- confirm).
857 bool KernelDll_IsYUVFormat(MOS_FORMAT format);
858 
// Boolean test of `format` (together with `cspace`) against `match`; `match` is itself a
// MOS_FORMAT value -- NOTE(review): matching semantics are not visible in this header.
859 bool KernelDll_IsFormat(
860     MOS_FORMAT   format,
861     VPHAL_CSPACE cspace,
862     MOS_FORMAT   match);
863 
// Maps one VPHAL_CSPACE value to another VPHAL_CSPACE value (mapping not visible here).
864 VPHAL_CSPACE KernelDll_TranslateCspace(VPHAL_CSPACE cspace);
865 
// Map a CSC matrix to kernel CSC coefficients.
// Parameters:
//    type   - CSC type selector
//    matrix - read-only float matrix (const)
//    coeff  - short buffer, presumably the output -- element counts are not stated in this
//             header, confirm against the implementation
// Same parameter list as the pfnMapCSCMatrix member of Kdll_State.
866 bool KernelDll_MapCSCMatrix(
867     Kdll_CSCType type,
868     const float *matrix,
869     short *      coeff);
870 
871 // Kernel Rule Search / State Update. Both take the Kernel DLL state and the search state and return bool; same parameter lists as Kdll_State::pfnFindRule / pfnUpdateState.
872 bool KernelDll_FindRule(
873     Kdll_State *      pState,
874     Kdll_SearchState *pSearchState);
875 
876 bool KernelDll_UpdateState(
877     Kdll_State *      pState,
878     Kdll_SearchState *pSearchState);
879 
// Boolean test of `cspace` against `match` (both VPHAL_CSPACE); matching rules are in the implementation.
880 bool KernelDll_IsCspace(
881     VPHAL_CSPACE cspace,
882     VPHAL_CSPACE match);
883 
// Takes a source and destination Kdll_CSpace and a non-const float buffer; presumably writes
// the src->dst conversion matrix into pCSC_Matrix -- NOTE(review): required buffer length is
// not stated here, confirm against the implementation.
884 void KernelDll_GetCSCMatrix(
885     Kdll_CSpace src,
886     Kdll_CSpace dst,
887     float *     pCSC_Matrix);
888 
889 // Simple Hash function: returns a 32-bit value for the data at pData (iSize is presumably the length of that data in bytes -- TODO confirm)
890 uint32_t KernelDll_SimpleHash(
891     void            *pData,
892     int             iSize);
893 
894 // Setup Kernel Dll Procamp Parameters. Takes an array of Procamp parameters and its size; Kdll_State has matching pProcamp / iProcampSize members (presumably set from these -- confirm).
895 void KernelDll_SetupProcampParameters(Kdll_State    *pState,
896                                       Kdll_Procamp  *pProcamp,
897                                       int            iProcampSize);
898 
899 // Find Kernel in hash table. Returns a cache entry pointer (presumably NULL when no entry matches -- confirm).
// Note: iFilter is a pointer to filter entries despite the "i" prefix; the same argument is
// named pFilter in KernelDll_AddKernel. dwHash is a caller-supplied hash value.
900 Kdll_CacheEntry *
901 KernelDll_GetCombinedKernel(Kdll_State       *pState,
902                             Kdll_FilterEntry *iFilter,
903                             int               iFilterSize,
904                             uint32_t          dwHash);
905 
906 //---------------------------------------------------------------------------------------
907 // KernelDll_SetupFunctionPointers - Setup Function pointers based on platform
908 //
909 // Parameters:
910 //    Kdll_State *pState                          - [in/out] Kernel Dll state (non-const; presumably its pfn* members are filled in)
911 //    void (*ModifyFunctionPointers)(PKdll_State) - [in] callback taking the state (presumably used to override defaults -- confirm)
912 //
913 // Output: true  - Function pointers are set
914 //         false - Failed to setup function pointers (invalid platform)
915 //-----------------------------------------------------------------------------------------
// NOTE(review): this prototype has internal linkage (`static`) but lives in a header, so every
// translation unit that includes it gets its own declaration; units that never define the
// function may trigger "declared 'static' but never defined" warnings. Confirm whether the
// declaration should move next to the definition before changing the linkage.
916 static bool KernelDll_SetupFunctionPointers(
917     Kdll_State *pState,
918     void (*ModifyFunctionPointers)(PKdll_State));
919 
920 // Allocate Kernel Dll State. Returns a Kdll_State pointer (presumably NULL on failure -- confirm); KernelDll_ReleaseStates is the matching release prototype.
// Parameters:
//    pKernelCache / uKernelCacheSize   - kernel cache data and its size
//    pFcPatchCache / uFcPatchCacheSize - FC patch cache data and its size
//    pInternalRules                    - read-only rule table (const)
//    ModifyFunctionPointers            - callback taking the state; same type as the second
//                                        parameter of KernelDll_SetupFunctionPointers
// NOTE(review): whether the cache buffers are copied or referenced is not visible in this header.
921 Kdll_State *KernelDll_AllocateStates(
922     void *                pKernelCache,
923     uint32_t              uKernelCacheSize,
924     void *                pFcPatchCache,
925     uint32_t              uFcPatchCacheSize,
926     const Kdll_RuleEntry *pInternalRules,
927     void (*ModifyFunctionPointers)(PKdll_State));
928 
929 // Release Kernel Dll State (counterpart of the KernelDll_AllocateStates prototype; pState must not be used afterwards -- presumably, confirm)
930 void KernelDll_ReleaseStates(Kdll_State *pState);
931 
932 // Update CSC coefficients for the given CSC matrix (pMatrix is non-const; presumably updated in place -- confirm)
933 void KernelDll_UpdateCscCoefficients(Kdll_State *pState,
934     Kdll_CSC_Matrix *                            pMatrix);
935 
936 // Release the additional kernel cache entries (presumably those created by KernelDll_AllocateAdditionalCacheEntries -- confirm)
937 void KernelDll_ReleaseAdditionalCacheEntries(Kdll_KernelCache *pCache);
938 
939 // Search kernel, output is in pSearchState (same parameter list as Kdll_State::pfnSearchKernel)
940 bool KernelDll_SearchKernel(
941     Kdll_State *      pState,
942     Kdll_SearchState *pSearchState);
943 
944 // Build kernel in SearchState (same parameter list as Kdll_State::pfnBuildKernel)
945 bool KernelDll_BuildKernel(Kdll_State *pState, Kdll_SearchState *pSearchState);
946 
// Same parameter list as Kdll_State::pfnSetupCSC, which is documented as: find best ColorSpace
// to use internally, allocate/calculate CSC matrices and arguments.
947 bool KernelDll_SetupCSC(
948     Kdll_State *      pState,
949     Kdll_SearchState *pSearchState);
950 
951 // Add kernel to cache and hash table. Returns a cache entry pointer (presumably NULL on failure -- confirm).
// Takes the same filter / size / hash triple as KernelDll_GetCombinedKernel, plus the search
// state (presumably the source of the built kernel -- confirm).
952 Kdll_CacheEntry *
953 KernelDll_AddKernel(Kdll_State       *pState,
954                     Kdll_SearchState *pSearchState,
955                     Kdll_FilterEntry *pFilter,
956                     int               iFilterSize,
957                     uint32_t          dwHash);
958 
959 // Allocate cache entry for a given size from pCache (returns a cache entry pointer; presumably NULL when none is available -- confirm)
960 Kdll_CacheEntry *
961 KernelDll_AllocateCacheEntry(Kdll_KernelCache *pCache,
962                              int32_t           iSize);
963 
964 // Allocate more kernel cache entries for pCache (KernelDll_ReleaseAdditionalCacheEntries is the release-side prototype)
965 Kdll_CacheEntry *
966 KernelDll_AllocateAdditionalCacheEntries(Kdll_KernelCache *pCache);
967 
// Release a hash table entry / a kernel cache entry.
// `entry` is a uint16_t; Kdll_KernelHashTable documents its indices as 1-based, so presumably
// this is a 1-based index as well -- TODO confirm against the implementation.
968 void KernelDll_ReleaseHashEntry(Kdll_KernelHashTable *pHashTable, uint16_t entry);
969 void KernelDll_ReleaseCacheEntry(Kdll_KernelCache *pCache, Kdll_CacheEntry  *pEntry);
970 
971 //---------------------------------------------------------------------------------------
972 // KernelDll_SetupFunctionPointers_Ext - Setup Extension Function pointers
973 //
974 // Parameters:
975 //    Kdll_State *pState    - [in/out] Kernel Dll state
976 //
977 // Output: true  - Function pointers are set
978 //         false - Failed to setup function pointers (invalid platform)
979 //-----------------------------------------------------------------------------------------
980 bool KernelDll_SetupFunctionPointers_Ext(
981     Kdll_State *pState);
982 
983 #ifdef __cplusplus
984 }
985 #endif
986 
987 #endif  // __HAL_KERNELDLL_NEXT_H__