xref: /aosp_15_r20/external/intel-media-driver/media_softlet/agnostic/common/vp/kdll/hal_kerneldll_next.c (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      hal_kerneldll_next.c
24 //! \brief         Kernel Dynamic Linking/Loading routines for FC
25 //!
26 
27 #ifndef VPHAL_LIB
28 
29 #if IMOLA
30 #include <stdlib.h>
31 #endif             // IMOLA
32 #include <math.h>  //for sin & cos
33 #endif             // VPHAL_LIB
34 
35 #if EMUL || VPHAL_LIB
36 #include <math.h>
37 #include "support.h"
38 #elif LINUX
39 #else  // !(EMUL | VPHAL_LIB) && !LINUX
40 
41 #endif  // EMUL | VPHAL_LIB
42 
43 #include "hal_kerneldll_next.h"
44 #include "vp_utils.h"
45 
46 // Define the _DEBUG symbol for the KDLL Release build before including the "vpkrnheader.h" file.
47 // This is necessary to get full kernel names in both Release and Debug versions of the KDLL app.
48 #if EMUL || VPHAL_LIB
49 #ifndef _DEBUG
50 #define _DEBUG 2
51 #endif  // _DEBUG
52 #endif  // EMUL || VPHAL_LIB
53 
54 // Kernel IDs and Kernel Names
55 #include "vpkrnheader.h"  // IDR_VP_TOTAL_NUM_KERNELS
56 
57 // Undefine the _DEBUG symbol for the remainder of the KDLL Release build
58 #if _DEBUG == 2
59 #undef _DEBUG
60 #endif  // _DEBUG
61 
62 
63 #ifndef PI
64 #define PI 3.1415926535897932f
65 #endif  // PI
66 
67 #ifdef __cplusplus
68 extern "C" {
69 #endif  // __cplusplus
70 
//!
//! \brief    Fold a 32-bit hash value down to 8 bits
//! \details  XORs the four bytes of hash together: adjacent bytes are combined
//!           first ((hash >> 8) ^ hash, masked with 0x00ff00ff), then the two
//!           surviving bytes are combined and masked with 0xff.
//!           The expansion is a brace-enclosed block, so it may be followed by
//!           an optional ';' at the call site.
//! \param    folded_hash  [out] lvalue receiving the 8-bit result (also used as
//!                              scratch storage between the two steps)
//! \param    hash         [in]  value to fold; it is evaluated twice, so pass an
//!                              expression without side effects
//!
#define FOLD_HASH(folded_hash, hash)                                        \
    {                                                                       \
        (folded_hash) = (((hash) >> 8) ^ (hash)) & 0x00ff00ff;              \
        (folded_hash) = (((folded_hash) >> 16) ^ (folded_hash)) & 0xff;     \
    }
76 
77 const bool g_cIsFormatYUV[Format_Count] =
78     {
79         false,  // Format_Any
80         false,  // Format_A8R8G8B8
81         false,  // Format_X8R8G8B8
82         false,  // Format_A8B8G8R8
83         false,  // Format_X8B8G8R8
84         false,  // Format_A16B16G16R16
85         false,  // Format_A16R16G16B16
86         false,  // Format_R5G6B5
87         false,  // Format_R32U
88         false,  // Format_R32F
89         false,  // Format_R8G8B8
90         false,  // Format_RGBP
91         false,  // Format_BGRP
92         true,   // Format_YUY2
93         true,   // Format_YUYV
94         true,   // Format_YVYU
95         true,   // Format_UYVY
96         true,   // Format_VYUY
97         true,   // Format_Y216
98         true,   // Format_Y210
99         true,   // Format_Y416
100         true,   // Format_AYUV
101         true,   // Format_AUYV
102         true,   // Format_Y410
103         true,   // Format_400P
104         true,   // Format_NV12
105         true,   // Format_NV12_UnAligned
106         true,   // Format_NV21
107         true,   // Format_NV11
108         true,   // Format_NV11_UnAligned
109         true,   // Format_P208
110         true,   // Format_P208_UnAligned
111         true,   // Format_IMC1
112         true,   // Format_IMC2
113         true,   // Format_IMC3
114         true,   // Format_IMC4
115         true,   // Format_422H
116         true,   // Format_422V
117         true,   // Format_444P
118         true,   // Format_411P
119         true,   // Format_411R
120         true,   // Format_I420
121         true,   // Format_IYUV
122         true,   // Format_YV12
123         true,   // Format_YVU9
124         true,   // Format_AI44    (YUV originally, palette may be converted to RGB)
125         true,   // Format_IA44    (same as above)
126         false,  // Format_P8      (using RGB since P8 is uncommon in FC)
127         false,  // Format_A8P8    (same as above)
128         false,  // Format_A8
129         false,  // Format_L8
130         false,  // Format_A4L4
131         false,  // Format_A8L8
132         true,   // Format_IRW0
133         true,   // Format_IRW1
134         true,   // Format_IRW2
135         true,   // Format_IRW3
136         true,   // Format_IRW4
137         true,   // Format_IRW5
138         true,   // Format_IRW6
139         true,   // Format_IRW7
140         false,  // Format_STMM
141         false,  // Format_Buffer
142         false,  // Format_Buffer_2D
143         false,  // Format_V8U8
144         false,  // Format_R32S
145         false,  // Format_R8U
146         false,  // Format_R8G8UN
147         false,  // Format_R8G8SN
148         false,  // Format_G8R8_G8B8
149         false,  // Format_R16U
150         false,  // Format_R16S
151         false,  // Format_R16UN
152         false,  // Format_RAW
153         false,  // Format_Y8
154         false,  // Format_Y1
155         false,  // Format_Y16U
156         false,  // Format_Y16S
157         false,  // Format_L16
158         false,  // Format_D16
159         false,  // Format_R10G10B10A2
160         false,  // Format_B10G10R10A2
161         true,   // Format_P016
162         true,   // Format_P010
163         true    // Format_YV12_Planar
164 };
165 
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsYUVFormat
| Purpose   : Look up whether a format is classified as YUV in g_cIsFormatYUV
| Return    : Table entry for the format; false when the format lies outside
|             the range [Format_Any, Format_Count)
\---------------------------------------------------------------------------*/
bool KernelDll_IsYUVFormat(MOS_FORMAT format)
{
    // Anything that cannot index the table is reported as non-YUV
    if (format < Format_Any || format >= Format_Count)
    {
        return false;
    }

    return g_cIsFormatYUV[format];
}
177 
178 /*----------------------------------------------------------------------------
179 | Purpose   : Group common color spaces into one
180 | Returns   : Return the representative color space of the group
181 \---------------------------------------------------------------------------*/
KernelDll_TranslateCspace(VPHAL_CSPACE cspace)182 VPHAL_CSPACE KernelDll_TranslateCspace(VPHAL_CSPACE cspace)
183 {
184     switch (cspace)
185     {
186     case CSpace_BT709:
187     case CSpace_xvYCC709:
188         return CSpace_BT709;
189 
190     case CSpace_BT601:
191     case CSpace_xvYCC601:
192         return CSpace_BT601;
193 
194     case CSpace_BT601_FullRange:
195         return CSpace_BT601_FullRange;
196 
197     case CSpace_BT709_FullRange:
198         return CSpace_BT709_FullRange;
199 
200     case CSpace_RGB:
201     case CSpace_sRGB:
202         return CSpace_sRGB;
203 
204     case CSpace_stRGB:
205         return CSpace_stRGB;
206 
207     case CSpace_Gray:
208     case CSpace_BT601Gray:
209         return CSpace_BT601Gray;
210 
211     case CSpace_BT601Gray_FullRange:
212         return CSpace_BT601Gray_FullRange;
213 
214     case CSpace_BT2020:
215         return CSpace_BT2020;
216 
217     case CSpace_BT2020_FullRange:
218         return CSpace_BT2020_FullRange;
219 
220     case CSpace_BT2020_RGB:
221         return CSpace_BT2020_RGB;
222 
223     case CSpace_BT2020_stRGB:
224         return CSpace_BT2020_stRGB;
225 
226     default:
227         return CSpace_None;
228     }
229 }
230 
/*----------------------------------------------------------------------------
| Name      : KernelDll_MatrixProduct
| Purpose   : Compose two color transforms stored as [3x4] row-major matrices
|             (3x3 linear part plus one offset column per row):
|
|                 dest = m1 * m2
|
|             i.e. the transform that applies m2 first and m1 second. The
|             offset column of the result is m1's linear part applied to m2's
|             offsets, plus m1's own offsets.
|
|             The product is accumulated in a local buffer and stored to dest
|             only at the end, so dest may alias m1, m2 or both
|             (dest = dest * m2, dest = m1 * dest, dest = dest * dest) and no
|             fallible copy helper is involved.
|
| Params    : dest [out] 12 floats receiving the product
|             m1   [in]  12 floats, left operand
|             m2   [in]  12 floats, right operand
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_MatrixProduct(
    float *      dest,
    const float *m1,
    const float *m2)
{
    float result[12];
    int   row;
    int   col;
    int   i;

    for (row = 0; row < 3; row++)
    {
        const float *lhs = m1 + 4 * row;

        for (col = 0; col < 4; col++)
        {
            float sum = lhs[0] * m2[col] + lhs[1] * m2[4 + col] + lhs[2] * m2[8 + col];

            // Last column carries the offsets: add the left operand's own offset
            if (col == 3)
            {
                sum += lhs[3];
            }

            result[4 * row + col] = sum;
        }
    }

    // Store only after every input element has been read (aliasing-safe)
    for (i = 0; i < 12; i++)
    {
        dest[i] = result[i];
    }
}
263 
/*----------------------------------------------------------------------------
| Name      : KernelDll_UpdateCscCoefficients
| Purpose   : Build the [3x4] color conversion matrix for
|             pMatrix->SrcSpace -> pMatrix->DstSpace, fold the procamp
|             adjustment into it when pMatrix->iProcampID selects a valid
|             entry of pState->pProcamp, rescale the result for kernel use and
|             pass it to pState->pfnMapCSCMatrix to fill pMatrix->Coeff.
|             pMatrix->iProcampVersion is refreshed when procamp is applied.
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_UpdateCscCoefficients(Kdll_State *pState,
    Kdll_CSC_Matrix *                            pMatrix)
{
    float              fwd[12];     // CSC  matrix (YUV->RGB)
    float              inv[12];     // ICSC matrix (RGB->YUV), (YUV->YUV)
    float              work[12];    // matrix being assembled
    float              scaled[12];  // procamp scratch, then kernel-scaled matrix
    Kdll_CSCType       type;
    Kdll_Procamp *     procamp  = nullptr;
    const VPHAL_CSPACE srcSpace = pMatrix->SrcSpace;
    const VPHAL_CSPACE dstSpace = pMatrix->DstSpace;
    bool               haveFwd  = false;
    bool               haveInv  = false;
    int32_t            i;

    MOS_ZeroMemory(work, sizeof(work));
    MOS_ZeroMemory(fwd, sizeof(fwd));
    MOS_ZeroMemory(inv, sizeof(inv));

    // Pick the procamp entry, if the ID refers to a usable one
    if (pMatrix->iProcampID > DL_PROCAMP_DISABLED &&
        pMatrix->iProcampID < pState->iProcampSize &&
        pState->pProcamp != nullptr)
    {
        procamp = pState->pProcamp + pMatrix->iProcampID;
    }

    if (srcSpace != dstSpace)
    {
        // A real conversion is required
        const bool toSRGB  = (dstSpace == CSpace_sRGB) && (srcSpace != CSpace_stRGB);
        const bool toStRGB = (dstSpace == CSpace_stRGB) && (srcSpace != CSpace_sRGB);

        if (toSRGB || toStRGB)
        {
            KernelDll_GetCSCMatrix(srcSpace, dstSpace, fwd);
            MOS_SecureMemcpy(work, sizeof(fwd), (void *)fwd, sizeof(fwd));
            haveFwd = true;
            type    = CSC_YUV_RGB;
        }
        else
        {
            const bool srcIsRgb       = KernelDll_IsCspace(srcSpace, CSpace_RGB);
            const bool dstIsRgb       = KernelDll_IsCspace(dstSpace, CSpace_RGB);
            const bool srcIs2020Yuv   = KernelDll_IsCspace(srcSpace, CSpace_BT2020);
            const bool dstIs2020Yuv   = KernelDll_IsCspace(dstSpace, CSpace_BT2020);
            const bool srcIs2020Rgb   = KernelDll_IsCspace(srcSpace, CSpace_BT2020_RGB);
            const bool dstIs2020Rgb   = KernelDll_IsCspace(dstSpace, CSpace_BT2020_RGB);

            KernelDll_GetCSCMatrix(srcSpace, dstSpace, inv);
            MOS_SecureMemcpy(work, sizeof(inv), (void *)inv, sizeof(inv));
            haveInv = true;

            if ((srcIsRgb && !dstIsRgb) || (srcIs2020Rgb && dstIs2020Yuv))
            {
                type = CSC_RGB_YUV;
            }
            else if (srcIs2020Yuv && dstIs2020Rgb)
            {
                type = CSC_YUV_RGB;
            }
            else if (srcIs2020Rgb && dstIs2020Rgb)
            {
                type = CSC_RGB_RGB;

                // Kernel parameters do not support 10bit: bring the offset
                // column from 10bit down to 8bit (value/4)
                for (i = 3; i < 12; i += 4)
                {
                    work[i] = ROUND_FLOAT(work[i], 0.25f);
                }
            }
            else
            {
                type = CSC_YUV_YUV;
            }
        }
    }
    else if (procamp && (dstSpace == CSpace_sRGB || dstSpace == CSpace_stRGB))
    {
        // Same RGB space on both sides, but procamp is applied in BT709:
        // go RGB -> BT709, adjust, then BT709 -> RGB
        KernelDll_GetCSCMatrix(dstSpace, CSpace_BT709, inv);
        KernelDll_GetCSCMatrix(CSpace_BT709, dstSpace, fwd);
        haveInv = true;
        haveFwd = true;
        type    = CSC_RGB_RGB;
    }
    else
    {
        // Nothing to convert: start from identity
        MOS_SecureMemcpy(work, sizeof(g_cCSC_Identity), (void *)g_cCSC_Identity, sizeof(g_cCSC_Identity));
        type = CSC_YUV_YUV;
    }

    // The matrix product is only needed when procamp is present;
    // otherwise the matrix assembled above is used unchanged
    if (procamp)
    {
        const float brightness = procamp->fBrightness;
        const float contrast   = procamp->fContrast;
        const float hue        = procamp->fHue * (PI / 180.0f);
        const float saturation = procamp->fSaturation;
        const float cosTerm    = (float)cos(hue) * contrast * saturation;
        const float sinTerm    = (float)sin(hue) * contrast * saturation;

        // procamp matrix
        //
        // [Y']   [ c            0          0  ] [Y]   [ 16  - 16 * c + b              ]
        // [U'] = [ 0   c*s*cos(h)  c*s*sin(h) ] [U] + [ 128 - 128*c*s*(cos(h)+sin(h)) ]
        // [V']   [ 0  -c*s*sin(h)  c*s*cos(h) ] [V]   [ 128 - 128*c*s*(cos(h)-sin(h)) ]

        scaled[0]  = contrast;
        scaled[1]  = 0.0f;
        scaled[2]  = 0.0f;
        scaled[3]  = 16.0f - 16.0f * contrast + brightness;

        scaled[4]  = 0.0f;
        scaled[5]  = cosTerm;
        scaled[6]  = sinTerm;
        scaled[7]  = 128.0f * (1.0f - cosTerm - sinTerm);

        scaled[8]  = 0.0f;
        scaled[9]  = -sinTerm;
        scaled[10] = cosTerm;
        scaled[11] = 128.0f * (1.0f - cosTerm + sinTerm);

        // Final matrix is [csc] * [pa] * [icsc], skipping whichever of
        // csc / icsc is not in use
        if (haveInv)
        {
            KernelDll_MatrixProduct(scaled, scaled, inv);
        }

        if (haveFwd)
        {
            KernelDll_MatrixProduct(scaled, fwd, scaled);
        }

        // Record which procamp version these coefficients were built from
        pMatrix->iProcampVersion = procamp->iProcampVersion;

        // Continue with the procamp-adjusted matrix
        MOS_SecureMemcpy(work, sizeof(work), (void *)scaled, sizeof(work));
    }

    // Normalize for kernel use: the three linear columns are 9.7 values,
    // the offset column is 16.0 (value/2)
    for (i = 0; i < 12; i++)
    {
        if ((i % 4) == 3)
        {
            scaled[i] = ROUND_FLOAT(work[i], 0.5f);
        }
        else
        {
            scaled[i] = ROUND_FLOAT(work[i], 128.0f);
        }
    }

    // Save matrix as kernel CSC coefficients
    pState->pfnMapCSCMatrix(type, scaled, pMatrix->Coeff);
}
428 
429 //---------------------------------------------------------------------------------------
430 // KernelDll_StartKernelSearch_Next - Starts kernel search
431 //
432 // Parameters:
433 //    Kdll_State       *pState       - [in]     Dynamic Linking State
434 //    Kdll_FilterEntry *pFilter      - [in]     Search filter (array of search entries)
435 //    int               iFilterSize  - [in]     Search filter size
436 //    Kdll_SearchState *pSearchState - [in/out] Kernel search state
437 //
438 // Output: none
439 //---------------------------------------------------------------------------------------
KernelDll_StartKernelSearch_Next(Kdll_State * pState,Kdll_SearchState * pSearchState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t uiIs64BInstrEnabled)440 void KernelDll_StartKernelSearch_Next(
441     Kdll_State *      pState,
442     Kdll_SearchState *pSearchState,
443     Kdll_FilterEntry *pFilter,
444     int32_t           iFilterSize,
445     uint32_t          uiIs64BInstrEnabled)
446 {
447     int32_t nLayer;
448 
449     VP_RENDER_FUNCTION_ENTER;
450 
451     // Reset all states
452     MOS_ZeroMemory(pSearchState, sizeof(Kdll_SearchState));
453 
454     // Setup KDLL state
455     pSearchState->pKdllState = pState;  // KDLL state
456 
457     // Cleanup kernel table
458     pSearchState->KernelCount = 0;  // # of kernels
459 
460     // Cleanup patch data
461     memset(pSearchState->Patches, 0, sizeof(pSearchState->Patches));
462     memset(pSearchState->PatchID, -1, sizeof(pSearchState->PatchID));
463     pSearchState->PatchCount = 0;
464 
465     // Copy original filter; filter will be modified as part of the search
466     if (pFilter && iFilterSize > 0)
467     {
468         MOS_SecureMemcpy(pSearchState->Filter, iFilterSize * sizeof(Kdll_FilterEntry), pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
469         pSearchState->pFilter     = pSearchState->Filter;
470         pSearchState->iFilterSize = iFilterSize;
471 
472         // Copy the render target format
473         pSearchState->target_format = pSearchState->pFilter[iFilterSize - 1].format;
474 
475         // Copy the render target tile type
476         pSearchState->target_tiletype = pSearchState->pFilter[iFilterSize - 1].tiletype;
477 
478         // Indicate whether to use 64B save kernel for render target surface
479         if (uiIs64BInstrEnabled &&
480             ((pSearchState->target_tiletype == MOS_TILE_X) ||
481                 (pSearchState->target_tiletype == MOS_TILE_LINEAR)))
482         {
483             pSearchState->b64BSaveEnabled = true;
484         }
485     }
486 }
487 
/*----------------------------------------------------------------------------
| Name      : KernelDll_ModifyFunctionPointers_Next
| Purpose   : Install KernelDll_StartKernelSearch_Next as the
|             pfnStartKernelSearch callback of the given KDLL state
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_ModifyFunctionPointers_Next(Kdll_State *pState)
{
    pState->pfnStartKernelSearch = &KernelDll_StartKernelSearch_Next;
}
492 
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsCspace
| Purpose   : Test whether a color space belongs to the group named by match.
|             Group values (CSpace_RGB, CSpace_YUV, CSpace_Gray, CSpace_Any,
|             CSpace_BT2020, CSpace_BT2020_RGB) match every member listed for
|             them below; any other match value requires equality.
| Return    : true if cspace matches, false otherwise
\---------------------------------------------------------------------------*/
bool KernelDll_IsCspace(VPHAL_CSPACE cspace, VPHAL_CSPACE match)
{
    bool isMember;

    switch (match)
    {
    case CSpace_RGB:
        isMember = (cspace == CSpace_sRGB) || (cspace == CSpace_stRGB);
        break;

    case CSpace_YUV:
        isMember = (cspace == CSpace_BT709) ||
                   (cspace == CSpace_BT601) ||
                   (cspace == CSpace_BT601_FullRange) ||
                   (cspace == CSpace_BT709_FullRange) ||
                   (cspace == CSpace_xvYCC709) ||
                   (cspace == CSpace_xvYCC601);
        break;

    case CSpace_Gray:
        isMember = (cspace == CSpace_BT601Gray) ||
                   (cspace == CSpace_BT601Gray_FullRange);
        break;

    case CSpace_Any:
        // Everything except "no color space" qualifies
        isMember = (cspace != CSpace_None);
        break;

    case CSpace_BT2020:
        isMember = (cspace == CSpace_BT2020) ||
                   (cspace == CSpace_BT2020_FullRange);
        break;

    case CSpace_BT2020_RGB:
        isMember = (cspace == CSpace_BT2020_RGB) ||
                   (cspace == CSpace_BT2020_stRGB);
        break;

    default:
        // Not a group: exact match only
        isMember = (cspace == match);
        break;
    }

    return isMember;
}
530 
531 /*----------------------------------------------------------------------------
532 | Name      : KernelDll_GetYuvRangeAndOffset
533 | Purpose   : Get the YUV offset and excursion for the input color space
534 | Return    : true if success else false
535 \---------------------------------------------------------------------------*/
KernelDll_GetYuvRangeAndOffset(Kdll_CSpace cspace,float * pLumaOffset,float * pLumaExcursion,float * pChromaZero,float * pChromaExcursion)536 bool KernelDll_GetYuvRangeAndOffset(
537     Kdll_CSpace cspace,
538     float *     pLumaOffset,
539     float *     pLumaExcursion,
540     float *     pChromaZero,
541     float *     pChromaExcursion)
542 {
543     bool res = true;
544 
545     switch (cspace)
546     {
547     case CSpace_BT601_FullRange:
548     case CSpace_BT709_FullRange:
549     case CSpace_BT601Gray_FullRange:
550     case CSpace_BT2020_FullRange:
551         *pLumaOffset      = 0.0f;
552         *pLumaExcursion   = 255.0f;
553         *pChromaZero      = 128.0f;
554         *pChromaExcursion = 255.0f;
555         break;
556 
557     case CSpace_BT601:
558     case CSpace_BT709:
559     case CSpace_xvYCC601:  // since matrix is the same as 601, use the same range
560     case CSpace_xvYCC709:  // since matrix is the same as 709, use the same range
561     case CSpace_BT601Gray:
562     case CSpace_BT2020:
563         *pLumaOffset      = 16.0f;
564         *pLumaExcursion   = 219.0f;
565         *pChromaZero      = 128.0f;
566         *pChromaExcursion = 224.0f;
567         break;
568 
569     default:
570         res = false;
571         break;
572     }
573 
574     return res;
575 }
576 
577 /*----------------------------------------------------------------------------
578 | Name      : KernelDll_GetRgbRangeAndOffset
579 | Purpose   : Get the RGB offset and excursion for the input color space
580 | Return    : true if success else false
581 \---------------------------------------------------------------------------*/
KernelDll_GetRgbRangeAndOffset(Kdll_CSpace cspace,float * pRgbOffset,float * pRgbExcursion)582 bool KernelDll_GetRgbRangeAndOffset(
583     Kdll_CSpace cspace,
584     float *     pRgbOffset,
585     float *     pRgbExcursion)
586 {
587     bool res = true;
588 
589     switch (cspace)
590     {
591     case CSpace_sRGB:
592     case CSpace_BT2020_RGB:
593         *pRgbOffset    = 0.0f;
594         *pRgbExcursion = 255.0f;
595         break;
596 
597     case CSpace_stRGB:
598     case CSpace_BT2020_stRGB:
599         *pRgbOffset    = 16.0f;
600         *pRgbExcursion = 219.0f;
601         break;
602 
603     default:
604         res = false;
605         break;
606     }
607 
608     return res;
609 }
610 
611 /*----------------------------------------------------------------------------
612 | Name      : KernelDll_CalcYuvToRgbMatrix
613 | Purpose   : Given the YUV->RGB transfer matrix, get the final matrix after
614 |             applying offsets and excursions.
615 |
616 | [R']     [R_o]                                 [R_e/Y_e    0       0   ]  [Y'  - Y_o]
617 | [G']  =  [R_o] + [YUVtoRGBCoeff (3x3 matrix)]. [   0    R_e/C_e    0   ]. [Cb' - C_z]
618 | [B']     [R_o]                                 [   0       0    R_e/C_e]. [Cr' - C_z]
619 |
620 | [R']  = [C0  C1   C2] [Y' ]   [C3]      {Out pMatrix}
621 | [G']  = [C4  C5   C6].[Cb'] + [C7]
622 | [B']  = [C8  C9  C10] [Cr'] + [C11]
623 |
624 | Return    : true if success else false
625 \---------------------------------------------------------------------------*/
KernelDll_CalcYuvToRgbMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pTransferMatrix,float * pOutMatrix)626 bool KernelDll_CalcYuvToRgbMatrix(
627     Kdll_CSpace src,              // [in] YUV Color space
628     Kdll_CSpace dst,              // [in] RGB Color space
629     float *     pTransferMatrix,  // [in] Transfer matrix (3x3)
630     float *     pOutMatrix)            // [out] Conversion matrix (3x4)
631 {
632     bool  res;
633     float Y_o, Y_e, C_z, C_e;
634     float R_o, R_e;
635 
636     res = true;
637 
638     res = KernelDll_GetRgbRangeAndOffset(dst, &R_o, &R_e);
639     if (res == false)
640     {
641         goto finish;
642     }
643 
644     res = KernelDll_GetYuvRangeAndOffset(src, &Y_o, &Y_e, &C_z, &C_e);
645     if (res == false)
646     {
647         goto finish;
648     }
649 
650     // after + (3x3)(3x3)
651     pOutMatrix[0]  = pTransferMatrix[0] * R_e / Y_e;
652     pOutMatrix[4]  = pTransferMatrix[3] * R_e / Y_e;
653     pOutMatrix[8]  = pTransferMatrix[6] * R_e / Y_e;
654     pOutMatrix[1]  = pTransferMatrix[1] * R_e / C_e;
655     pOutMatrix[5]  = pTransferMatrix[4] * R_e / C_e;
656     pOutMatrix[9]  = pTransferMatrix[7] * R_e / C_e;
657     pOutMatrix[2]  = pTransferMatrix[2] * R_e / C_e;
658     pOutMatrix[6]  = pTransferMatrix[5] * R_e / C_e;
659     pOutMatrix[10] = pTransferMatrix[8] * R_e / C_e;
660 
661     // (3x1) - (3x3)(3x3)(3x1)
662     pOutMatrix[3]  = R_o - (pOutMatrix[0] * Y_o + pOutMatrix[1] * C_z + pOutMatrix[2] * C_z);
663     pOutMatrix[7]  = R_o - (pOutMatrix[4] * Y_o + pOutMatrix[5] * C_z + pOutMatrix[6] * C_z);
664     pOutMatrix[11] = R_o - (pOutMatrix[8] * Y_o + pOutMatrix[9] * C_z + pOutMatrix[10] * C_z);
665 
666 finish:
667     return res;
668 }
669 
670 /*----------------------------------------------------------------------------
671 | Name      : KernelDll_CalcRgbToYuvMatrix
672 | Purpose   : Given the RGB->YUV transfer matrix, get the final matrix after
673 |             applying offsets and excursions.
674 |
675 | [Y' ]     [Y_o - Y_e.R_o/R_e]   [Y_e/R_e    0       0   ]  [   RGB to YUV  ]  [R']
676 | [Cb']  =  [C_z]               + [   0    C_e/R_e    0   ]. [Transfer matrix]. [G']
677 | [Cr']     [C_z]                 [   0       0    C_e/R_e]  [   3x3 matrix  ]  [B']
678 |
679 | [Y' ]  = [C0  C1   C2] [R']   [C3]      {Out pMatrix}
680 | [Cb']  = [C4  C5   C6].[G'] + [C7]
681 | [Cr']  = [C8  C9  C10] [B'] + [C11]
682 |
683 | Return    : true if success else false
684 \---------------------------------------------------------------------------*/
KernelDll_CalcRgbToYuvMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pTransferMatrix,float * pOutMatrix)685 bool KernelDll_CalcRgbToYuvMatrix(
686     Kdll_CSpace src,              // [in] RGB Color space
687     Kdll_CSpace dst,              // [in] YUV Color space
688     float *     pTransferMatrix,  // [in] Transfer matrix (3x3)
689     float *     pOutMatrix)            // [out] Conversion matrix (3x4)
690 {
691     bool  res;
692     float Y_o, Y_e, C_z, C_e;
693     float R_o, R_e;
694 
695     res = true;
696 
697     res = KernelDll_GetRgbRangeAndOffset(src, &R_o, &R_e);
698     if (res == false)
699     {
700         goto finish;
701     }
702 
703     res = KernelDll_GetYuvRangeAndOffset(dst, &Y_o, &Y_e, &C_z, &C_e);
704     if (res == false)
705     {
706         goto finish;
707     }
708 
709     // multiplication of + onwards
710     pOutMatrix[0]  = pTransferMatrix[0] * Y_e / R_e;
711     pOutMatrix[1]  = pTransferMatrix[1] * Y_e / R_e;
712     pOutMatrix[2]  = pTransferMatrix[2] * Y_e / R_e;
713     pOutMatrix[4]  = pTransferMatrix[3] * C_e / R_e;
714     pOutMatrix[5]  = pTransferMatrix[4] * C_e / R_e;
715     pOutMatrix[6]  = pTransferMatrix[5] * C_e / R_e;
716     pOutMatrix[8]  = pTransferMatrix[6] * C_e / R_e;
717     pOutMatrix[9]  = pTransferMatrix[7] * C_e / R_e;
718     pOutMatrix[10] = pTransferMatrix[8] * C_e / R_e;
719 
720     // before +
721     pOutMatrix[3]  = Y_o - Y_e * R_o / R_e;
722     pOutMatrix[7]  = C_z;
723     pOutMatrix[11] = C_z;
724 
725 finish:
726     return res;
727 }
728 
729 /*----------------------------------------------------------------------------
730 | Name      : KernelDll_CalcGrayCoeffs
731 | Purpose   : Given CSC matrix, calculate the new matrix making Chroma zero.
732 |             Chroma will be read from the surface, but we need to factor in C_z
733 |             by adjusting this in the constant.
734 |
735 | [R']  = [C0  C1   C2] [Y' ]   [C3]      {Out pMatrix}
736 | [G']  = [C4  C5   C6].[C_z] + [C7]
737 | [B']  = [C8  C9  C10] [C_z]   [C11]
738 |
739 | New C3 = C1 * C_z + C2 * C_z + C3
740 |
741 | Return    : true if success else false
742 \---------------------------------------------------------------------------*/
KernelDll_CalcGrayCoeffs(Kdll_CSpace src,float * pMatrix)743 bool KernelDll_CalcGrayCoeffs(
744     Kdll_CSpace src,  // [in] YUV source Color space
745     float *     pMatrix)   // [in/out] Conversion matrix (3x4)
746 {
747     float Y_o, Y_e, C_z, C_e;
748     bool  res;
749 
750     res = true;
751 
752     res = KernelDll_GetYuvRangeAndOffset(src, &Y_o, &Y_e, &C_z, &C_e);
753     if (res == false)
754     {
755         goto finish;
756     }
757 
758     // Calculate the constant offset by factoring in C_z
759     pMatrix[3]  = pMatrix[1] * C_z + pMatrix[2] * C_z + pMatrix[3];
760     pMatrix[7]  = pMatrix[5] * C_z + pMatrix[6] * C_z + pMatrix[7];
761     pMatrix[11] = pMatrix[9] * C_z + pMatrix[10] * C_z + pMatrix[11];
762 
763     // Nullify the effect of chroma read
764     pMatrix[1] = pMatrix[2] = 0;
765     pMatrix[5] = pMatrix[6] = 0;
766     pMatrix[9] = pMatrix[10] = 0;
767 
768 finish:
769     return res;
770 }
771 
/*----------------------------------------------------------------------------
| Name      : KernelDll_3x3MatrixProduct
| Purpose   : Multiply the [3x3] linear parts of two [3x4] row-major matrices
|             (dest = m1 * m2). The last column of both inputs is ignored and
|             the last column of dest (elements 3, 7 and 11) is not written.
|             Results are stored element by element as they are computed.
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_3x3MatrixProduct(
    float *      dest,
    const float *m1,
    const float *m2)
{
    int row;
    int col;

    for (row = 0; row < 3; row++)
    {
        const int base = 4 * row;  // first element of this row (row stride is 4)

        for (col = 0; col < 3; col++)
        {
            dest[base + col] = m1[base] * m2[col] +
                               m1[base + 1] * m2[4 + col] +
                               m1[base + 2] * m2[8 + col];
        }
    }
}
795 
796 /*----------------------------------------------------------------------------
797 | Name      : KernelDll_CalcYuvToYuvMatrix
798 | Purpose   : Calculate the matrix equation for converting b/w YUV color spaces.
799 |             1. Get conversion matrix from Source YUV to sRGB
800 |             2. Get conversion matrix from sRGB to Destination YUV
801 |             3. Apply the transformation below to get the final matrix
802 |
803 | [Y'dst]  = [C0  C1   C2] [C0  C1   C2][Y'src] [C0  C1   C2] [C3]    [C3]
804 | [U']     = [C4  C5   C6].[C4  C5   C6][C_z] + [C4  C5   C6].[C7]  + [C7]
805 | [V']     = [C8  C9  C10] [C8  C9  C10][C_z]   [C8  C9  C10] [C11]   [C11]
806 |             dst matrix    src matrix           dst matrix    src     dst
807 |
808 | [Y'dst]  = [C0  C1   C2] [Y'src]   [C3]      {Out pMatrix}
809 | [U']     = [C4  C5   C6].[C_z] +   [C7]
810 | [V']     = [C8  C9  C10] [C_z]     [C11]
811 |
812 | Return    : true if success else false
813 \---------------------------------------------------------------------------*/
KernelDll_CalcYuvToYuvMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pOutMatrix)814 bool KernelDll_CalcYuvToYuvMatrix(
815     Kdll_CSpace src,    // [in] YUV Color space
816     Kdll_CSpace dst,    // [in] YUV Color space
817     float *     pOutMatrix)  // [out] Conversion matrix (3x4)
818 {
819     float fYuvToRgb[12] = {0};
820     float fRgbToYuv[12] = {0};
821     bool  res;
822 
823     res = true;
824 
825     // 1. Get conversion matrix from Source YUV to sRGB
826     if (IS_BT601_CSPACE(src))
827     {
828         res = KernelDll_CalcYuvToRgbMatrix(src, CSpace_sRGB, (float *)g_cCSC_BT601_YUV_RGB, fYuvToRgb);
829     }
830     else if(IS_COLOR_SPACE_BT2020_YUV(src))
831     {
832         switch (src)
833         {
834             case CSpace_BT2020:
835                 res = KernelDll_CalcYuvToRgbMatrix(CSpace_BT2020, CSpace_sRGB, (float *)g_cCSC_BT2020_LimitedYUV_RGB, fYuvToRgb);
836                 break;
837             case CSpace_BT2020_FullRange:
838                 res = KernelDll_CalcYuvToRgbMatrix(CSpace_BT2020_FullRange, CSpace_sRGB, (float *)g_cCSC_BT2020_YUV_RGB, fYuvToRgb);
839                 break;
840             default:
841                 res = false;
842                 break;
843         }
844     }
845     else
846     {
847         res = KernelDll_CalcYuvToRgbMatrix(src, CSpace_sRGB, (float *)g_cCSC_BT709_YUV_RGB, fYuvToRgb);
848     }
849     if (res == false)
850     {
851         goto finish;
852     }
853 
854     // 2. Get conversion matrix from sRGB to Destination YUV
855     if (IS_BT601_CSPACE(dst))
856     {
857         res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT601_RGB_YUV, fRgbToYuv);
858     }
859     else if (IS_COLOR_SPACE_BT2020_YUV(dst))
860     {
861         switch (dst)
862         {
863             case CSpace_BT2020_FullRange:
864                 res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT2020_RGB_YUV, fRgbToYuv);
865                 break;
866             case CSpace_BT2020:
867                 res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT2020_RGB_LimitedYUV, fRgbToYuv);
868                 break;
869             default:
870                 res = false;
871                 break;
872         }
873     }
874     else
875     {
876         res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT709_RGB_YUV, fRgbToYuv);
877     }
878     if (res == false)
879     {
880         goto finish;
881     }
882 
883     // 3. Multiply the 2 matrices above
884     KernelDll_3x3MatrixProduct(pOutMatrix, fRgbToYuv, fYuvToRgb);
885 
886     // Perform [3x3][3x1] matrix multiply + [3x1] matrix
887     pOutMatrix[3] = fRgbToYuv[0] * fYuvToRgb[3] + fRgbToYuv[1] * fYuvToRgb[7] +
888                     fRgbToYuv[2] * fYuvToRgb[11] + fRgbToYuv[3];
889     pOutMatrix[7] = fRgbToYuv[4] * fYuvToRgb[3] + fRgbToYuv[5] * fYuvToRgb[7] +
890                     fRgbToYuv[6] * fYuvToRgb[11] + fRgbToYuv[7];
891     pOutMatrix[11] = fRgbToYuv[8] * fYuvToRgb[3] + fRgbToYuv[9] * fYuvToRgb[7] +
892                      fRgbToYuv[10] * fYuvToRgb[11] + fRgbToYuv[11];
893 
894 finish:
895     return res;
896 }
897 
898 /*----------------------------------------------------------------------------
899 | Name      : KernelDll_GetCSCMatrix
900 | Purpose   : Get the required matrix for the given CSC conversion
| Return    : None (the matrix is returned through pCSC_Matrix)
902 \---------------------------------------------------------------------------*/
void KernelDll_GetCSCMatrix(
    Kdll_CSpace src,     // [in] Source Color space
    Kdll_CSpace dst,     // [in] Destination Color space
    float *     pCSC_Matrix)  // [out] CSC matrix to use
{
    Kdll_CSpace effSrc;    // source color space with gray variants mapped to non-gray
    bool        bDerived;  // set once a YUV<->RGB matrix has been computed
    bool        bGraySrc;
    int32_t     row;

    bGraySrc = KernelDll_IsCspace(src, CSpace_Gray);
    bDerived = false;

    // Gray color spaces are handled through their non-gray equivalents
    if (src == CSpace_BT601Gray)
    {
        effSrc = CSpace_BT601;
    }
    else if (src == CSpace_BT601Gray_FullRange)
    {
        effSrc = CSpace_BT601_FullRange;
    }
    else
    {
        effSrc = src;
    }

    if (KernelDll_IsCspace(effSrc, CSpace_YUV) || KernelDll_IsCspace(effSrc, CSpace_Gray))
    {
        // BT601/709 YUV to sRGB/stRGB
        if (KernelDll_IsCspace(dst, CSpace_RGB))
        {
            float *pCoeff = (IS_BT601_CSPACE(effSrc)) ? (float *)g_cCSC_BT601_YUV_RGB
                                                      : (float *)g_cCSC_BT709_YUV_RGB;

            KernelDll_CalcYuvToRgbMatrix(effSrc, dst, pCoeff, pCSC_Matrix);
            bDerived = true;
        }
    }
    else if (KernelDll_IsCspace(effSrc, CSpace_RGB))
    {
        // sRGB/stRGB to BT601/709 YUV; the table follows the destination
        if (KernelDll_IsCspace(dst, CSpace_YUV))
        {
            float *pCoeff = (IS_BT601_CSPACE(dst)) ? (float *)g_cCSC_BT601_RGB_YUV
                                                   : (float *)g_cCSC_BT709_RGB_YUV;

            KernelDll_CalcRgbToYuvMatrix(effSrc, dst, pCoeff, pCSC_Matrix);
            bDerived = true;
        }
    }
    else if (KernelDll_IsCspace(effSrc, CSpace_BT2020))
    {
        // BT2020 YUV to BT2020 RGB
        if (KernelDll_IsCspace(dst, CSpace_BT2020_RGB))
        {
            KernelDll_CalcYuvToRgbMatrix(effSrc, dst, (float *)g_cCSC_BT2020_YUV_RGB, pCSC_Matrix);
            bDerived = true;
        }
    }
    else if (KernelDll_IsCspace(effSrc, CSpace_BT2020_RGB))
    {
        // BT2020 RGB to BT2020 YUV
        if (KernelDll_IsCspace(dst, CSpace_BT2020))
        {
            KernelDll_CalcRgbToYuvMatrix(effSrc, dst, (float *)g_cCSC_BT2020_RGB_YUV, pCSC_Matrix);
            bDerived = true;
        }
    }

    // Nothing derived above: the conversion stays within one color family
    // (or is not supported at all)
    if (!bDerived)
    {
        if (effSrc == dst)
        {
            // Same color space on both sides: identity
            MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_Identity), (void *)g_cCSC_Identity, sizeof(g_cCSC_Identity));
        }
        else if (KernelDll_IsCspace(effSrc, CSpace_RGB))
        {
            // sRGB <-> stRGB; anything other than sRGB is treated as stRGB
            if (effSrc == CSpace_sRGB)
            {
                MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_sRGB_stRGB), (void *)g_cCSC_sRGB_stRGB, sizeof(g_cCSC_sRGB_stRGB));
            }
            else
            {
                MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_stRGB_sRGB), (void *)g_cCSC_stRGB_sRGB, sizeof(g_cCSC_stRGB_sRGB));
            }
        }
        else if (KernelDll_IsCspace(effSrc, CSpace_YUV))
        {
            // 601 <-> 709 YUV
            KernelDll_CalcYuvToYuvMatrix(effSrc, dst, pCSC_Matrix);
        }
        else if (KernelDll_IsCspace(effSrc, CSpace_BT2020_RGB))
        {
            if (effSrc == CSpace_BT2020_RGB)
            {
                // BT2020 RGB to BT2020 limited-range RGB
                MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_BT2020RGB_BT2020stRGB), (void *)g_cCSC_BT2020RGB_BT2020stRGB, sizeof(g_cCSC_BT2020RGB_BT2020stRGB));
            }
            else if (effSrc == CSpace_BT2020_stRGB)
            {
                // BT2020 limited-range RGB to BT2020 RGB
                MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_BT2020stRGB_BT2020RGB), (void *)g_cCSC_BT2020stRGB_BT2020RGB, sizeof(g_cCSC_BT2020stRGB_BT2020RGB));
            }
        }
        else if (KernelDll_IsCspace(effSrc, CSpace_BT2020))
        {
            // BT2020 limited-range YUV <-> BT2020 full-range YUV
            KernelDll_CalcYuvToYuvMatrix(effSrc, dst, pCSC_Matrix);
        }
        else
        {
            // pCSC_Matrix is left untouched on this path
            VP_RENDER_ASSERTMESSAGE("Not supported color space conversion(from %d to %d)", src, dst);
            MT_ERR2(MT_VP_KERNEL_CSC, MT_VP_KERNEL_CSPACE, src, MT_VP_KERNEL_CSPACE, dst);
        }
    }

    // Gray sources get their coefficients adjusted, using the original src
    if (bGraySrc)
    {
        KernelDll_CalcGrayCoeffs(src, pCSC_Matrix);
    }

    // Log the resulting 3x4 matrix, one row per message
    VP_RENDER_NORMALMESSAGE("");
    for (row = 0; row < 3; row++)
    {
        VP_RENDER_NORMALMESSAGE("%f\t%f\t%f\t%f",
            pCSC_Matrix[4 * row],
            pCSC_Matrix[4 * row + 1],
            pCSC_Matrix[4 * row + 2],
            pCSC_Matrix[4 * row + 3]);
    }
}
1043 
KernelDll_MapCSCMatrix(Kdll_CSCType csctype,const float * matrix,short * coeff)1044 bool KernelDll_MapCSCMatrix(
1045     Kdll_CSCType csctype,
1046     const float *matrix,
1047     short *      coeff)
1048 {
1049     // Unified kernel architecture requires that the color space
1050     // conversion coefficients programmed in specific orders, depends on the
1051     // type of the color space conversion.
1052     //
1053     // M (matrix)  ---> C (coeff)
1054 
1055     switch (csctype)
1056     {
1057     case CSC_YUV_RGB:
1058         // direct mapping from matrix to coeff
1059         coeff[0]  = FLOAT_TO_SHORT(matrix[0]);   // M0  --> C0
1060         coeff[1]  = FLOAT_TO_SHORT(matrix[1]);   // M1  --> C1
1061         coeff[2]  = FLOAT_TO_SHORT(matrix[2]);   // M2  --> C2
1062         coeff[3]  = FLOAT_TO_SHORT(matrix[3]);   // M3  --> C3
1063         coeff[4]  = FLOAT_TO_SHORT(matrix[4]);   // M4  --> C4
1064         coeff[5]  = FLOAT_TO_SHORT(matrix[5]);   // M5  --> C5
1065         coeff[6]  = FLOAT_TO_SHORT(matrix[6]);   // M6  --> C6
1066         coeff[7]  = FLOAT_TO_SHORT(matrix[7]);   // M7  --> C7
1067         coeff[8]  = FLOAT_TO_SHORT(matrix[8]);   // M8  --> C8
1068         coeff[9]  = FLOAT_TO_SHORT(matrix[9]);   // M9  --> C9
1069         coeff[10] = FLOAT_TO_SHORT(matrix[10]);  // M10 --> C10
1070         coeff[11] = FLOAT_TO_SHORT(matrix[11]);  // M11 --> C11
1071         break;
1072 
1073     case CSC_RGB_YUV:
1074         coeff[6]  = FLOAT_TO_SHORT(matrix[0]);   // M0   --> C6
1075         coeff[4]  = FLOAT_TO_SHORT(matrix[1]);   // M1   --> C4
1076         coeff[5]  = FLOAT_TO_SHORT(matrix[2]);   // M2   --> C5
1077         coeff[7]  = FLOAT_TO_SHORT(matrix[3]);   // M3   --> C7
1078         coeff[10] = FLOAT_TO_SHORT(matrix[4]);   // M4   --> C10
1079         coeff[8]  = FLOAT_TO_SHORT(matrix[5]);   // M5   --> C8
1080         coeff[9]  = FLOAT_TO_SHORT(matrix[6]);   // M6   --> C9
1081         coeff[11] = FLOAT_TO_SHORT(matrix[7]);   // M7   --> C11
1082         coeff[2]  = FLOAT_TO_SHORT(matrix[8]);   // M8   --> C2
1083         coeff[0]  = FLOAT_TO_SHORT(matrix[9]);   // M9   --> C0
1084         coeff[1]  = FLOAT_TO_SHORT(matrix[10]);  // M10  --> C1
1085         coeff[3]  = FLOAT_TO_SHORT(matrix[11]);  // M11  --> C3
1086         break;
1087 
1088     case CSC_YUV_YUV:
1089         coeff[4]  = FLOAT_TO_SHORT(matrix[0]);   // M0   --> C4
1090         coeff[5]  = FLOAT_TO_SHORT(matrix[1]);   // M1   --> C5
1091         coeff[6]  = FLOAT_TO_SHORT(matrix[2]);   // M2   --> C6
1092         coeff[7]  = FLOAT_TO_SHORT(matrix[3]);   // M3   --> C7
1093         coeff[8]  = FLOAT_TO_SHORT(matrix[4]);   // M4   --> C8
1094         coeff[9]  = FLOAT_TO_SHORT(matrix[5]);   // M5   --> C9
1095         coeff[10] = FLOAT_TO_SHORT(matrix[6]);   // M6   --> C10
1096         coeff[11] = FLOAT_TO_SHORT(matrix[7]);   // M7   --> C11
1097         coeff[0]  = FLOAT_TO_SHORT(matrix[8]);   // M8   --> C0
1098         coeff[1]  = FLOAT_TO_SHORT(matrix[9]);   // M9   --> C1
1099         coeff[2]  = FLOAT_TO_SHORT(matrix[10]);  // M10  --> C2
1100         coeff[3]  = FLOAT_TO_SHORT(matrix[11]);  // M11  --> C3
1101         break;
1102 
1103     default:
1104         //CSC_RGB_RGB
1105         coeff[0]  = FLOAT_TO_SHORT(matrix[1]);   // M1   --> C0
1106         coeff[1]  = FLOAT_TO_SHORT(matrix[2]);   // M2   --> C1
1107         coeff[2]  = FLOAT_TO_SHORT(matrix[0]);   // M0   --> C2
1108         coeff[3]  = FLOAT_TO_SHORT(matrix[3]);   // M3   --> C3
1109         coeff[4]  = FLOAT_TO_SHORT(matrix[5]);   // M5   --> C4
1110         coeff[5]  = FLOAT_TO_SHORT(matrix[6]);   // M6   --> C5
1111         coeff[6]  = FLOAT_TO_SHORT(matrix[4]);   // M4   --> C6
1112         coeff[7]  = FLOAT_TO_SHORT(matrix[7]);   // M7   --> C7
1113         coeff[8]  = FLOAT_TO_SHORT(matrix[9]);   // M9   --> C8
1114         coeff[9]  = FLOAT_TO_SHORT(matrix[10]);  // M10  --> C9
1115         coeff[10] = FLOAT_TO_SHORT(matrix[8]);   // M8   --> C10
1116         coeff[11] = FLOAT_TO_SHORT(matrix[11]);  // M11  --> C11
1117         break;
1118     }
1119 
1120     return true;
1121 }
1122 
KernelDll_IsFormat(MOS_FORMAT format,VPHAL_CSPACE cspace,MOS_FORMAT match)1123 bool KernelDll_IsFormat(
1124     MOS_FORMAT   format,
1125     VPHAL_CSPACE cspace,
1126     MOS_FORMAT   match)
1127 {
1128     switch (match)
1129     {
1130     case Format_Any:
1131         return (format != Format_None);
1132         break;
1133 
1134     case Format_RGB_Swap:
1135         return (IS_RGB_SWAP(format));
1136 
1137     case Format_RGB_No_Swap:
1138         return (IS_RGB_NO_SWAP(format));
1139 
1140     case Format_RGB:
1141         if (IS_PAL_FORMAT(format))
1142         {
1143             return (KernelDll_IsCspace(cspace, CSpace_RGB));
1144         }
1145         else
1146         {
1147             return (IS_RGB_FORMAT(format) && !IS_PL3_RGB_FORMAT(format));
1148         }
1149 
1150     case Format_RGB32:
1151         return (IS_RGB32_FORMAT(format));
1152 
1153     case Format_PA:
1154         if (IS_PAL_FORMAT(format))
1155         {
1156             return (KernelDll_IsCspace(cspace, CSpace_YUV));
1157         }
1158         else
1159         {
1160             return (IS_PA_FORMAT(format) ||
1161                     format == Format_AUYV);
1162         }
1163 
1164     case Format_PL2:
1165         return (IS_PL2_FORMAT(format));
1166 
1167     case Format_PL2_UnAligned:
1168         return (IS_PL2_FORMAT_UnAligned(format));
1169 
1170     case Format_PL3:
1171         return (IS_PL3_FORMAT(format));
1172 
1173     case Format_PL3_RGB:
1174         return (IS_PL3_RGB_FORMAT(format));
1175 
1176     case Format_AYUV:
1177         return (format == Format_AYUV);
1178 
1179     case Format_PAL:
1180         return (IS_PAL_FORMAT(format));
1181 
1182     default:
1183         return (format == match);
1184     }
1185 
1186     return false;
1187 }
1188 
//---------------------------------------------------------------------------------------
// KernelDll_SetupProcampParameters - Setup Kernel Procamp Parameters
//
// Parameters:
//    Kdll_State   *pState       - [in/out] Kernel dll State to update
//    Kdll_Procamp *pProcamp     - [in] Pointer to array of Procamp Parameters
//    int32_t       iProcampSize - [in] Size of the array
//
// Output: None (pState->pProcamp and pState->iProcampSize are updated)
//-----------------------------------------------------------------------------------------
void KernelDll_SetupProcampParameters(Kdll_State    *pState,
                                      Kdll_Procamp  *pProcamp,
                                      int32_t        iProcampSize)
{
    VP_RENDER_FUNCTION_ENTER;

    // Store the caller's procamp array and its element count in the state;
    // only the pointer is kept, the array itself is not copied.
    pState->iProcampSize = iProcampSize;
    pState->pProcamp     = pProcamp;
}
1210 
1211 //--------------------------------------------------------------
1212 // Fowler/Noll/Vo FNV-1a hash algorithm - public domain
1213 //--------------------------------------------------------------
//!
//! \brief    32-bit FNV-1a hash of a byte buffer
//! \details  Bytes are read as unsigned char, so the result does not depend
//!           on whether plain char is signed on the target. Reading through
//!           plain char sign-extends bytes >= 0x80 on signed-char platforms,
//!           which gives a different, platform-dependent hash.
//! \param    [in] pData
//!           Buffer to hash; may be null, in which case the FNV offset basis
//!           is returned
//! \param    [in] iSize
//!           Number of bytes to hash; values <= 0 hash nothing
//! \return   uint32_t
//!           Hash value (0x811c9dc5 when no bytes are hashed)
//!
uint32_t KernelDll_SimpleHash(void *pData, int32_t iSize)
{
    static const uint32_t fnvPrime = 0x1000193;
    uint32_t              hash     = 0x811c9dc5;  // FNV offset basis
    const unsigned char  *pByte    = (const unsigned char *)pData;

    if (!pByte)
    {
        return hash;
    }

    for (; iSize > 0; iSize--)
    {
        // FNV-1a: xor the byte in first, then multiply by the prime
        hash ^= *pByte++;
        hash *= fnvPrime;
    }

    return hash;
}
1228 
1229 //--------------------------------------------------------------
1230 // KernelDll_GetCombinedKernel - Search combined kernel
1231 //--------------------------------------------------------------
KernelDll_GetCombinedKernel(Kdll_State * pState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t dwHash)1232 Kdll_CacheEntry *KernelDll_GetCombinedKernel(
1233     Kdll_State          *pState,
1234     Kdll_FilterEntry    *pFilter,
1235     int32_t             iFilterSize,
1236     uint32_t            dwHash)
1237 {
1238     Kdll_KernelHashTable *pHashTable;
1239     Kdll_KernelHashEntry *entries, *curr, *next;
1240     uint32_t folded_hash;
1241     uint16_t entry;
1242 
1243     VP_RENDER_FUNCTION_ENTER;
1244 
1245     // Get hash table
1246     pHashTable = &pState->KernelHashTable;
1247 
1248     // fold hash from 32 to 8 bit :-)
1249     FOLD_HASH(folded_hash, dwHash)
1250 
1251     // No entries
1252     entry = pHashTable->wHashTable[folded_hash];
1253     if (entry == 0 || entry > DL_MAX_COMBINED_KERNELS ) return nullptr;
1254 
1255     entries = (&pHashTable->HashEntry[0]) - 1;  // all indices are 1 based (0 means null)
1256     curr    = &entries[entry];
1257     for (; (curr != nullptr); curr = next)
1258     {
1259         // match 32-bit hash, then compare filter
1260         if (curr->dwHash  == dwHash &&
1261             curr->iFilter == iFilterSize)
1262         {
1263             if (memcmp(curr->pFilter, pFilter, iFilterSize * sizeof(Kdll_FilterEntry)) == 0)
1264             {
1265                 break;
1266             }
1267         }
1268 
1269         // Next entry with the same 8-bit folded hash
1270         next = (curr->next) ? (&entries[curr->next]) : nullptr;
1271     }
1272 
1273     if (curr)
1274     {   // Kernel already cached
1275         curr->pCacheEntry->dwRefresh = pState->dwRefresh++;
1276         return (curr->pCacheEntry);
1277     }
1278     else
1279     {   // Kernel must be built
1280         return nullptr;
1281     }
1282 }
1283 
1284 /*----------------------------------------------------------------------------
1285 | Name      : KernelDll_FindRule
1286 | Purpose   : Find a rule that matches the current search/input state
1287 |
1288 | Input     : pState       - Kernel Dll state
1289 |             pSearchState - current DL search state
1290 |
1291 | Return    :
1292 \---------------------------------------------------------------------------*/
KernelDll_FindRule(Kdll_State * pState,Kdll_SearchState * pSearchState)1293 bool KernelDll_FindRule(
1294     Kdll_State *      pState,
1295     Kdll_SearchState *pSearchState)
1296 {
1297     uint32_t              parser_state = (uint32_t)pSearchState->state;
1298     Kdll_RuleEntrySet *   pRuleSet;
1299     const Kdll_RuleEntry *pRuleEntry;
1300     int32_t               iRuleCount;
1301     int32_t               iMatchCount;
1302     bool                  bLayerFormatMatched;
1303     bool                  bSrc0FormatMatched;
1304     bool                  bSrc1FormatMatched;
1305     bool                  bTargetFormatMatched;
1306     bool                  bSrc0SampingMatched;
1307 
1308     VP_RENDER_FUNCTION_ENTER;
1309 
1310     // All Custom states are handled as a single group
1311     if (parser_state >= Parser_Custom)
1312     {
1313         parser_state = Parser_Custom;
1314     }
1315 
1316     pRuleSet   = pState->pDllRuleTable[parser_state];
1317     iRuleCount = pState->iDllRuleCount[parser_state];
1318 
1319     if (pRuleSet == nullptr || iRuleCount == 0)
1320     {
1321         VP_RENDER_NORMALMESSAGE("Search rules undefined.");
1322         pSearchState->pMatchingRuleSet = nullptr;
1323         return false;
1324     }
1325 
1326     // Search matching entry
1327     for (; iRuleCount > 0; iRuleCount--, pRuleSet++)
1328     {
1329         // Points to the first rule, get number of matches
1330         pRuleEntry  = pRuleSet->pRuleEntry;
1331         iMatchCount = pRuleSet->iMatchCount;
1332 
1333         // Initialize for each Ruleset
1334         bLayerFormatMatched  = false;
1335         bSrc0FormatMatched   = false;
1336         bSrc1FormatMatched   = false;
1337         bTargetFormatMatched = false;
1338         bSrc0SampingMatched  = false;
1339 
1340         // Match all rules within the same RuleSet
1341         for (; iMatchCount > 0; iMatchCount--, pRuleEntry++)
1342         {
1343             switch (pRuleEntry->id)
1344             {
1345             // Match current Parser State
1346             case RID_IsParserState:
1347                 if (pSearchState->state == (Kdll_ParserState)pRuleEntry->value)
1348                 {
1349                     continue;
1350                 }
1351                 else
1352                 {
1353                     break;
1354                 }
1355 
1356             // Match render method
1357             case RID_IsRenderMethod:
1358                 if (pSearchState->pFilter->RenderMethod == (Kdll_RenderMethod)pRuleEntry->value)
1359                 {
1360                     continue;
1361                 }
1362                 else
1363                 {
1364                     break;
1365                 }
1366 
1367             // Match target color space
1368             case RID_IsTargetCspace:
1369                 if (KernelDll_IsCspace(pSearchState->cspace, (VPHAL_CSPACE)pRuleEntry->value))
1370                 {
1371                     continue;
1372                 }
1373                 else
1374                 {
1375                     break;
1376                 }
1377 
1378             // Match current layer ID
1379             case RID_IsLayerID:
1380                 if (pSearchState->pFilter->layer == (Kdll_Layer)pRuleEntry->value)
1381                 {
1382                     continue;
1383                 }
1384                 else
1385                 {
1386                     break;
1387                 }
1388 
1389             // Match current layer format
1390             case RID_IsLayerFormat:
1391                 if (pRuleEntry->logic == Kdll_Or && bLayerFormatMatched)
1392                 {
1393                     // Already found matching format in the ruleset
1394                     continue;
1395                 }
1396                 else
1397                 {
1398                     // Check if the layer format matches the rule
1399                     if (KernelDll_IsFormat(pSearchState->pFilter->format,
1400                             pSearchState->pFilter->cspace,
1401                             (MOS_FORMAT)pRuleEntry->value))
1402                     {
1403                         bLayerFormatMatched = true;
1404                     }
1405 
1406                     if (pRuleEntry->logic == Kdll_None && !bLayerFormatMatched)
1407                     {
1408                         // Last entry and No matching format was found
1409                         break;
1410                     }
1411                     else
1412                     {
1413                         continue;
1414                     }
1415                 }
1416 
1417             // Match shuffling requirement
1418             case RID_IsShuffling:
1419                 if (pSearchState->ShuffleSamplerData == (Kdll_Shuffling)pRuleEntry->value)
1420                 {
1421                     continue;
1422                 }
1423                 else
1424                 {
1425                     break;
1426                 }
1427 
1428             // Check if RT rotates
1429             case RID_IsRTRotate:
1430                 if (pSearchState->bRTRotate == (pRuleEntry->value ? true : false))
1431                 {
1432                     continue;
1433                 }
1434                 else
1435                 {
1436                     break;
1437                 }
1438 
1439             // Match current layer rotation
1440             case RID_IsLayerRotation:
1441                 if (pSearchState->pFilter->rotation == (VPHAL_ROTATION)pRuleEntry->value)
1442                 {
1443                     continue;
1444                 }
1445                 else
1446                 {
1447                     break;
1448                 }
1449 
1450             // Match Src0 source format (surface)
1451             case RID_IsSrc0Format:
1452                 if (pRuleEntry->logic == Kdll_Or && bSrc0FormatMatched)
1453                 {
1454                     // Already found matching format in the ruleset
1455                     continue;
1456                 }
1457                 else
1458                 {
1459                     // Check if the source 0 format matches the rule
1460                     // The intermediate colorspace is used to determine
1461                     // if palettized input is given in RGB or YUV format.
1462                     if (KernelDll_IsFormat(pSearchState->src0_format,
1463                             pSearchState->cspace,
1464                             (MOS_FORMAT)pRuleEntry->value))
1465                     {
1466                         bSrc0FormatMatched = true;
1467                     }
1468 
1469                     if (pRuleEntry->logic == Kdll_None && !bSrc0FormatMatched)
1470                     {
1471                         // Last entry and No matching format was found
1472                         break;
1473                     }
1474                     else
1475                     {
1476                         continue;
1477                     }
1478                 }
1479 
1480             // Match Src0 sampling mode
1481             case RID_IsSrc0Sampling:
1482                 // Check if the layer format matches the rule
1483                 if (pSearchState->src0_sampling == (Kdll_Sampling)pRuleEntry->value)
1484                 {
1485                     bSrc0SampingMatched = true;
1486                     continue;
1487                 }
1488                 else if (bSrc0SampingMatched || pRuleEntry->logic == Kdll_Or)
1489                 {
1490                     continue;
1491                 }
1492                 else if ((Kdll_Sampling)pRuleEntry->value == Sample_Any &&
1493                          pSearchState->src0_sampling != Sample_None)
1494                 {
1495                     continue;
1496                 }
1497                 else
1498                 {
1499                     break;
1500                 }
1501 
1502             // Match Src0 rotation
1503             case RID_IsSrc0Rotation:
1504                 if (pSearchState->src0_rotation == (VPHAL_ROTATION)pRuleEntry->value)
1505                 {
1506                     continue;
1507                 }
1508                 else
1509                 {
1510                     break;
1511                 }
1512 
1513             // Match Src0 Colorfill
1514             case RID_IsSrc0ColorFill:
1515                 if (pSearchState->src0_colorfill == (int32_t)pRuleEntry->value)
1516                 {
1517                     continue;
1518                 }
1519                 else
1520                 {
1521                     break;
1522                 }
1523 
1524             // Match Src0 Luma Key
1525             case RID_IsSrc0LumaKey:
1526                 if (pSearchState->src0_lumakey == (int32_t)pRuleEntry->value)
1527                 {
1528                     continue;
1529                 }
1530                 else
1531                 {
1532                     break;
1533                 }
1534 
1535             // Match Src0 Procamp
1536             case RID_IsSrc0Procamp:
1537                 if (pSearchState->pFilter->procamp == (int32_t)pRuleEntry->value)
1538                 {
1539                     continue;
1540                 }
1541                 else
1542                 {
1543                     break;
1544                 }
1545 
1546             // Match Src0 CSC coefficients
1547             case RID_IsSrc0Coeff:
1548                 if (pSearchState->src0_coeff == (Kdll_CoeffID)pRuleEntry->value)
1549                 {
1550                     continue;
1551                 }
1552                 else if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Any &&
1553                          pSearchState->src0_coeff != CoeffID_None)
1554                 {
1555                     continue;
1556                 }
1557                 else
1558                 {
1559                     break;
1560                 }
1561 
1562             // Match Src0 CSC coefficients setting mode
1563             case RID_IsSetCoeffMode:
1564                 if (pSearchState->pFilter->SetCSCCoeffMode == (Kdll_SetCSCCoeffMethod)pRuleEntry->value)
1565                 {
1566                     continue;
1567                 }
1568                 else
1569                 {
1570                     break;
1571                 }
1572 
1573             // Match Src0 processing mode
1574             case RID_IsSrc0Processing:
1575                 if (pSearchState->src0_process == (Kdll_Processing)pRuleEntry->value)
1576                 {
1577                     continue;
1578                 }
1579                 if ((Kdll_Processing)pRuleEntry->value == Process_Any &&
1580                     pSearchState->src0_process != Process_None)
1581                 {
1582                     continue;
1583                 }
1584                 else
1585                 {
1586                     break;
1587                 }
1588 
1589             // Match Src0 chromasiting mode
1590             case RID_IsSrc0Chromasiting:
1591                 if (pSearchState->Filter->chromasiting == (int32_t)pRuleEntry->value)
1592                 {
1593                     continue;
1594                 }
1595                 else
1596                 {
1597                     break;
1598                 }
1599 
1600             // Match Src1 source format (surface)
1601             case RID_IsSrc1Format:
1602                 if (pRuleEntry->logic == Kdll_Or && bSrc1FormatMatched)
1603                 {
1604                     // Already found matching format in the ruleset
1605                     continue;
1606                 }
1607                 else
1608                 {
1609                     // Check if the source 1 format matches the rule
1610                     // The intermediate colorspace is used to determine
1611                     // if palettized input is given in RGB or YUV format.
1612                     if (KernelDll_IsFormat(pSearchState->src1_format,
1613                             pSearchState->cspace,
1614                             (MOS_FORMAT)pRuleEntry->value))
1615                     {
1616                         bSrc1FormatMatched = true;
1617                     }
1618 
1619                     if (pRuleEntry->logic == Kdll_None && !bSrc1FormatMatched)
1620                     {
1621                         // Last entry and No matching format was found
1622                         break;
1623                     }
1624                     else
1625                     {
1626                         continue;
1627                     }
1628                 }
1629             // Match Src1 sampling mode
1630             case RID_IsSrc1Sampling:
1631                 if (pSearchState->src1_sampling == (Kdll_Sampling)pRuleEntry->value)
1632                 {
1633                     continue;
1634                 }
1635                 else if ((Kdll_Sampling)pRuleEntry->value == Sample_Any &&
1636                          pSearchState->src1_sampling != Sample_None)
1637                 {
1638                     continue;
1639                 }
1640                 else
1641                 {
1642                     break;
1643                 }
1644 
1645             // Match Src1 Luma Key
1646             case RID_IsSrc1LumaKey:
1647                 if (pSearchState->src1_lumakey == (int32_t)pRuleEntry->value)
1648                 {
1649                     continue;
1650                 }
1651                 else
1652                 {
1653                     break;
1654                 }
1655 
1656             // Match Src1 Sampler LumaKey
1657             case RID_IsSrc1SamplerLumaKey:
1658                 if (pSearchState->src1_samplerlumakey == (int32_t)pRuleEntry->value)
1659                 {
1660                     continue;
1661                 }
1662                 else
1663                 {
1664                     break;
1665                 }
1666 
1667             // Match Src1 Procamp
1668             case RID_IsSrc1Procamp:
1669                 if (pSearchState->pFilter->procamp == (int32_t)pRuleEntry->value)
1670                 {
1671                     continue;
1672                 }
1673                 else
1674                 {
1675                     break;
1676                 }
1677 
1678             // Match Src1 CSC coefficients
1679             case RID_IsSrc1Coeff:
1680                 if (pSearchState->src1_coeff == (Kdll_CoeffID)pRuleEntry->value)
1681                 {
1682                     continue;
1683                 }
1684                 else if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Any &&
1685                          pSearchState->src1_coeff != CoeffID_None)
1686                 {
1687                     continue;
1688                 }
1689                 else
1690                 {
1691                     break;
1692                 }
1693 
1694             // Match Src1 processing mode
1695             case RID_IsSrc1Processing:
1696                 if (pSearchState->src1_process == (Kdll_Processing)pRuleEntry->value)
1697                 {
1698                     continue;
1699                 }
1700                 if ((Kdll_Processing)pRuleEntry->value == Process_Any &&
1701                     pSearchState->src1_process != Process_None)
1702                 {
1703                     continue;
1704                 }
1705                 else
1706                 {
1707                     break;
1708                 }
1709 
1710             // Match Src1 chromasiting mode
1711             case RID_IsSrc1Chromasiting:
1712                 //pSearchState->pFilter is pointed to the real sub layer
1713                 if (pSearchState->pFilter->chromasiting == (int32_t)pRuleEntry->value)
1714                 {
1715                     continue;
1716                 }
1717                 else
1718                 {
1719                     break;
1720                 }
1721 
1722             // Match Layer number
1723             case RID_IsLayerNumber:
1724                 if (pSearchState->layer_number == (int32_t)pRuleEntry->value)
1725                 {
1726                     continue;
1727                 }
1728                 else
1729                 {
1730                     break;
1731                 }
1732 
1733             // Match quadrant
1734             case RID_IsQuadrant:
1735                 if (pSearchState->quadrant == (int32_t)pRuleEntry->value)
1736                 {
1737                     continue;
1738                 }
1739                 else
1740                 {
1741                     break;
1742                 }
1743 
1744             // Set CSC flag before Mix
1745             case RID_IsCSCBeforeMix:
1746                 if (pSearchState->bCscBeforeMix == (pRuleEntry->value ? true : false))
1747                 {
1748                     continue;
1749                 }
1750                 else
1751                 {
1752                     break;
1753                 }
1754 
1755             case RID_IsDualOutput:
1756                 if (pSearchState->pFilter->dualout == (pRuleEntry->value ? true : false))
1757                 {
1758                     continue;
1759                 }
1760                 else
1761                 {
1762                     break;
1763                 }
1764 
1765             case RID_IsTargetFormat:
1766                 if (pRuleEntry->logic == Kdll_Or && bTargetFormatMatched)
1767                 {
1768                     // Already found matching format in the ruleset
1769                     continue;
1770                 }
1771                 else
1772                 {
1773                     if (pSearchState->target_format == (MOS_FORMAT)pRuleEntry->value)
1774                     {
1775                         bTargetFormatMatched = true;
1776                     }
1777 
1778                     if (pRuleEntry->logic == Kdll_None && !bTargetFormatMatched)
1779                     {
1780                         // Last entry and No matching format was found
1781                         break;
1782                     }
1783                     else
1784                     {
1785                         continue;
1786                     }
1787                 }
1788 
1789             case RID_Is64BSaveEnabled:
1790                 if (pSearchState->b64BSaveEnabled == (pRuleEntry->value ? true : false))
1791                 {
1792                     continue;
1793                 }
1794                 else
1795                 {
1796                     break;
1797                 }
1798 
1799             case RID_IsTargetTileType:
1800                 if (pRuleEntry->logic == Kdll_None &&
1801                     pSearchState->target_tiletype == (MOS_TILE_TYPE)pRuleEntry->value)
1802                 {
1803                     continue;
1804                 }
1805                 else if (pRuleEntry->logic == Kdll_Not &&
1806                          pSearchState->target_tiletype != (MOS_TILE_TYPE)pRuleEntry->value)
1807                 {
1808                     continue;
1809                 }
1810                 else
1811                 {
1812                     break;
1813                 }
1814 
1815             case RID_IsProcampEnabled:
1816                 if (pSearchState->bProcamp == (pRuleEntry->value ? true : false))
1817                 {
1818                     continue;
1819                 }
1820                 else
1821                 {
1822                     break;
1823                 }
1824 
1825             case RID_IsConstOutAlpha:
1826                 if (pSearchState->pFilter->bFillOutputAlphaWithConstant == (pRuleEntry->value ? true : false))
1827                 {
1828                     continue;
1829                 }
1830                 else
1831                 {
1832                     break;
1833                 }
1834 
1835             case RID_IsDitherNeeded:
1836                 if (pSearchState->pFilter->bIsDitherNeeded == (pRuleEntry->value ? true : false))
1837                 {
1838                     continue;
1839                 }
1840                 else
1841                 {
1842                     break;
1843                 }
1844             // Undefined search rule will fail
1845             default:
1846                 VP_RENDER_ASSERTMESSAGE("Invalid rule %d @ layer %d, state %d.", pRuleEntry->id, pSearchState->layer_number, pSearchState->state);
1847                 MT_ERR3(MT_VP_KERNEL_RULE, MT_VP_KERNEL_RULE_ID, pRuleEntry->id, MT_VP_KERNEL_RULE_LAYERNUM, pSearchState->layer_number, MT_VP_KERNEL_RULE_SEARCH_STATE, pSearchState->state);
1848                 break;
1849             }  // End of switch to deal with all matching rule IDs
1850 
1851             // Rule didn't match - try another RuleSet
1852             break;
1853         }  // End of file loop to test all rules for the current RuleSet
1854 
1855         // Match
1856         if (iMatchCount == 0)
1857         {
1858             pSearchState->pMatchingRuleSet = pRuleSet;
1859             return true;
1860         }
1861     }  // End of for loop to test all RuleSets for the current parser state
1862 
1863     // Failed to find a matching rule -> kernel search will fail
1864     VP_RENDER_NORMALMESSAGE("Fail to find a matching rule @ layer %d, state %d.", pSearchState->layer_number, pSearchState->state);
1865     MT_ERR2(MT_VP_KERNEL_RULE, MT_VP_KERNEL_RULE_LAYERNUM, pSearchState->layer_number, MT_VP_KERNEL_RULE_SEARCH_STATE, pSearchState->state);
1866 
1867     // No match -> return
1868     pSearchState->pMatchingRuleSet = nullptr;
1869     return false;
1870 }
1871 
1872 //--------------------------------------------------------------
1873 // Append kernel, include symbols to resolve
1874 //--------------------------------------------------------------
Kdll_AppendKernel(Kdll_KernelCache * pKernelCache,Kdll_SearchState * pSearchState,int32_t iKUID,Kdll_PatchData * pKernelPatch)1875 bool Kdll_AppendKernel(Kdll_KernelCache *pKernelCache,
1876     Kdll_SearchState *                   pSearchState,
1877     int32_t                              iKUID,
1878     Kdll_PatchData *                     pKernelPatch)
1879 {
1880     Kdll_State *     pState;
1881     Kdll_Symbol *    pSymbols;
1882     Kdll_CacheEntry *kernels;
1883     Kdll_LinkData *  link;
1884     Kdll_LinkData *  liSearch_reloc;
1885     uint8_t *        kernel;
1886     int *            size;
1887     int *            left;
1888     int              dwSize;
1889     int              i;
1890     int              base;
1891     bool             bInline;
1892     bool             res;
1893 
1894     VP_RENDER_FUNCTION_ENTER;
1895 
1896     res = false;
1897 
1898     // Check if Kernel ID is valid
1899     if (iKUID >= pKernelCache->iCacheEntries)
1900     {
1901         VP_RENDER_NORMALMESSAGE("invalid Kernel ID %d.", iKUID);
1902         goto cleanup;
1903     }
1904 
1905     // Get KDLL state
1906     pState = pSearchState->pKdllState;
1907 
1908     // Get current combined kernel
1909     kernel   = pSearchState->Kernel;
1910     size     = &pSearchState->KernelSize;
1911     left     = &pSearchState->KernelLeft;
1912     pSymbols = &pSearchState->KernelLink;
1913     base     = (*size) >> 2;
1914 
1915     // Find selected kernel and kernel size; check if there is enough space
1916     kernels = &pKernelCache->pCacheEntries[iKUID];
1917     dwSize  = kernels->iSize;
1918     if (*left < dwSize)
1919     {
1920         VP_RENDER_NORMALMESSAGE("exceeded maximum kernel size.");
1921         goto cleanup;
1922     }
1923 
1924     // Check if there is enough space for symbols
1925     if (pSymbols->dwCount + kernels->nLink >= pSymbols->dwSize)
1926     {
1927         VP_RENDER_NORMALMESSAGE("exceeded maximum numbers of symbols to resolve.");
1928         goto cleanup;
1929     }
1930 
1931 #if EMUL || VPHAL_LIB
1932     VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
1933 
1934     if (pState->pfnCbListKernel)
1935     {
1936         pState->pfnCbListKernel(pState->pToken, kernels->szName);
1937     }
1938 #elif _DEBUG  // EMUL || VPHAL_LIB
1939     VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
1940 #endif  // _DEBUG
1941 
1942     // Append symbols to resolve, relocate symbols
1943     link           = kernels->pLink;
1944     liSearch_reloc = pSymbols->pLink + pSymbols->dwCount;
1945 
1946     bInline = false;
1947     if (link)
1948     {
1949         for (i = kernels->nLink; i > 0; i--, link++)
1950         {
1951             if (link->bInline)
1952             {
1953                 // Inline code included
1954                 if (!link->bExport)
1955                 {
1956                     bInline = true;
1957                 }
1958             }
1959             else
1960             {
1961                 *liSearch_reloc = *link;
1962                 liSearch_reloc->dwOffset += base;
1963                 liSearch_reloc++;
1964 
1965                 pSymbols->dwCount++;
1966             }
1967         }
1968     }
1969 
1970     // Append kernel
1971     MOS_SecureMemcpy(&kernel[*size], dwSize, (void *)kernels->pBinary, dwSize);
1972 
1973     // Patch kernel
1974     if (pKernelPatch)
1975     {
1976         uint8_t *pSource      = pKernelPatch->Data;
1977         uint8_t *pDestination = kernel + (*size);
1978         int32_t  i;
1979 
1980         Kdll_PatchBlock *pBlock = pKernelPatch->Patch;
1981         for (i = pKernelPatch->nPatches; i > 0; i--, pBlock++)
1982         {
1983             MOS_SecureMemcpy(pDestination + pBlock->DstOffset, pBlock->BlockSize, (void *)(pSource + pBlock->SrcOffset), pBlock->BlockSize);
1984         }
1985     }
1986 
1987     res = true;
1988     *size += dwSize;
1989     *left -= dwSize;
1990 
1991     // Insert inline code
1992     if (bInline)
1993     {
1994         for (link = kernels->pLink, i = kernels->nLink; (i > 0) && (res); i--, link++)
1995         {
1996             if (link->bInline && (!link->bExport))
1997             {
1998                 iKUID = pKernelCache->pExports[link->iLabelID].iKUID;
1999                 res &= Kdll_AppendKernel(pKernelCache, pSearchState, iKUID, pKernelPatch);
2000             }
2001         }
2002     }
2003 
2004 cleanup:
2005     return res;
2006 }
2007 
2008 //--------------------------------------------------------------
2009 // Resolve kernel dependencies and perform patching
2010 //--------------------------------------------------------------
Kdll_ResolveKernelDependencies(Kdll_State * pState,Kdll_SearchState * pSearchState)2011 bool Kdll_ResolveKernelDependencies(
2012     Kdll_State *      pState,
2013     Kdll_SearchState *pSearchState)
2014 {
2015     Kdll_KernelCache *cache    = &pState->ComponentKernelCache;
2016     uint8_t *         kernel   = pSearchState->Kernel;
2017     Kdll_Symbol *     pSymbols = &pSearchState->KernelLink;
2018     uint32_t          nExports = cache->nExports;
2019     Kdll_LinkData *   pExports = cache->pExports;
2020     Kdll_LinkData *   pLink;
2021     int32_t           iKUID;
2022     int32_t           iOffset;
2023     uint32_t          dwResolveOffset[DL_MAX_EXPORT_COUNT];
2024     bool              bResolveDone;
2025     int32_t           i;
2026     uint32_t *        d;
2027 
2028     VP_RENDER_FUNCTION_ENTER;
2029 
2030     MOS_ZeroMemory(dwResolveOffset, sizeof(dwResolveOffset));
2031 
2032     do
2033     {
2034         // Update exports
2035         for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
2036         {
2037             if (pLink->bExport)
2038             {
2039                 dwResolveOffset[pLink->iLabelID] = pLink->dwOffset;
2040             }
2041         }
2042 
2043         bResolveDone = true;
2044         for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
2045         {
2046             // validate label
2047             if (pLink->iLabelID > nExports ||            // invalid label
2048                 pExports[pLink->iLabelID].bExport == 0)  // label not in the export table
2049             {
2050                 VP_RENDER_ASSERTMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
2051                 return false;
2052             }
2053 
2054             // load dependencies
2055             if (!pLink->bExport && !dwResolveOffset[pLink->iLabelID])
2056             {
2057                 // set flag for another pass as newly loaded
2058                 // kernels may contain dependencies of their own
2059                 bResolveDone = false;
2060 
2061                 // Load dependencies
2062                 iKUID = pExports[pLink->iLabelID].iKUID;
2063                 Kdll_AppendKernel(cache, pSearchState, iKUID, nullptr);
2064 
2065                 // Restart
2066                 break;
2067             }
2068         }  // for
2069     } while (!bResolveDone);
2070 
2071     // All modules must be loaded by now, start patching
2072     for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
2073     {
2074         iOffset = (int32_t)dwResolveOffset[pLink->iLabelID] - 4;
2075         iOffset -= pLink->dwOffset;
2076 
2077         d = ((uint32_t *)kernel) + pLink->dwOffset;
2078 
2079         // Patch offset
2080         if (!pLink->bExport && !pLink->bInline)
2081         {
2082             d[3] = iOffset << 2;  // jmpi - index * 8 bits
2083         }
2084     }
2085 
2086     return true;
2087 }
2088 
2089 //---------------------------------------------------------------------------------------
2090 // Kdll_SearchKernel - Performs full kernel search, including selection of best match
2091 //                     Search state must be initialized by KernelDll_StartKernelSearch
2092 //
2093 // Parameters:
2094 //    Kdll_State       *pState       - [in]     Dynamic Linking state
2095 //    Kdll_SearchState *pSearchState - [in/out] Kernel search state
2096 //
2097 // Output: true if suceeded, false otherwise
2098 //---------------------------------------------------------------------------------------
KernelDll_SearchKernel(Kdll_State * pState,Kdll_SearchState * pSearchState)2099 bool KernelDll_SearchKernel(Kdll_State *pState,
2100     Kdll_SearchState *                  pSearchState)
2101 {
2102     VP_RENDER_FUNCTION_ENTER;
2103 
2104     // Check parameters
2105     if ((!pSearchState) || pSearchState->iFilterSize < 1)
2106     {
2107         VP_RENDER_NORMALMESSAGE("Search is empty, must contain 2 or more layers.");
2108         return false;
2109     }
2110 
2111     // Setup CSC; allocate and calculate CSC matrices
2112     if (!pState->pfnSetupCSC(pState, pSearchState))
2113     {
2114         VP_RENDER_NORMALMESSAGE("CSC setup failed.");
2115         return false;
2116     }
2117 
2118     // Initial search states
2119     pSearchState->bCscBeforeMix = false;
2120     pSearchState->state         = Parser_Begin;
2121     pSearchState->cspace        = pSearchState->CscParams.ColorSpace;
2122     pSearchState->quadrant      = 0;
2123     pSearchState->layer_number  = 0;
2124 
2125     pSearchState->pMatchingRuleSet = nullptr;
2126 
2127     // Reset Src0 state
2128     pSearchState->src0_format    = Format_None;
2129     pSearchState->src0_sampling  = Sample_None;
2130     pSearchState->src0_colorfill = false;
2131     pSearchState->src0_lumakey   = LumaKey_False;
2132     pSearchState->src0_coeff     = CoeffID_None;
2133 
2134     // Reset Src1 state
2135     pSearchState->src1_format         = Format_None;
2136     pSearchState->src1_sampling       = Sample_None;
2137     pSearchState->src1_lumakey        = LumaKey_False;
2138     pSearchState->src1_samplerlumakey = LumaKey_False;
2139     pSearchState->src1_coeff          = CoeffID_None;
2140     pSearchState->src1_process        = Process_None;
2141 
2142     // Search loop
2143     while (pSearchState->state != Parser_End)
2144     {
2145 #if EMUL || VPHAL_LIB
2146         if (pState->pfnCbSearchSate)
2147         {
2148             pState->pfnCbSearchSate(pState->pToken, CB_REASON_BEGIN_SEARCH, pSearchState);
2149         }
2150 #endif
2151         // Find rule that matches
2152         if (!pState->pfnFindRule(pState, pSearchState))
2153         {
2154 #if EMUL || VPHAL_LIB
2155             if (pState->pfnCbSearchSate)
2156             {
2157                 pState->pfnCbSearchSate(pState->pToken, CB_REASON_SEARCH_FAILED, pSearchState);
2158             }
2159 #endif
2160             return false;
2161         }
2162 
2163 #if EMUL || VPHAL_LIB
2164         if (pState->pfnCbSearchSate)
2165         {
2166             pState->pfnCbSearchSate(pState->pToken, CB_REASON_BEGIN_UPDATE, pSearchState);
2167         }
2168 #endif
2169         // Update state
2170         if (!pState->pfnUpdateState(pState, pSearchState))
2171         {
2172 #if EMUL || VPHAL_LIB
2173             if (pState->pfnCbSearchSate)
2174             {
2175                 pState->pfnCbSearchSate(pState->pToken, CB_REASON_UPDATE_FAILED, pSearchState);
2176             }
2177 #endif
2178             return false;
2179         }
2180     }
2181 
2182 #if EMUL || VPHAL_LIB
2183     if (pState->pfnCbSearchSate)
2184     {
2185         pState->pfnCbSearchSate(pState->pToken, CB_REASON_END_SEARCH, pSearchState);
2186     }
2187 #endif
2188 
2189     VP_RENDER_VERBOSEMESSAGE("Search completed successfully.");
2190     return true;
2191 }
2192 
2193 //--------------------------------------------------------------
2194 // KernelDll_BuildKernel - build kernel
2195 //--------------------------------------------------------------
KernelDll_BuildKernel(Kdll_State * pState,Kdll_SearchState * pSearchState)2196 bool KernelDll_BuildKernel(Kdll_State *pState, Kdll_SearchState *pSearchState)
2197 {
2198     Kdll_KernelCache *pKernelCache = &pState->ComponentKernelCache;
2199     Kdll_KernelCache *pCustomCache = pState->pCustomKernelCache;
2200     Kdll_PatchData *  pKernelPatch;
2201     bool              res;
2202     int32_t           offset = 0;
2203     int32_t *         pKernelID, *pGroupID, *pPatchID;
2204 
2205     VP_RENDER_FUNCTION_ENTER;
2206 
2207     pSearchState->KernelLink.dwSize  = DL_MAX_SYMBOLS;
2208     pSearchState->KernelLink.dwCount = 0;
2209     pSearchState->KernelLink.pLink   = pSearchState->LinkArray;
2210     pSearchState->KernelSize         = 0;
2211     pSearchState->KernelLeft         = sizeof(pSearchState->Kernel);
2212     pSearchState->KernelLink.dwCount = 0;
2213 
2214 #if EMUL || VPHAL_LIB || _DEBUG || _RELEASE_INTERNAL
2215     VP_RENDER_NORMALMESSAGE("Component Kernels:");
2216 #endif  // EMUL || VPHAL_LIB || _DEBUG
2217 
2218     pKernelID = pSearchState->KernelID;
2219     pGroupID  = pSearchState->KernelGrp;
2220     pPatchID  = pSearchState->PatchID;
2221 
2222     for (offset = 0; offset < pSearchState->KernelCount; offset++, pKernelID++, pGroupID++, pPatchID++)
2223     {
2224         // Get patch information associated with the kernel
2225         pKernelPatch = (*pPatchID >= 0) ? &(pSearchState->Patches[*pPatchID]) : nullptr;
2226 
2227         // Append/Patch kernel from custom cache
2228         if (*pGroupID == GROUP_CUSTOM)
2229         {
2230             res = Kdll_AppendKernel(pCustomCache, pSearchState, *pKernelID, pKernelPatch);
2231         }
2232         // Append/Patch kernel from internal cache
2233         else
2234         {
2235             res = Kdll_AppendKernel(pKernelCache, pSearchState, *pKernelID, pKernelPatch);
2236         }
2237 
2238         if (!res)
2239         {
2240             VP_RENDER_ASSERTMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
2241             return false;
2242         }
2243         else
2244         {
2245             Kdll_CacheEntry *kernels = (*pGroupID == GROUP_CUSTOM) ? &pCustomCache->pCacheEntries[*pKernelID] : &pKernelCache->pCacheEntries[*pKernelID];
2246             VP_RENDER_NORMALMESSAGE("Component kernels [%d]: %s", *pKernelID, kernels->szName);
2247         }
2248     }
2249 
2250     // Resolve kernel dependencies
2251     res = Kdll_ResolveKernelDependencies(pState, pSearchState);
2252     if (!res)
2253     {
2254         VP_RENDER_ASSERTMESSAGE("Failed to resolve symbols.");
2255         return false;
2256     }
2257 
2258     return true;
2259 }
2260 
2261 //---------------------------------------------------------------------------------------
2262 // KernelDll_StartKernelSearch - Starts kernel search
2263 //
2264 // Parameters:
2265 //    Kdll_State       *pState       - [in]     Dynamic Linking State
2266 //    Kdll_FilterEntry *pFilter      - [in]     Search filter (array of search entries)
2267 //    int               iFilterSize  - [in]     Search filter size
2268 //    Kdll_SearchState *pSearchState - [in/out] Kernel search state
2269 //
2270 // Output: none
2271 //---------------------------------------------------------------------------------------
KernelDll_StartKernelSearch(Kdll_State * pState,Kdll_SearchState * pSearchState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t uiIs64BInstrEnabled)2272 void KernelDll_StartKernelSearch(
2273     Kdll_State *      pState,
2274     Kdll_SearchState *pSearchState,
2275     Kdll_FilterEntry *pFilter,
2276     int32_t           iFilterSize,
2277     uint32_t          uiIs64BInstrEnabled)
2278 {
2279     int32_t nLayer;
2280 
2281     VP_RENDER_FUNCTION_ENTER;
2282 
2283     // Reset all states
2284     MOS_ZeroMemory(pSearchState, sizeof(Kdll_SearchState));
2285 
2286     // Setup KDLL state
2287     pSearchState->pKdllState = pState;  // KDLL state
2288 
2289     // Cleanup kernel table
2290     pSearchState->KernelCount = 0;  // # of kernels
2291 
2292     // Cleanup patch data
2293     memset(pSearchState->Patches, 0, sizeof(pSearchState->Patches));
2294     memset(pSearchState->PatchID, -1, sizeof(pSearchState->PatchID));
2295     pSearchState->PatchCount = 0;
2296 
2297     // Copy original filter; filter will be modified as part of the search
2298     if (pFilter && iFilterSize > 0)
2299     {
2300         MOS_SecureMemcpy(pSearchState->Filter, iFilterSize * sizeof(Kdll_FilterEntry), pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
2301         pSearchState->pFilter     = pSearchState->Filter;
2302         pSearchState->iFilterSize = iFilterSize;
2303 
2304         for (nLayer = 0; nLayer < iFilterSize; nLayer++)
2305         {
2306             // DScale Kernels are enabled for all gen9 stepping.
2307             //For Gen9+, kernel don't support sublayer DScale+rotation
2308             //Sampler_unorm does not support Y410/RGB10, we need to use sampler_16 to support Y410/RGB10
2309             if (!pFilter[nLayer].bEnableDscale &&
2310                 (!pFilter[nLayer].bWaEnableDscale ||
2311                     (pFilter[nLayer].layer == Layer_SubVideo &&
2312                         pFilter[nLayer].rotation != VPHAL_ROTATION_IDENTITY)))
2313             {
2314                 if (pFilter[nLayer].sampler == Sample_Scaling_034x)
2315                 {
2316                     pSearchState->pFilter[nLayer].sampler = Sample_Scaling;
2317                 }
2318                 else if (pFilter[nLayer].sampler == Sample_iScaling_034x)
2319                 {
2320                     pSearchState->pFilter[nLayer].sampler = Sample_iScaling;
2321                 }
2322                 else if (pFilter[nLayer].sampler == Sample_iScaling_AVS)
2323                 {
2324                     pSearchState->pFilter[nLayer].sampler = Sample_iScaling_AVS;
2325                 }
2326             }
2327         }
2328 
2329         // Copy the render target format
2330         pSearchState->target_format = pSearchState->pFilter[iFilterSize - 1].format;
2331 
2332         // Copy the render target tile type
2333         pSearchState->target_tiletype = pSearchState->pFilter[iFilterSize - 1].tiletype;
2334 
2335         // Indicate whether to use 64B save kernel for render target surface
2336         if (uiIs64BInstrEnabled &&
2337             ((pSearchState->target_tiletype == MOS_TILE_X) ||
2338                 (pSearchState->target_tiletype == MOS_TILE_LINEAR)))
2339         {
2340             pSearchState->b64BSaveEnabled = true;
2341         }
2342     }
2343 }
2344 
2345 /*----------------------------------------------------------------------------
2346 | Name      : KernelDll_SetupCSC
2347 | Purpose   : Defines CSC conversions necessary for a given filter
2348 |
2349 | Input     : pState       - Kernel Dll state
2350 |             pSearchState - current DL search state
2351 |
2352 | Return    :
2353 \---------------------------------------------------------------------------*/
KernelDll_SetupCSC(Kdll_State * pState,Kdll_SearchState * pSearchState)2354 bool KernelDll_SetupCSC(
2355     Kdll_State *      pState,
2356     Kdll_SearchState *pSearchState)
2357 {
2358     int i, m;  // Integer iterators
2359 
2360     bool bCoeffID_0_Used = false;
2361 
2362     VPHAL_CSPACE cspace      = CSpace_None;  // Current ColorSpace
2363     VPHAL_CSPACE out_cspace  = CSpace_None;  // Render Target CS
2364     VPHAL_CSPACE main_cspace = CSpace_None;  // Main video CS
2365     VPHAL_CSPACE sel_cspace  = CSpace_Any;   // Selected CS
2366 
2367     Kdll_FilterEntry *pFilter;  // Current Filter information
2368     int               iFilterSize = pSearchState->iFilterSize;
2369     Kdll_CSC_Params * pCSC        = &pSearchState->CscParams;
2370 
2371     int     csc_count;                    // Number of CSC operations
2372     int     matrix_count;                 // Number of Matrices in use
2373     int     procamp_count = 0;            // Number of PA operations
2374     int     sel_csc_count = -1;           // Minimum number of CSC operations
2375     int     iCoeffID;                     // coeffID for layers other than main video
2376     uint8_t cspace_in_use[CSpace_Count];  // Color Spaces in use
2377 
2378     Kdll_CSC_Matrix  curr_matrix;
2379     Kdll_CSC_Matrix *matrix   = pCSC->Matrix;    // Color Space conversion matrix
2380     uint8_t *        matrixID = pCSC->MatrixID;  // CSC coefficient allocation table
2381     bool forceToTargetColorSpace = false;
2382 
2383     // Clear all CSC matrices
2384     MOS_ZeroMemory(matrix, sizeof(pCSC->Matrix));
2385     memset(matrixID, DL_CSC_DISABLED, sizeof(pCSC->MatrixID));
2386     memset(pCSC->PatchMatrixID, DL_CSC_DISABLED, sizeof(pCSC->PatchMatrixID));
2387     pCSC->PatchMatrixNum = 0;
2388 
2389     // Clear array of color spaces in use
2390     MOS_ZeroMemory(cspace_in_use, sizeof(cspace_in_use));
2391 
2392     //---------------------------------------------------------------//
2393     // Collect information about Color Spaces in use
2394     // Get Primary Video and Render Target Color Spaces
2395     // Force xvYCC passthrough if enabled
2396     //---------------------------------------------------------------//
2397     for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
2398     {
2399         if (pFilter->forceToTargetColorSpace)
2400         {
2401             forceToTargetColorSpace = true;
2402         }
2403         // Disable Procamp for all layers except Main Video
2404         // Disable Procamp if source is RGB
2405         if (pFilter->layer != Layer_MainVideo ||
2406             pFilter->cspace == CSpace_sRGB ||
2407             pFilter->cspace == CSpace_stRGB)
2408         {
2409             pFilter->procamp = DL_PROCAMP_DISABLED;
2410         }
2411 
2412         // Count number of procamp operations (limited by number of independent coefficients)
2413         // Ignore layers with palletized/constant colors
2414         if (pFilter->procamp != DL_PROCAMP_DISABLED &&
2415             pFilter->cspace != CSpace_Any)
2416         {
2417             procamp_count++;
2418         }
2419 
2420         // Set xvYCC passthrough mode
2421         if (pFilter->cspace == CSpace_xvYCC709 ||
2422             pFilter->cspace == CSpace_xvYCC601)
2423         {
2424             sel_cspace = pFilter->cspace;
2425         }
2426 
2427         // Get Main Video color space
2428         if (pFilter->layer == Layer_MainVideo)
2429         {
2430             main_cspace = pFilter->cspace;
2431         }
2432 
2433         // Get Render Target color space
2434         if (pFilter->layer == Layer_RenderTarget)
2435         {
2436             // Target is sRGB/stRGB
2437             if (!KernelDll_IsYUVFormat(pFilter->format))
2438             {
2439                 // Disable xvYCC passthrough (sRGB cannot have extended gamut)
2440                 sel_cspace = CSpace_Any;
2441             }
2442             out_cspace = pFilter->cspace;
2443         }
2444 
2445         // Mark color spaces in use for search that follows
2446         if (pFilter->cspace > CSpace_Any && pFilter->cspace < CSpace_Count)
2447         {
2448             cspace_in_use[pFilter->cspace] = 1;
2449         }
2450     }
2451 
2452     // Check max number of procamp operations
2453     if (procamp_count > DL_PROCAMP_MAX)
2454     {
2455         return false;
2456     }
2457 
2458     //---------------------------------------------------------------//
2459     // Search Color Space that provides minimum number of CSC conversions
2460     // If there are multiple solutions, select main video cspace (quality)
2461     //---------------------------------------------------------------//
2462     if (sel_cspace == CSpace_Any)
2463     {
2464         if (forceToTargetColorSpace)
2465         {
2466             sel_cspace = out_cspace;
2467         }
2468         else
2469         {
2470             int cs;
2471             for (cs = (CSpace_Any + 1); cs < CSpace_Count; cs++)
2472             {
2473                 // Skip color spaces not in use
2474                 cspace = (VPHAL_CSPACE)cs;
2475                 if (!cspace_in_use[cspace])
2476                 {
2477                     continue;
2478                 }
2479 
2480                 // xvYCC and BT are treated as same for CSC considerations (BT.x to xvYCC.x matrix is I)
2481                 cspace = KernelDll_TranslateCspace(cspace);
2482 
2483                 // Count # of CS conversions and matrices
2484                 csc_count = 0;
2485                 for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
2486                 {
2487                     // Ignore layers where the Color Space may be set in software (colorfill, palletized)
2488                     if (pFilter->cspace == CSpace_Any)
2489                     {
2490                         continue;
2491                     }
2492 
2493                     // Check if CSC/PA is required
2494                     if (KernelDll_TranslateCspace(pFilter->cspace) != cspace ||
2495                         pFilter->procamp != DL_PROCAMP_DISABLED)
2496                     {
2497                         csc_count++;
2498                     }
2499                 }
2500 
2501                 // Save best choice as requiring minimum number of CSC operations
2502                 if ((sel_csc_count < 0) ||                              // Initial value
2503                     (csc_count < sel_csc_count) ||                      // Minimum number of CSC operations
2504                     (csc_count == sel_csc_count && cs == main_cspace))  // Use main cspace as default if same CSC count
2505                 {
2506                     sel_cspace    = cspace;
2507                     sel_csc_count = csc_count;
2508                 }
2509             }
2510         }
2511     }
2512 
2513     // Due to put the colorfill behind CSC, so Src0 cspace needs to change
2514     // to selspace in order to fill colorfill values correctly.
2515     pState->colorfill_cspace = sel_cspace;
2516 
2517     // color space is selected by now... setup CSC matrices
2518     matrix_count = 0;
2519     iCoeffID     = 1;
2520     for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
2521     {
2522         // Setup CSC for palettized/colorfill layers
2523         if (pFilter->cspace == CSpace_Any)
2524         {
2525             // Set Color Space and format (for software)
2526             if (pFilter->format == Format_Any)
2527             {
2528                 pFilter->format = KernelDll_IsCspace(sel_cspace, CSpace_YUV) ? Format_AYUV : Format_A8R8G8B8;
2529             }
2530 
2531             pFilter->cspace = sel_cspace;
2532             pFilter->matrix = DL_CSC_DISABLED;
2533         }
2534         else
2535         {
2536             // Setup CSC parameters: SrcSpace is the layer color space,
2537             //                       DstSpace is the internal color space selected
2538             curr_matrix.SrcSpace   = KernelDll_TranslateCspace(pFilter->cspace);
2539             curr_matrix.DstSpace   = KernelDll_TranslateCspace(sel_cspace);
2540             curr_matrix.iProcampID = pFilter->procamp;
2541 
2542             // Check if CSC is necessary
2543             if (curr_matrix.SrcSpace == curr_matrix.DstSpace &&
2544                 curr_matrix.iProcampID == DL_PROCAMP_DISABLED)
2545             {
2546                 pFilter->matrix      = DL_CSC_DISABLED;
2547                 curr_matrix.iCoeffID = CoeffID_None;
2548                 continue;
2549             }
2550 
2551             // Reserve CoeffID_0 for main video - CoeffID_0 gets CSC coeff from static parameters
2552             // If main video doesn't use CoeffID_0, assign to RT
2553             if ((pFilter->layer == Layer_MainVideo) ||
2554                 (pFilter->layer == Layer_RenderTarget))
2555             {
2556                 if (bCoeffID_0_Used)
2557                 {
2558                     curr_matrix.iCoeffID = (Kdll_CoeffID)iCoeffID++;
2559                 }
2560                 else
2561                 {
2562                     curr_matrix.iCoeffID = CoeffID_0;
2563                     bCoeffID_0_Used      = true;
2564                 }
2565             }
2566             else
2567             {
2568                 curr_matrix.iCoeffID = (Kdll_CoeffID)iCoeffID++;
2569             }
2570 
2571             // CSC at the target layer is from internal cspace (SrcSpace)
2572             //                              to external cspace (DstCspace)
2573             if (pFilter->layer == Layer_RenderTarget)
2574             {
2575                 VPHAL_CSPACE aux     = curr_matrix.SrcSpace;
2576                 curr_matrix.SrcSpace = curr_matrix.DstSpace;
2577                 curr_matrix.DstSpace = aux;
2578             }
2579 
2580             // Search CSC matrix - avoid duplicated CSC matrices
2581             for (m = 0; m < matrix_count; m++)
2582             {
2583                 if (curr_matrix.SrcSpace == matrix[m].SrcSpace &&
2584                     curr_matrix.DstSpace == matrix[m].DstSpace &&
2585                     curr_matrix.iProcampID == matrix[m].iProcampID)
2586                 {
2587                     break;
2588                 }
2589             }
2590 
2591             // Check limit
2592             if (m == matrix_count)
2593             {
2594                 // Exceeded number of CSC matrices allowed
2595                 if (matrix_count == DL_CSC_MAX)
2596                 {
2597                     VP_RENDER_ASSERTMESSAGE("CSC matrix count %d exceeded number of CSC matrices allowed!", matrix_count);
2598                     return false;
2599                 }
2600 
2601                 matrix[m].bInUse     = true;
2602                 matrix[m].SrcSpace   = curr_matrix.SrcSpace;
2603                 matrix[m].DstSpace   = curr_matrix.DstSpace;
2604                 matrix[m].iProcampID = curr_matrix.iProcampID;
2605                 matrix[m].iCoeffID   = curr_matrix.iCoeffID;
2606 
2607                 // Calculate coefficients for the first time
2608                 KernelDll_UpdateCscCoefficients(pState, &matrix[m]);
2609 
2610                 // Next matrix
2611                 matrix_count++;
2612             }
2613 
2614             // point to the matrix
2615             pFilter->matrix = m;
2616         }
2617     }
2618 
2619     // Link matrices to kernel coefficients (and vice-versa)
2620     matrix = pCSC->Matrix;
2621     for (m = 0; m < matrix_count; m++, matrix++)
2622     {
2623         // Coefficient table points to matrix index
2624         matrixID[matrix->iCoeffID] = (uint8_t)m;
2625     }
2626 
2627     // Save selected color space
2628     pCSC->ColorSpace = sel_cspace;
2629 
2630     return true;
2631 }
2632 
2633 /*----------------------------------------------------------------------------
2634 | Name      : KernelDll_GetPatchData
2635 | Purpose   : Get binary data block to be used for kernel patching
2636 |
2637 | Input     : pState       - [in]  Current DL state
2638 |             pSearchState - [in]  Current DL search state
2639 |             iPatchKind   - [in]  Patch kind
2640 |             pSize        - [out] Data block Size
2641 |
2642 | Return    : nullptr - Unsupported patch data kind
2643 |             <>nullptr - Pointer to data block
2644 \---------------------------------------------------------------------------*/
KernelDll_GetPatchData(Kdll_State * pState,Kdll_SearchState * pSearchState,int32_t iPatchKind,int32_t * pSize)2645 static uint8_t *KernelDll_GetPatchData(
2646     Kdll_State *      pState,
2647     Kdll_SearchState *pSearchState,
2648     int32_t           iPatchKind,
2649     int32_t *         pSize)
2650 {
2651     MOS_UNUSED(pState);
2652 
2653     VP_RENDER_FUNCTION_ENTER;
2654 
2655     if (iPatchKind == PatchKind_CSC_Coeff_Src0 ||
2656         iPatchKind == PatchKind_CSC_Coeff_Src1)
2657     {
2658         Kdll_CoeffID coeffID  = CoeffID_None;
2659         uint8_t      matrixID = DL_CSC_DISABLED;
2660 
2661         // Get matrix id
2662         if (iPatchKind == PatchKind_CSC_Coeff_Src0)
2663         {
2664             coeffID = pSearchState->src0_coeff;
2665         }
2666         else
2667         {
2668             coeffID = pSearchState->src1_coeff;
2669         }
2670 
2671         // Get matrix associated with the coefficient ID
2672         if (coeffID > CoeffID_None)
2673         {
2674             matrixID = pSearchState->CscParams.MatrixID[coeffID];
2675         }
2676 
2677         // Found matrix
2678         if (matrixID < DL_CSC_MAX)
2679         {
2680             Kdll_CSC_Matrix *pMatrix = &(pSearchState->CscParams.Matrix[matrixID]);
2681 
2682             *pSize = 12 * sizeof(uint16_t);
2683 
2684             if (pState->bEnableCMFC)
2685             {
2686                 if (pSearchState->CscParams.PatchMatrixNum < DL_CSC_MAX)
2687                 {
2688                     pSearchState->CscParams.PatchMatrixID[pSearchState->CscParams.PatchMatrixNum] = matrixID;
2689                     pSearchState->CscParams.PatchMatrixNum++;
2690                 }
2691                 else
2692                 {
2693                     VP_RENDER_ASSERTMESSAGE("Patch CSC coefficient number %d exceed limitation %d!", pSearchState->CscParams.PatchMatrixNum, DL_CSC_MAX);
2694                     return nullptr;
2695                 }
2696             }
2697 
2698             return ((uint8_t *)pMatrix->Coeff);
2699         }
2700     }
2701     else
2702     {
2703         VP_RENDER_ASSERTMESSAGE("Invalid patch kind %d.", iPatchKind);
2704     }
2705 
2706     return nullptr;
2707 }
2708 
2709 /*----------------------------------------------------------------------------
2710 | Name      : KernelDll_UpdateState
2711 | Purpose   : Update search state using current matching rule
2712 |
2713 | Input     : pState       - Kernel Dll state
2714 |             pSearchState - current DL search state
2715 |
2716 | Return    :
2717 \---------------------------------------------------------------------------*/
KernelDll_UpdateState(Kdll_State * pState,Kdll_SearchState * pSearchState)2718 bool KernelDll_UpdateState(
2719     Kdll_State *      pState,
2720     Kdll_SearchState *pSearchState)
2721 {
2722     Kdll_RuleEntrySet *   pRuleSet = pSearchState->pMatchingRuleSet;
2723     const Kdll_RuleEntry *pRuleEntry;
2724     int32_t               iSetCount;
2725 
2726     VP_RENDER_FUNCTION_ENTER;
2727 
2728     // Ensures that we have a matching rule
2729     if (pRuleSet == nullptr)
2730     {
2731         return false;
2732     }
2733 
2734     // Get rule entry and number of state update ("Set") rules; validate
2735     pRuleEntry = pRuleSet->pRuleEntry;
2736     iSetCount  = pRuleSet->iSetCount;
2737     if (pRuleEntry == nullptr || iSetCount < 1)
2738     {
2739         VP_RENDER_NORMALMESSAGE("Invalid rule set.");
2740         return false;
2741     }
2742 
2743     // Jump to set rules (skip match rules)
2744     pRuleEntry += pRuleSet->iMatchCount;
2745 
2746     // Apply state update rules
2747     for (; iSetCount > 0; iSetCount--, pRuleEntry++)
2748     {
2749         switch (pRuleEntry->id)
2750         {
2751         // Add kernel to the Dynamic Linking array
2752         case RID_SetKernel:
2753             if (pSearchState->KernelCount < DL_MAX_KERNELS)
2754             {
2755                 int32_t i                  = pSearchState->KernelCount++;
2756                 pSearchState->KernelID[i]  = pRuleEntry->value;
2757                 pSearchState->KernelGrp[i] = pRuleSet->iGroup;  // Group associated with the kernel ID
2758             }
2759             else
2760             {
2761                 VP_RENDER_ASSERTMESSAGE("reached maximum number of component kernels.");
2762                 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2763                 return false;
2764             }
2765             break;
2766 
2767         // Set Parser State
2768         case RID_SetParserState:
2769             pSearchState->state = (Kdll_ParserState)pRuleEntry->value;
2770             break;
2771 
2772         // Move to Next/Prev Layer
2773         case RID_SetNextLayer:
2774             if (pRuleEntry->value == -1)
2775             {
2776                 pSearchState->layer_number--;
2777                 pSearchState->pFilter--;
2778             }
2779             else if (pRuleEntry->value == -2)  // jump to layer main video
2780             {
2781                 do
2782                 {
2783                     pSearchState->layer_number--;
2784                     pSearchState->pFilter--;
2785                     if (pSearchState->pFilter == nullptr || pSearchState->layer_number < 0)
2786                     {
2787                         return false;
2788                     }
2789                 } while (pSearchState->pFilter->layer != Layer_MainVideo);
2790             }
2791             else if (pRuleEntry->value == 2)  // jump to target layer
2792             {
2793                 while (pSearchState->pFilter->layer < Layer_RenderTarget)
2794                 {
2795                     pSearchState->layer_number++;
2796                     pSearchState->pFilter++;
2797                 }
2798             }
2799             else
2800             {
2801                 pSearchState->layer_number++;
2802                 pSearchState->pFilter++;
2803             }
2804             break;
2805 
2806         // Set patch data
2807         case RID_SetPatchData: {
2808             uint8_t *       pData        = nullptr;
2809             int32_t         iSize        = 0;
2810             int32_t         iKernelIndex = pSearchState->KernelCount - 1;
2811             int32_t         iPatchIndex;
2812             Kdll_PatchData *pPatch;
2813 
2814             // Get block of data for patching
2815             pData = KernelDll_GetPatchData(pState, pSearchState, (Kdll_PatchKind)pRuleEntry->value, &iSize);
2816             if (pData == nullptr || iSize == 0)
2817             {
2818                 VP_RENDER_ASSERTMESSAGE("invalid patch.");
2819                 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2820                 return false;
2821             }
2822 
2823             // Append to the existing patch data block
2824             iPatchIndex = pSearchState->PatchID[iKernelIndex];
2825 
2826             // Allocate new patch structure
2827             if (iPatchIndex < 0)
2828             {
2829                 // Fail to allocate
2830                 if (pSearchState->PatchCount >= DL_MAX_PATCHES)
2831                 {
2832                     VP_RENDER_ASSERTMESSAGE("reached maximum number of patches.");
2833                     MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2834                     return false;
2835                 }
2836 
2837                 // Get new patch block
2838                 iPatchIndex                         = pSearchState->PatchCount++;
2839                 pSearchState->PatchID[iKernelIndex] = iPatchIndex;
2840 
2841                 // Reset new patch entry
2842                 pPatch = &(pSearchState->Patches[iPatchIndex]);
2843                 MOS_ZeroMemory(pPatch, sizeof(Kdll_PatchData));
2844             }
2845             else
2846             {
2847                 // Get Patch entry already in use
2848                 pPatch = &(pSearchState->Patches[iPatchIndex]);
2849             }
2850 
2851             // Check if data can be appended
2852             if (pPatch->iPatchDataSize + iSize > DL_MAX_PATCH_DATA_SIZE)
2853             {
2854                 VP_RENDER_ASSERTMESSAGE("exceeded maximum patch size.");
2855                 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2856                 return false;
2857             }
2858 
2859             // Append patch data
2860             MOS_SecureMemcpy(pPatch->Data + pPatch->iPatchDataSize, iSize, (void *)pData, iSize);
2861             pPatch->iPatchDataSize += iSize;
2862         }
2863         break;
2864 
2865         // Set patch operation
2866         case RID_SetPatch: {
2867             int32_t          iKernelIndex = pSearchState->KernelCount - 1;
2868             int32_t          iPatchIndex  = pSearchState->PatchID[iKernelIndex];
2869             Kdll_PatchData * pPatch;
2870             uint8_t *        pPatchRule;
2871             Kdll_PatchBlock *pPatchBlock;
2872             int32_t          nPatches;
2873 
2874             // No patch associated with the current kernel
2875             if (iPatchIndex < 0)
2876             {
2877                 return false;
2878             }
2879 
2880             // Get Patch entry
2881             pPatch = &(pSearchState->Patches[iPatchIndex]);
2882 
2883             // Get number of patches and pointer to first rule extension (patch rule)
2884             nPatches   = pRuleEntry->value;
2885             pPatchRule = (uint8_t *)(pRuleEntry + 1);
2886 
2887             // Check if rules can be applied
2888             if (nPatches + pPatch->nPatches > DL_MAX_PATCH_BLOCKS)
2889             {
2890                 VP_RENDER_ASSERTMESSAGE("exceeded number of patch blocks.");
2891                 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2892                 return false;
2893             }
2894 
2895             // Set Patches
2896             pPatchBlock = &(pPatch->Patch[pPatch->nPatches]);
2897             for (; nPatches > 0; nPatches--, pPatchBlock++, pPatch->nPatches++)
2898             {
2899                 pPatchBlock->BlockSize = ((Kdll_PatchRuleEntry *)pPatchRule)->Size;
2900                 pPatchBlock->SrcOffset = ((Kdll_PatchRuleEntry *)pPatchRule)->Source;
2901                 pPatchBlock->DstOffset = ((Kdll_PatchRuleEntry *)pPatchRule)->Dest;
2902                 pPatchRule += sizeof(Kdll_RuleEntry);
2903             }
2904 
2905             // Skip rule extensions
2906             iSetCount -= pRuleEntry->value;
2907             pRuleEntry += pRuleEntry->value;
2908         }
2909         break;
2910 
2911         // Set destination colorspace
2912         case RID_SetTargetCspace:
2913             if ((VPHAL_CSPACE)pRuleEntry->value == CSpace_Source)
2914             {
2915                 pSearchState->cspace = pSearchState->pFilter->cspace;
2916             }
2917             else
2918             {
2919                 pSearchState->cspace = (VPHAL_CSPACE)pRuleEntry->value;
2920             }
2921             break;
2922 
2923         // Set Src0 source format
2924         case RID_SetSrc0Format:
2925             if ((MOS_FORMAT)pRuleEntry->value == Format_Source)
2926             {
2927                 pSearchState->src0_format = pSearchState->pFilter->format;
2928             }
2929             else
2930             {
2931                 pSearchState->src0_format = (MOS_FORMAT)pRuleEntry->value;
2932             }
2933             break;
2934 
2935         // Set Src0 sampling mode
2936         case RID_SetSrc0Sampling:
2937             if ((Kdll_Sampling)pRuleEntry->value == Sample_Source)
2938             {
2939                 pSearchState->src0_sampling = pSearchState->pFilter->sampler;
2940             }
2941             else
2942             {
2943                 pSearchState->src0_sampling = (Kdll_Sampling)pRuleEntry->value;
2944             }
2945             break;
2946 
2947         // Set Src0 Rotation
2948         case RID_SetSrc0Rotation:
2949             pSearchState->src0_rotation = pSearchState->pFilter->rotation;
2950             break;
2951 
2952         // Set Src0 Colorfill
2953         case RID_SetSrc0ColorFill:
2954             if ((int32_t)pRuleEntry->value == ColorFill_Source)
2955             {
2956                 pSearchState->src0_colorfill = pSearchState->pFilter->colorfill;
2957             }
2958             else
2959             {
2960                 pSearchState->src0_colorfill = (int32_t)pRuleEntry->value;
2961             }
2962             break;
2963 
2964         // Set Src0 luma key
2965         case RID_SetSrc0LumaKey:
2966             if (pRuleEntry->value == LumaKey_Source)
2967             {
2968                 pSearchState->src0_lumakey = pSearchState->pFilter->lumakey;
2969             }
2970             else
2971             {
2972                 pSearchState->src0_lumakey = (int32_t)pRuleEntry->value;
2973             }
2974             break;
2975 
2976         // Set Src0 Procamp
2977         case RID_SetSrc0Procamp:
2978             if (pRuleEntry->value == Procamp_Source)
2979             {
2980                 pSearchState->src0_procamp = pSearchState->pFilter->procamp;
2981             }
2982             else
2983             {
2984                 pSearchState->src0_procamp = (int32_t)pRuleEntry->value;
2985             }
2986             break;
2987 
2988         // Set Src0 CSC coefficients
2989         case RID_SetSrc0Coeff:
2990             if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Source)
2991             {
2992                 if (pSearchState->pFilter->matrix == DL_CSC_DISABLED)
2993                 {
2994                     pSearchState->src0_coeff = CoeffID_None;
2995                 }
2996                 else
2997                 {
2998                     Kdll_CSC_Matrix *matrix = pSearchState->CscParams.Matrix;
2999                     matrix += pSearchState->pFilter->matrix;
3000 
3001                     pSearchState->src0_coeff = matrix->iCoeffID;
3002                 }
3003             }
3004             else
3005             {
3006                 pSearchState->src0_coeff = (Kdll_CoeffID)pRuleEntry->value;
3007             }
3008             break;
3009 
3010         case RID_SetSrc0Processing:
3011             if ((Kdll_Processing)pRuleEntry->value == Process_Source)
3012             {
3013                 pSearchState->src0_process = pSearchState->pFilter->process;
3014             }
3015             else
3016             {
3017                 pSearchState->src0_process = (Kdll_Processing)pRuleEntry->value;
3018             }
3019             break;
3020 
3021         // Set Src1 source format
3022         case RID_SetSrc1Format:
3023             if ((MOS_FORMAT)pRuleEntry->value == Format_Source)
3024             {
3025                 pSearchState->src1_format = pSearchState->pFilter->format;
3026             }
3027             else
3028             {
3029                 pSearchState->src1_format = (MOS_FORMAT)pRuleEntry->value;
3030             }
3031             break;
3032 
3033         // Set Src1 sampling mode
3034         case RID_SetSrc1Sampling:
3035             if ((Kdll_Sampling)pRuleEntry->value == Sample_Source)
3036             {
3037                 pSearchState->src1_sampling = pSearchState->pFilter->sampler;
3038             }
3039             else
3040             {
3041                 pSearchState->src1_sampling = (Kdll_Sampling)pRuleEntry->value;
3042             }
3043             break;
3044 
3045         // Set Src1 Rotation
3046         case RID_SetSrc1Rotation:
3047             pSearchState->src1_rotation = pSearchState->pFilter->rotation;
3048             break;
3049 
3050         // Set Src1 luma key
3051         case RID_SetSrc1LumaKey:
3052             if (pRuleEntry->value == LumaKey_Source)
3053             {
3054                 pSearchState->src1_lumakey = pSearchState->pFilter->lumakey;
3055             }
3056             else
3057             {
3058                 pSearchState->src1_lumakey = (int32_t)pRuleEntry->value;
3059             }
3060             break;
3061 
3062         // Set Src1 Sampler LumaKey
3063         case RID_SetSrc1SamplerLumaKey:
3064             if (pRuleEntry->value == LumaKey_Source)
3065             {
3066                 pSearchState->src1_samplerlumakey = pSearchState->pFilter->samplerlumakey;
3067             }
3068             else
3069             {
3070                 pSearchState->src1_samplerlumakey = (int32_t)pRuleEntry->value;
3071             }
3072             break;
3073 
3074         // Set Src1 Procamp
3075         case RID_SetSrc1Procamp:
3076             if (pRuleEntry->value == Procamp_Source)
3077             {
3078                 pSearchState->src1_procamp = pSearchState->pFilter->procamp;
3079             }
3080             else
3081             {
3082                 pSearchState->src1_procamp = (int32_t)pRuleEntry->value;
3083             }
3084             break;
3085 
3086         // Set Src1 CSC coefficients
3087         case RID_SetSrc1Coeff:
3088             if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Source)
3089             {
3090                 if (pSearchState->pFilter->matrix == DL_CSC_DISABLED)
3091                 {
3092                     pSearchState->src1_coeff = CoeffID_None;
3093                 }
3094                 else
3095                 {
3096                     Kdll_CSC_Matrix *matrix = pSearchState->CscParams.Matrix;
3097                     matrix += pSearchState->pFilter->matrix;
3098 
3099                     pSearchState->src1_coeff = matrix->iCoeffID;
3100                 }
3101             }
3102             else
3103             {
3104                 pSearchState->src1_coeff = (Kdll_CoeffID)pRuleEntry->value;
3105             }
3106             break;
3107 
3108         // Set Src1 processing mode
3109         case RID_SetSrc1Processing:
3110             if ((Kdll_Processing)pRuleEntry->value == Process_Source)
3111             {
3112                 pSearchState->src1_process = pSearchState->pFilter->process;
3113             }
3114             else
3115             {
3116                 pSearchState->src1_process = (Kdll_Processing)pRuleEntry->value;
3117             }
3118             break;
3119 
3120         // Set current quadrant
3121         case RID_SetQuadrant:
3122             pSearchState->quadrant = (int32_t)pRuleEntry->value;
3123             break;
3124 
3125         // Set CSC flag before Mix
3126         case RID_SetCSCBeforeMix:
3127             pSearchState->bCscBeforeMix = pRuleEntry->value ? true : false;
3128             break;
3129 
3130         // Unsupported "Set" rule
3131         default:
3132             // Failed to find a matching rule -> kernel search will fail
3133             VP_RENDER_ASSERTMESSAGE("Invalid rule %d @ layer %d, state %d.", pRuleEntry->id, pSearchState->layer_number, pSearchState->state);
3134             MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3135             return false;
3136         }
3137     }
3138 
3139     // Reset matching rule
3140     pSearchState->pMatchingRuleSet = nullptr;
3141     return true;
3142 }
3143 
3144 //-----------------------------------------------------------------------------------------
3145 // KernelDll_SortRuleTable - Sort master dynamic linking rule table
3146 //
3147 // Parameters:
3148 //    Kdll_State *pState - [in] Kernel Dll state
3149 //
3150 // Output: true  - Master rule table (and acceleration table) successfully created
3151 //         false - Failed to setup master rule table
3152 //-----------------------------------------------------------------------------------------
KernelDll_SortRuleTable(Kdll_State * pState)3153 bool KernelDll_SortRuleTable(Kdll_State *pState)
3154 {
3155     uint8_t               group;
3156     int32_t               state;
3157     const Kdll_RuleEntry *pRule = nullptr;
3158     Kdll_RuleEntrySet *   pRuleSet;
3159     int32_t               i, j;
3160 
3161     int32_t iTotal = 0;
3162     int32_t iNoOverr[Parser_Count];  // Non-overridable (enforced) rules
3163     int32_t iDefault[Parser_Count];  // Default rules
3164     int32_t iCustom[Parser_Count];   // Custom rules
3165 
3166     VP_RENDER_FUNCTION_ENTER;
3167 
3168     // Release previous table (rule table update)
3169     if (pState->pSortedRules)
3170     {
3171         MOS_FreeMemory(pState->pSortedRules);
3172         pState->pSortedRules = nullptr;
3173 
3174         MOS_ZeroMemory(pState->pDllRuleTable, sizeof(pState->pDllRuleTable));
3175         MOS_ZeroMemory(pState->iDllRuleCount, sizeof(pState->iDllRuleCount));
3176     }
3177 
3178     // Zero counters
3179     MOS_ZeroMemory(iNoOverr, sizeof(iNoOverr));
3180     MOS_ZeroMemory(iDefault, sizeof(iDefault));
3181     MOS_ZeroMemory(iCustom, sizeof(iCustom));
3182 
3183     // Count number of entries for each state
3184     for (i = 0; i < 2; i++)
3185     {
3186         if (i == 0)
3187         {
3188             pRule = pState->pRuleTableDefault;
3189         }
3190         else if (i == 1)
3191         {
3192             pRule = pState->pRuleTableCustom;
3193         }
3194 
3195         // Table not set - continue
3196         if (!pRule)
3197             continue;
3198 
3199         for (; pRule->id != RID_Op_EOF; pRule++)
3200         {
3201             // Skip extended rules (variable lenght)
3202             if (RID_IS_EXTENDED(pRule->id))
3203             {  // value contains number of entries
3204                 pRule += pRule->value;
3205             }
3206             else if (pRule->id == RID_Op_NewEntry)
3207             {
3208                 // Save Rule Group
3209                 if (i == 0)
3210                 {
3211                     group = pRule->value;
3212                 }
3213                 else
3214                 {
3215                     group = RULE_CUSTOM;
3216                 }
3217 
3218                 // Second rule must always be RID_IsParserState
3219                 pRule++;
3220                 if (pRule->id != RID_IsParserState)
3221                 {
3222                     VP_RENDER_ASSERTMESSAGE("Rule does not start with State.");
3223                     MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3224                     return false;
3225                 }
3226 
3227                 // Get Parser State -> validate value
3228                 state = pRule->value;
3229                 if (state < Parser_Begin)
3230                 {
3231                     VP_RENDER_ASSERTMESSAGE("Invalid State %d.", state);
3232                     MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3233                     return false;
3234                 }
3235                 else if (state >= Parser_Custom)
3236                 {  // Custom states are set together in the same entry
3237                     state = Parser_Custom;
3238                 }
3239 
3240                 if (group == RULE_NO_OVERRIDE)
3241                 {
3242                     iNoOverr[state]++;
3243                 }
3244                 else if (group == RULE_DEFAULT)
3245                 {
3246                     iDefault[state]++;
3247                 }
3248                 else
3249                 {
3250                     iCustom[state]++;
3251                 }
3252 
3253                 iTotal++;
3254             }
3255         }
3256     }
3257 
3258     // Allocate rules
3259     pState->pSortedRules = (Kdll_RuleEntrySet *)MOS_AllocAndZeroMemory(iTotal * sizeof(Kdll_RuleEntrySet));
3260     if (!pState->pSortedRules)
3261     {
3262         VP_RENDER_ASSERTMESSAGE("Failed to allocate rule table.");
3263         MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3264         return false;
3265     }
3266 
3267     // Setup pointers to sorted rules
3268     pState->pDllRuleTable[0] = pState->pSortedRules;
3269     for (j = 0, i = 0; i < Parser_Count; i++)
3270     {
3271         // Setup start pointer and number of entries to search for each state
3272         pState->pDllRuleTable[i] = pState->pDllRuleTable[j] + pState->iDllRuleCount[j];
3273         pState->iDllRuleCount[i] = iNoOverr[i] + iCustom[i] + iDefault[i];
3274 
3275         // Setup offsets to rules for sorting
3276         iDefault[i] = iNoOverr[i] + iCustom[i];  // Last set of rules
3277         iCustom[i]  = iNoOverr[i];               // 2nd set of rules
3278         iNoOverr[i] = 0;                         // 1st set of rules
3279 
3280         j = i;
3281     }
3282 
3283     // Sort rules for fast access
3284     // Integrate enforced, custom, default rules into one single access table
3285     for (i = 0; i < 2; i++)
3286     {
3287         if (i == 0)
3288         {
3289             pRule = pState->pRuleTableDefault;
3290         }
3291         else if (i == 1)
3292         {
3293             pRule = pState->pRuleTableCustom;
3294         }
3295 
3296         // Table not set - continue
3297         if (!pRule)
3298             continue;
3299 
3300         while (pRule->id != RID_Op_EOF)
3301         {
3302             if (pRule->id != RID_Op_NewEntry)
3303             {
3304                 VP_RENDER_ASSERTMESSAGE("New rule entry expected.");
3305                 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3306                 return false;
3307             }
3308 
3309             // Save Rule Group
3310             if (i == 0)
3311             {
3312                 group = pRule->value;
3313             }
3314             else
3315             {
3316                 group = RULE_CUSTOM;
3317             }
3318 
3319             // Get Parser State -> validate value
3320             pRule++;
3321             state = pRule->value;
3322             if (state >= Parser_Custom)
3323             {  // Custom states are set together in the same entry
3324                 state = Parser_Custom;
3325             }
3326             else
3327             {  // Skip state check - already handled by acceleration table
3328                 pRule++;
3329             }
3330 
3331             // Point to sorted rule set entry
3332             if (group == RULE_NO_OVERRIDE)
3333             {
3334                 j = iNoOverr[state]++;
3335             }
3336             else if (group == RULE_DEFAULT)
3337             {
3338                 j = iDefault[state]++;
3339             }
3340             else
3341             {
3342                 j = iCustom[state]++;
3343             }
3344 
3345             // Point to sorted ruleset for the current parser state
3346             pRuleSet = pState->pDllRuleTable[state] + j;
3347 
3348             // Fill RuleSet
3349             pRuleSet->pRuleEntry = pRule;
3350             pRuleSet->iGroup     = group;
3351 
3352             // Count number of match rules, including extended rules
3353             while (RID_IS_MATCH(pRule->id))
3354             {
3355                 if (RID_IS_EXTENDED(pRule->id))
3356                 {
3357                     pRuleSet->iMatchCount += pRule->value;
3358                     pRule += pRule->value;
3359                 }
3360 
3361                 pRuleSet->iMatchCount++;
3362                 pRule++;
3363             }
3364 
3365             // Count number of set rules, including extended rules
3366             while (RID_IS_SET(pRule->id))
3367             {
3368                 if (RID_IS_EXTENDED(pRule->id))
3369                 {
3370                     pRuleSet->iSetCount += pRule->value;
3371                     pRule += pRule->value;
3372                 }
3373 
3374                 pRuleSet->iSetCount++;
3375                 pRule++;
3376             }
3377 
3378             // Rule must have at least one "Set" rule
3379             if (pRuleSet->iSetCount < 1)
3380             {
3381                 VP_RENDER_ASSERTMESSAGE("Ruleset must have at least one set rule.");
3382                 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3383                 return false;
3384             }
3385         }
3386     }
3387 
3388     // Rule table is now sorted and integrated with custom rules
3389     return true;
3390 }
3391 
3392 //---------------------------------------------------------------------------------------
3393 // KernelDll_AllocateStates - Allocate Kernel Dynamic Linking/Loading (Dll) States
3394 //
3395 //    - Allocate DL states
3396 //    - Setup export/import list for linking
3397 //    - Prepare pool of search nodes
3398 //    - Load component kernels from binary file
3399 //    - Setup kernel cache
3400 //    - Setup kernel dynamic linking rules
3401 //
3402 // Parameters: [in] pKernelBin        - Pointer to Kernel binary file loaded in sys memory
3403 //             [in] uKernelSize       - Kernel file size
3404 //             [in] pFcPatchBin       - Pointer to FC patch binary file loaded in sys memory
3405 //             [in] uFcPatchCacheSize - FC patch binary file size
3406 //             [in] platform          - Gfx platform
3407 //             [in] pDefaultRules     - Dynamic Linking Rules Table
3408 //
3409 // Output: Pointer to allocated Kernel dll state
3410 //         nullptr - Failed to allocate Kernel dll state
3411 //-----------------------------------------------------------------------------------------
KernelDll_AllocateStates(void * pKernelBin,uint32_t uKernelSize,void * pFcPatchCache,uint32_t uFcPatchCacheSize,const Kdll_RuleEntry * pDefaultRules,void (* ModifyFunctionPointers)(PKdll_State))3412 Kdll_State *KernelDll_AllocateStates(
3413     void *                pKernelBin,
3414     uint32_t              uKernelSize,
3415     void *                pFcPatchCache,
3416     uint32_t              uFcPatchCacheSize,
3417     const Kdll_RuleEntry *pDefaultRules,
3418     void (*ModifyFunctionPointers)(PKdll_State))
3419 {
3420     Kdll_State *          pState;
3421     Kdll_CacheEntry *     pCacheEntry;
3422     Kdll_KernelCache *    pKernelCache;
3423     Kdll_KernelHashTable *pHashTable;
3424     Kdll_KernelHashEntry *pHashEntries;
3425 
3426     int32_t              iSize;
3427     int32_t              nExports    = 0;
3428     int32_t              nImports    = 0;
3429     uint32_t *           pLinkOffset = nullptr;
3430     Kdll_LinkData *      pLinkSort   = nullptr;
3431     Kdll_LinkData *      pLinkData;
3432     Kdll_LinkData *      pExports;
3433     Kdll_LinkFileHeader *pLinkHeader;
3434 
3435     int32_t   i, j;
3436     uint32_t *pOffsets;
3437     uint8_t * pBase;
3438 
3439     VP_RENDER_FUNCTION_ENTER;
3440 
3441     // Allocate dynamic linking states
3442     i = sizeof(Kdll_State);                                      // Dynamic linking states
3443     i += sizeof(Kdll_CacheEntry) * IDR_VP_TOTAL_NUM_KERNELS;     // Component kernel cache entries
3444     i += sizeof(Kdll_CacheEntry) * IDR_VP_TOTAL_NUM_KERNELS;     // CMFC kernel patch cache entries
3445     i += sizeof(Kdll_CacheEntry) * DL_DEFAULT_COMBINED_KERNELS;  // Combined kernel cache entries
3446     i += DL_COMBINED_KERNEL_CACHE_SIZE;                          // Combined kernel buffer
3447     i += sizeof(Kdll_LinkData) * DL_MAX_EXPORT_COUNT;            // Kernel Export table
3448 
3449     pState = (Kdll_State *)MOS_AllocAndZeroMemory(i);
3450     if (!pState)
3451     {
3452         VP_RENDER_ASSERTMESSAGE("Failed to allocate kernel dll states.");
3453         MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3454         goto cleanup;
3455     }
3456     pState->iSize        = i;
3457     pState->dwRefresh    = 0;
3458     pState->pProcamp     = nullptr;
3459     pState->iProcampSize = 0;
3460     pState->pSortedRules = nullptr;
3461 
3462     if ((pFcPatchCache != nullptr) && (uFcPatchCacheSize != 0))
3463     {
3464         pState->bEnableCMFC = true;
3465     }
3466 
3467     // Initialize platform specific function pointers
3468     if (!KernelDll_SetupFunctionPointers(pState, ModifyFunctionPointers))
3469     {
3470         VP_RENDER_ASSERTMESSAGE("Failed to setup function pointers.");
3471         MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3472         goto cleanup;
3473     }
3474 
3475     pKernelCache = &pState->ComponentKernelCache;
3476 
3477     // No custom kernels/rules
3478     pState->pRuleTableCustom   = nullptr;
3479     pState->pCustomKernelCache = nullptr;
3480 
3481     // Set Kernel DLL Rules
3482     pState->pRuleTableDefault = pDefaultRules;
3483 
3484     // Integrate and sort rule tables
3485     KernelDll_SortRuleTable(pState);
3486 
3487     // Setup component kernel cache
3488     pKernelCache->pCache           = (uint8_t *)pKernelBin;
3489     pKernelCache->iCacheSize       = (int32_t)uKernelSize;
3490     pKernelCache->iCacheFree       = 0;
3491     pKernelCache->iCacheMaxEntries = IDR_VP_TOTAL_NUM_KERNELS;
3492     pKernelCache->iCacheEntries    = IDR_VP_TOTAL_NUM_KERNELS;
3493     pKernelCache->pCacheEntries    = (Kdll_CacheEntry *)(pState + 1);
3494 
3495     pOffsets    = (uint32_t *)pKernelCache->pCache;
3496     pBase       = (uint8_t *)(pOffsets + IDR_VP_TOTAL_NUM_KERNELS + 1);
3497     pCacheEntry = pKernelCache->pCacheEntries;
3498     for (i = 0; i < IDR_VP_TOTAL_NUM_KERNELS; i++, pCacheEntry++)
3499     {
3500         pCacheEntry->iKUID      = i;
3501         pCacheEntry->iKCID      = -1;
3502         pCacheEntry->dwLoaded   = 0;
3503         pCacheEntry->dwRefresh  = 0;
3504         pCacheEntry->wHashEntry = 0;
3505         pCacheEntry->szName     = g_cInit_ComponentNames[i];
3506         pCacheEntry->iSize      = pOffsets[i + 1] - pOffsets[i];
3507         pCacheEntry->pBinary    = (pCacheEntry->iSize > 0) ? (pBase + pOffsets[i]) : nullptr;
3508     }
3509 
3510     // Setup CMFC kernel patch cache
3511     pKernelCache = &pState->CmFcPatchCache;
3512 
3513     if (pState->bEnableCMFC && pFcPatchCache)
3514     {
3515         pKernelCache->pCache           = (uint8_t *)pFcPatchCache;
3516         pKernelCache->iCacheSize       = (int32_t)uFcPatchCacheSize;
3517         pKernelCache->iCacheFree       = 0;
3518         pKernelCache->iCacheMaxEntries = IDR_VP_TOTAL_NUM_KERNELS;
3519         pKernelCache->iCacheEntries    = IDR_VP_TOTAL_NUM_KERNELS;
3520         pKernelCache->pCacheEntries    = pCacheEntry;
3521 
3522         pOffsets = (uint32_t *)pKernelCache->pCache;
3523         pBase    = (uint8_t *)(pOffsets + IDR_VP_TOTAL_NUM_KERNELS + 1);
3524         for (i = 0; i < IDR_VP_TOTAL_NUM_KERNELS; i++, pCacheEntry++)
3525         {
3526             pCacheEntry->iKUID      = i;
3527             pCacheEntry->iKCID      = -1;
3528             pCacheEntry->dwLoaded   = 0;
3529             pCacheEntry->dwRefresh  = 0;
3530             pCacheEntry->wHashEntry = 0;
3531             pCacheEntry->szName     = g_cInit_ComponentNames[i];
3532             pCacheEntry->iSize      = pOffsets[i + 1] - pOffsets[i];
3533             pCacheEntry->pBinary    = (pCacheEntry->iSize > 0) ? (pBase + pOffsets[i]) : nullptr;
3534         }
3535     }
3536     else
3537     {
3538         pCacheEntry += IDR_VP_TOTAL_NUM_KERNELS;
3539     }
3540 
3541     // Setup combined kernel cache
3542     pKernelCache                   = &pState->KernelCache;
3543     pKernelCache->iCacheMaxEntries = DL_DEFAULT_COMBINED_KERNELS;
3544     pKernelCache->iCacheEntries    = 0;
3545     pKernelCache->iCacheSize       = DL_COMBINED_KERNEL_CACHE_SIZE;                           // Size of kernel cache
3546     pKernelCache->iCacheFree       = DL_COMBINED_KERNEL_CACHE_SIZE;                           // Free cache size
3547     pKernelCache->iCacheID         = 0x00010000;                                              // Cache ID
3548     pKernelCache->pCacheEntries    = pCacheEntry;                                             // Cached kernel entries
3549     pKernelCache->pCache           = (uint8_t *)(pCacheEntry + DL_DEFAULT_COMBINED_KERNELS);  // kernels
3550 
3551     // reset cache entries
3552     for (i = 0; i < DL_DEFAULT_COMBINED_KERNELS; i++, pCacheEntry++)
3553     {
3554         pCacheEntry->iKUID   = -1;
3555         pCacheEntry->iKCID   = -1;
3556         pCacheEntry->pBinary = pKernelCache->pCache + i * DL_CACHE_BLOCK_SIZE;
3557         if (i != DL_DEFAULT_COMBINED_KERNELS - 1)
3558         {
3559             pCacheEntry->pNextEntry = pCacheEntry + 1;
3560         }
3561         else
3562         {
3563             pCacheEntry->pNextEntry = nullptr;
3564         }
3565     }
3566 
3567     //------------------------------------
3568     // Setup hash table
3569     //------------------------------------
3570     pHashTable   = &pState->KernelHashTable;
3571     pHashEntries = pState->KernelHashTable.HashEntry - 1;
3572 
3573     pHashTable->pool = 1;                        // first in pool (1 based index)
3574     pHashTable->last = DL_MAX_COMBINED_KERNELS;  // last in pool (for releasing)
3575 
3576     for (i = 1; i <= DL_MAX_COMBINED_KERNELS; i++)
3577     {
3578         pHashEntries[i].next = i + 1;
3579     }
3580     pHashEntries[i - 1].next = 0;  // last entry
3581 
3582     //------------------------------------
3583     // Setup dynamic linking import/export array
3584     //------------------------------------
3585     pCacheEntry = pState->ComponentKernelCache.pCacheEntries;
3586     iSize       = pCacheEntry[IDR_VP_LinkFile].iSize;
3587     if (iSize == 0)
3588     {
3589         VP_RENDER_NORMALMESSAGE("Link file is missing.");
3590         goto cleanup;
3591     }
3592 
3593     // Get link file binary data
3594     pLinkHeader = (Kdll_LinkFileHeader *)pCacheEntry[IDR_VP_LinkFile].pBinary;
3595     if (pLinkHeader == nullptr ||
3596         pLinkHeader->dwVersion != IDR_VP_LINKFILE_VERSION ||
3597         sizeof(Kdll_LinkFileHeader) != IDR_VP_LINKFILE_HEADER)
3598     {
3599         VP_RENDER_ASSERTMESSAGE("Invalid link file version.");
3600         goto cleanup;
3601     }
3602     iSize = (iSize - IDR_VP_LINKFILE_HEADER) / sizeof(Kdll_LinkData);
3603 
3604     // Create temporary list of sorted link data and offsets
3605     pLinkSort   = (Kdll_LinkData *)MOS_AllocAndZeroMemory(iSize * sizeof(Kdll_LinkData));
3606     pLinkOffset = (uint32_t *)MOS_AllocAndZeroMemory((IDR_VP_TOTAL_NUM_KERNELS + 1) * sizeof(uint32_t));
3607     if (!pLinkSort || !pLinkOffset)
3608     {
3609         VP_RENDER_ASSERTMESSAGE("Failed to allocate temporary buffers.");
3610         goto cleanup;
3611     }
3612 
3613     // Count number of imports for each component kernel
3614     pCacheEntry[0].pLink = pLinkData = (Kdll_LinkData *)(pLinkHeader + 1);
3615     for (i = iSize; i > 0; i--, pLinkData++)
3616     {
3617         if (pLinkData->iKUID < IDR_VP_TOTAL_NUM_KERNELS)
3618         {
3619             pCacheEntry[pLinkData->iKUID].nLink++;
3620         }
3621 
3622         nExports += pLinkData->bExport;
3623         nImports += !pLinkData->bExport;
3624     }
3625 
3626     // Sanity check
3627     if (nExports != (int32_t)pLinkHeader->dwExports ||
3628         nImports != (int32_t)pLinkHeader->dwImports)
3629     {
3630         VP_RENDER_ASSERTMESSAGE("Inconsistent header data.");
3631         goto cleanup;
3632     }
3633 
3634     if (nExports > DL_MAX_EXPORT_COUNT)
3635     {
3636         VP_RENDER_ASSERTMESSAGE("Unsupported number of exports %d > %d.", nExports, DL_MAX_EXPORT_COUNT);
3637         goto cleanup;
3638     }
3639 
3640     pState->ComponentKernelCache.pExports = pExports = (Kdll_LinkData *)(pKernelCache->pCache + pKernelCache->iCacheSize);
3641     pState->ComponentKernelCache.nExports            = nExports;
3642 
3643     // Calculate offsets for sorting
3644     pLinkOffset[0] = 0;
3645     pLinkData      = pCacheEntry[0].pLink;
3646     for (i = 1; i < IDR_VP_TOTAL_NUM_KERNELS; i++)
3647     {
3648         pLinkOffset[i]       = pLinkOffset[i - 1] + pCacheEntry[i - 1].nLink;
3649         pCacheEntry[i].pLink = (pCacheEntry[i].nLink) ? (pLinkData + pLinkOffset[i]) : nullptr;
3650     }
3651     pLinkOffset[i] = pLinkOffset[i - 1] + pCacheEntry[i - 1].nLink;
3652 
3653     // Sort link data
3654     for (i = iSize; i > 0; i--, pLinkData++)
3655     {
3656         j            = pLinkOffset[MOS_MIN(pLinkData->iKUID, IDR_VP_TOTAL_NUM_KERNELS)]++;
3657         pLinkSort[j] = *pLinkData;
3658 
3659         // Add to export table
3660         if (pLinkData->bExport &&
3661             pLinkData->iLabelID < DL_MAX_EXPORT_COUNT)
3662         {
3663             pExports[pLinkData->iLabelID] = *pLinkData;
3664         }
3665     }
3666 
3667     // Copy sort data
3668     pLinkData = pCacheEntry[0].pLink;
3669     MOS_SecureMemcpy(pLinkData, iSize * sizeof(Kdll_LinkData), (void *)pLinkSort, iSize * sizeof(Kdll_LinkData));
3670 
3671     // Release sort buffers
3672     MOS_FreeMemory(pLinkOffset);
3673     MOS_FreeMemory(pLinkSort);
3674 
3675     // Return
3676     return pState;
3677 
3678 cleanup:
3679     if (pState)
3680     {
3681         MOS_FreeMemory(pState->pSortedRules);
3682         pState->pSortedRules = nullptr;
3683     }
3684 
3685     // Free DL States and temporary sort buffers
3686     MOS_FreeMemory(pState);
3687     MOS_FreeMemory(pLinkSort);
3688     MOS_FreeMemory(pLinkOffset);
3689 
3690     return nullptr;
3691 }
3692 
3693 //--------------------------------------------------------------
3694 // KernelDll_ReleaseAdditionalCacheEntries - Release the additional kernel cache entries
3695 //--------------------------------------------------------------
KernelDll_ReleaseAdditionalCacheEntries(Kdll_KernelCache * pCache)3696 void KernelDll_ReleaseAdditionalCacheEntries(Kdll_KernelCache *pCache)
3697 {
3698     VP_RENDER_FUNCTION_ENTER;
3699     if (pCache->iCacheMaxEntries > DL_DEFAULT_COMBINED_KERNELS)
3700     {
3701         Kdll_CacheEntry *pNewEntries, *pEntries;
3702         pNewEntries = (pCache->pCacheEntries + DL_DEFAULT_COMBINED_KERNELS - 1)->pNextEntry;
3703         for (int i = 0; i < (pCache->iCacheMaxEntries - DL_DEFAULT_COMBINED_KERNELS) / DL_NEW_COMBINED_KERNELS; i++)
3704         {
3705             pEntries = (pNewEntries + DL_NEW_COMBINED_KERNELS - 1)->pNextEntry;
3706             MOS_FreeMemory(pNewEntries);
3707             pNewEntries = pEntries;
3708         }
3709     }
3710 }
3711 
3712 //---------------------------------------------------------------------------------------
3713 // KernelDll_ReleaseStates - Release Kernel Dynamic Linking/Loading (Dll) States
3714 //
3715 // Parameters:
3716 //    Kdll_State *pState - [in] Kernel dll State to release
3717 //
3718 // Output: Pointer to allocated Kernel dll state
3719 //         nullptr - Failed to allocate Kernel dll state
3720 //-----------------------------------------------------------------------------------------
KernelDll_ReleaseStates(Kdll_State * pState)3721 void KernelDll_ReleaseStates(Kdll_State *pState)
3722 {
3723     VP_RENDER_FUNCTION_ENTER;
3724 
3725     if (!pState)
3726         return;
3727     KernelDll_ReleaseAdditionalCacheEntries(&pState->KernelCache);
3728     MOS_FreeMemory(pState->ComponentKernelCache.pCache);
3729     MOS_FreeMemory(pState->CmFcPatchCache.pCache);
3730     MOS_FreeMemory(pState->pSortedRules);
3731     MOS_FreeMemory(pState);
3732 }
3733 
3734 //---------------------------------------------------------------------------------------
3735 // KernelDll_SetupFunctionPointers - Setup Function pointers based on platform
3736 //
3737 // Parameters:
3738 //    char  *pState    - [in/out] Kernel Dll state
3739 //           platform  - [in] platform
3740 //
3741 // Output: true  - Function pointers are set
3742 //         false - Failed to setup function pointers (invalid platform)
3743 //-----------------------------------------------------------------------------------------
KernelDll_SetupFunctionPointers(Kdll_State * pState,void (* ModifyFunctionPointers)(PKdll_State))3744 static bool KernelDll_SetupFunctionPointers(
3745     Kdll_State *pState,
3746     void (*ModifyFunctionPointers)(PKdll_State))
3747 {
3748     VP_RENDER_FUNCTION_ENTER;
3749 
3750     pState->pfnSetupCSC          = KernelDll_SetupCSC;
3751     pState->pfnMapCSCMatrix      = KernelDll_MapCSCMatrix;
3752     pState->pfnFindRule          = KernelDll_FindRule;
3753     pState->pfnUpdateState       = KernelDll_UpdateState;
3754     pState->pfnSearchKernel      = KernelDll_SearchKernel;
3755     pState->pfnBuildKernel       = KernelDll_BuildKernel;
3756     pState->pfnStartKernelSearch = KernelDll_StartKernelSearch;
3757 
3758     if (ModifyFunctionPointers != nullptr)
3759     {
3760         (*ModifyFunctionPointers)(pState);
3761     }
3762 
3763 #if EMUL || VPHAL_LIB
3764     // Disable callbacks
3765     pState->pToken          = nullptr;
3766     pState->pfnCbListKernel = nullptr;
3767     pState->pfnCbSearchSate = nullptr;
3768 #endif  // EMUL || VPHAL_LIB
3769 
3770     return true;
3771 }
3772 
3773 //---------------------------------------------------------------------------------------
3774 // Kdll_AddKernelList - Add kernel to CM FC kernel list
3775 //
3776 // Parameters:
3777 //    Kdll_KernelCache *pKernelCache     - [in]     Component kernel cache
3778 //    Kdll_KernelCache *pCmFcPatchCache  - [in]     Component kernel patch data cache
3779 //    Kdll_SearchState *pSearchState     - [in/out] Kernel search state
3780 //    Kdll_PatchData   *pKernelPatch     - [in]     Kernel Patch data
3781 //    void             *pPatchDst         - [in]     Patch data Dst address
3782 //    int32_t          iKUID             - [in]     Kernel Unique ID
3783 //    cm_fc_kernel_t   *Cm_Fc_Kernels    - [in/out] CM FC Kernels
3784 //
3785 // Output: true if suceeded, false otherwise
3786 //---------------------------------------------------------------------------------------
Kdll_AddKernelList(Kdll_KernelCache * pKernelCache,Kdll_KernelCache * pCmFcPatchCache,Kdll_SearchState * pSearchState,int32_t iKUID,Kdll_PatchData * pKernelPatch,void * pPatchDst,cm_fc_kernel_t * Cm_Fc_Kernels)3787 bool Kdll_AddKernelList(Kdll_KernelCache *pKernelCache,
3788     Kdll_KernelCache *                    pCmFcPatchCache,
3789     Kdll_SearchState *                    pSearchState,
3790     int32_t                               iKUID,
3791     Kdll_PatchData *                      pKernelPatch,
3792     void *                                pPatchDst,
3793     cm_fc_kernel_t *                      Cm_Fc_Kernels)
3794 {
3795     Kdll_State *     pState;
3796     Kdll_Symbol *    pSymbols;
3797     Kdll_CacheEntry *kernels;
3798     Kdll_CacheEntry *pPatch;
3799     Kdll_LinkData *  link;
3800     Kdll_LinkData *  liSearch_reloc;
3801     int *            size;
3802     int *            left;
3803     int              dwSize;
3804     int              i;
3805     int              base;
3806     bool             bInline;
3807     bool             res;
3808 
3809     VP_RENDER_FUNCTION_ENTER;
3810 
3811     res = false;
3812 
3813     // Check if Kernel ID is valid
3814     if (iKUID >= pKernelCache->iCacheEntries)
3815     {
3816         VP_RENDER_NORMALMESSAGE("invalid Kernel ID %d.", iKUID);
3817         goto finish;
3818     }
3819 
3820     // Get KDLL state
3821     pState = pSearchState->pKdllState;
3822 
3823     // Get current combined kernel
3824     size     = &pSearchState->KernelSize;
3825     left     = &pSearchState->KernelLeft;
3826     pSymbols = &pSearchState->KernelLink;
3827     base     = (*size) >> 2;
3828 
3829     // Find selected kernel/patch and kernel size; check if there is enough space
3830     kernels = &pKernelCache->pCacheEntries[iKUID];
3831     pPatch  = &pCmFcPatchCache->pCacheEntries[iKUID];
3832     dwSize  = kernels->iSize;
3833     if (*left < dwSize)
3834     {
3835         VP_RENDER_NORMALMESSAGE("exceeded maximum kernel size.");
3836         goto finish;
3837     }
3838 
3839     // Check if there is enough space for symbols
3840     if (pSymbols->dwCount + kernels->nLink >= pSymbols->dwSize)
3841     {
3842         VP_RENDER_NORMALMESSAGE("exceeded maximum numbers of symbols to resolve.");
3843         goto finish;
3844     }
3845 
3846 #if EMUL || VPHAL_LIB
3847     VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
3848 
3849     if (pState->pfnCbListKernel)
3850     {
3851         pState->pfnCbListKernel(pState->pToken, kernels->szName);
3852     }
3853 #elif _DEBUG || _RELEASE_INTERNAL  // EMUL || VPHAL_LIB
3854     VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
3855 #endif  // _DEBUG
3856 
3857     MT_LOG1(MT_VP_KERNEL_LIST_ADD, MT_NORMAL, MT_VP_KERNEL_ID, iKUID);
3858 
3859     // Append symbols to resolve, relocate symbols
3860     link           = kernels->pLink;
3861     liSearch_reloc = pSymbols->pLink + pSymbols->dwCount;
3862 
3863     bInline = false;
3864     if (link)
3865     {
3866         for (i = kernels->nLink; i > 0; i--, link++)
3867         {
3868             if (link->bInline)
3869             {
3870                 // Inline code included
3871                 if (!link->bExport)
3872                 {
3873                     bInline = true;
3874                 }
3875             }
3876             else
3877             {
3878                 *liSearch_reloc = *link;
3879                 liSearch_reloc->dwOffset += base;
3880                 liSearch_reloc++;
3881 
3882                 pSymbols->dwCount++;
3883             }
3884         }
3885     }
3886 
3887     *size += dwSize;
3888     *left -= dwSize;
3889     Cm_Fc_Kernels->binary_buf  = (const char *)kernels->pBinary;
3890     Cm_Fc_Kernels->binary_size = kernels->iSize;
3891     Cm_Fc_Kernels->patch_buf   = (const char *)pPatch->pBinary;
3892     Cm_Fc_Kernels->patch_size  = pPatch->iSize;
3893     res                        = true;
3894 
3895 finish:
3896     return res;
3897 }
3898 
3899 //---------------------------------------------------------------------------------------
3900 // KernelDll_BuildKernel_CmFc - Build CM based FC combine Kernel
3901 //
3902 // Parameters: [in/out] pState        - Pointer to Kernel binary file loaded in sys memory
3903 //             [in/out] pSearchState       - Kernel file size
3904 //
3905 // Output: bool
3906 //         TRUE - Successful FALSE - Failed
3907 //-----------------------------------------------------------------------------------------
KernelDll_BuildKernel_CmFc(Kdll_State * pState,Kdll_SearchState * pSearchState)3908 bool KernelDll_BuildKernel_CmFc(Kdll_State *pState, Kdll_SearchState *pSearchState)
3909 {
3910     Kdll_KernelCache *pKernelCache = &pState->ComponentKernelCache;
3911     Kdll_KernelCache *pPatchCache  = &pState->CmFcPatchCache;
3912     Kdll_KernelCache *pCustomCache = pState->pCustomKernelCache;
3913     bool              res;
3914     int32_t           offset = 0;
3915     int32_t *         pKernelID, *pPatchID;
3916     uint8_t *         pPatchData;
3917     Kdll_PatchData *  pKernelPatch;
3918     uint8_t *         kernel   = pSearchState->Kernel;
3919     Kdll_Symbol *     pSymbols = &pSearchState->KernelLink;
3920     uint32_t          nExports = pKernelCache->nExports;
3921     Kdll_LinkData *   pExports = pKernelCache->pExports;
3922     Kdll_LinkData *   pLink;
3923     int32_t           iOffset;
3924     uint32_t          dwResolveOffset[DL_MAX_EXPORT_COUNT];
3925     uint32_t          dwTotalKernelCount;
3926     size_t            stEstimatedKernelSize;
3927     int32_t           iKUID;
3928     bool              bResolveDone;
3929     int32_t           i;
3930     cm_fc_kernel_t    Cm_Fc_kernels[DL_MAX_KERNELS];
3931 
3932     VP_RENDER_FUNCTION_ENTER;
3933 
3934     // Disable pop-up box window for STL assertion to avoid VM hang in auto test.
3935 #if (!LINUX && !ANDROID)
3936     uint32_t prevErrorMode = ::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
3937 #if defined(_MSC_VER)
3938     ::_set_error_mode(_OUT_TO_STDERR);
3939     _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
3940     _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
3941     _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
3942     _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
3943     _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
3944     _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
3945 #endif
3946 #endif
3947 
3948     pSearchState->KernelLink.dwSize  = DL_MAX_SYMBOLS;
3949     pSearchState->KernelLink.dwCount = 0;
3950     pSearchState->KernelLink.pLink   = pSearchState->LinkArray;
3951     pSearchState->KernelSize         = 0;
3952     pSearchState->KernelLeft         = sizeof(pSearchState->Kernel);
3953     pSearchState->KernelLink.dwCount = 0;
3954 
3955     MOS_ZeroMemory(Cm_Fc_kernels, sizeof(Cm_Fc_kernels));
3956     dwTotalKernelCount    = 0;
3957     stEstimatedKernelSize = 0;
3958 
3959 #if EMUL || VPHAL_LIB || _DEBUG
3960     VP_RENDER_NORMALMESSAGE("Component Kernels:");
3961 #endif  // EMUL || VPHAL_LIB || _DEBUG
3962 
3963     pKernelID  = pSearchState->KernelID;
3964     pPatchID   = pSearchState->PatchID;
3965     pPatchData = nullptr;
3966 
3967     for (offset = 0; offset < pSearchState->KernelCount; offset++, pKernelID++, pPatchID++, dwTotalKernelCount++)
3968     {
3969         // Get patch information associated with the kernel
3970         pKernelPatch = (*pPatchID >= 0) ? &(pSearchState->Patches[*pPatchID]) : nullptr;
3971 
3972         // Append/Patch kernel from internal cache
3973         res = Kdll_AddKernelList(pKernelCache, pPatchCache, pSearchState, *pKernelID, pKernelPatch, pPatchData, &Cm_Fc_kernels[dwTotalKernelCount]);
3974 
3975         stEstimatedKernelSize += Cm_Fc_kernels[dwTotalKernelCount].binary_size;
3976 
3977         if (*pKernelID == IDR_VP_EOT)
3978         {
3979             dwTotalKernelCount--;
3980         }
3981 
3982         if (!res)
3983         {
3984             VP_RENDER_NORMALMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
3985             res = false;
3986             goto finish;
3987         }
3988     }
3989 
3990     // Resolve kernel dependencies
3991     MOS_ZeroMemory(dwResolveOffset, sizeof(dwResolveOffset));
3992 
3993     do
3994     {
3995         // Update exports
3996         for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
3997         {
3998             if (pLink->bExport)
3999             {
4000                 dwResolveOffset[pLink->iLabelID] = pLink->dwOffset;
4001             }
4002         }
4003 
4004         bResolveDone = true;
4005         for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
4006         {
4007             // validate label
4008             if (pLink->iLabelID > nExports ||            // invalid label
4009                 pExports[pLink->iLabelID].bExport == 0)  // label not in the export table
4010             {
4011                 VP_RENDER_NORMALMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
4012                 res = false;
4013                 goto finish;
4014             }
4015 
4016             // load dependencies
4017             if (!pLink->bExport && !dwResolveOffset[pLink->iLabelID])
4018             {
4019                 // set flag for another pass as newly loaded
4020                 // kernels may contain dependencies of their own
4021                 bResolveDone = false;
4022 
4023                 // Add dependencies to kernel list
4024                 iKUID = pExports[pLink->iLabelID].iKUID;
4025                 res   = Kdll_AddKernelList(pKernelCache, pPatchCache, pSearchState, iKUID, nullptr, nullptr, &Cm_Fc_kernels[dwTotalKernelCount]);
4026 
4027                 if (!res)
4028                 {
4029                     VP_RENDER_NORMALMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
4030                     res = false;
4031                     goto finish;
4032                 }
4033 
4034                 dwTotalKernelCount++;
4035 
4036                 // Restart
4037                 break;
4038             }
4039         }  // for
4040     } while (!bResolveDone);
4041 
4042     if (stEstimatedKernelSize > DL_MAX_KERNEL_SIZE)
4043     {
4044         res = false;
4045         VP_RENDER_NORMALMESSAGE("Kernel size exceeded kdll limitatin.");
4046         goto finish;
4047     }
4048 
4049     stEstimatedKernelSize = DL_MAX_KERNEL_SIZE;
4050 
4051     // Get combine kernel binary from CMFC lib
4052     if (CM_FC_OK != cm_fc_combine_kernels(dwTotalKernelCount, Cm_Fc_kernels, (char *)pSearchState->Kernel, &stEstimatedKernelSize, nullptr))
4053     {
4054         res = false;
4055         VP_RENDER_NORMALMESSAGE("cm_fc_combine_kernels() function call failed.");
4056         goto finish;
4057     }
4058 
4059     // Get combine kernel binary size from CMFC lib
4060     pSearchState->KernelSize = (int)stEstimatedKernelSize;
4061 
4062     res = true;
4063 
4064 finish:
4065 #if (!LINUX && !ANDROID)
4066     ::SetErrorMode(prevErrorMode);
4067 #endif
4068     return res;
4069 }
4070 
4071 //--------------------------------------------------------------
4072 // KernelDll_AllocateHashEntry - Allocate hash entry
4073 //--------------------------------------------------------------
KernelDll_AllocateHashEntry(Kdll_KernelHashTable * pHashTable,uint32_t hash)4074 uint16_t KernelDll_AllocateHashEntry(Kdll_KernelHashTable *pHashTable,
4075                                  uint32_t              hash)
4076 {
4077     Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1;
4078     Kdll_KernelHashEntry *pNewEntry;
4079     uint32_t folded_hash;
4080     uint16_t entry;
4081 
4082     VP_RENDER_FUNCTION_ENTER;
4083 
4084     entry = pHashTable->pool;
4085     if (!entry)
4086     {
4087         return 0;
4088     }
4089 
4090     // Get entry from pool
4091     pNewEntry = &pHashEntry[entry];
4092     pHashTable->pool = pNewEntry->next;
4093     if (pHashTable->last == entry)
4094     {
4095         pHashTable->last = 0;
4096     }
4097 
4098     // Initialize entry, attach to the hash table
4099     FOLD_HASH(folded_hash, hash);
4100     pNewEntry->dwHash      = hash;
4101     pNewEntry->next        = pHashTable->wHashTable[folded_hash];
4102     pNewEntry->iFilter     = 0;
4103     pNewEntry->pFilter     = nullptr;
4104     pNewEntry->pCacheEntry = nullptr;
4105     pHashTable->wHashTable[folded_hash] = entry;
4106     return entry;
4107 }
4108 
4109 //--------------------------------------------------------------
4110 // KernelDll_CacheGarbageCollection - performs garbage collection
4111 //--------------------------------------------------------------
KernelDll_GarbageCollection(Kdll_State * pState,int32_t size)4112 bool KernelDll_GarbageCollection(Kdll_State *pState, int32_t size)
4113 {
4114     Kdll_KernelCache     *pCache     = &pState->KernelCache;
4115     Kdll_CacheEntry      *pEntry     = pCache->pCacheEntries;
4116     Kdll_CacheEntry      *pOldest    = nullptr;
4117     Kdll_KernelHashTable *pHashTable = &pState->KernelHashTable;
4118     Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1;
4119     uint32_t              dwOldest   = (uint32_t)-1;
4120     uint16_t              wEntry     = 0;
4121     int32_t i;
4122 
4123     MOS_UNUSED(size);
4124 
4125     VP_RENDER_FUNCTION_ENTER;
4126 
4127     // Adjust refresh values to avoid overflow
4128     if (pState->dwRefresh > 0xffff0000)
4129     {
4130         pState->dwRefresh -= 0x80000000;
4131         for (i = pCache->iCacheMaxEntries; i > 0; i--)
4132         {
4133             if (pEntry->dwRefresh < 0x80000000)
4134                 pEntry->dwRefresh = 0;
4135             else
4136                 pEntry->dwRefresh -= 0x80000000;
4137             pEntry = pEntry->pNextEntry;
4138         }
4139     }
4140 
4141     // No need to deallocate old entries
4142     if (pCache->iCacheEntries < DL_MAX_COMBINED_KERNELS)
4143     {
4144         return true;
4145     }
4146 
4147     for (i = pCache->iCacheMaxEntries; i > 0; i--)
4148     {
4149         // deallocate old unreferenced entries
4150         if (pEntry->iKCID != -1 && pEntry->dwLoaded == 0)
4151         {
4152             if (pEntry->dwRefresh < dwOldest)
4153             {
4154                 pOldest  = pEntry;
4155                 dwOldest = pEntry->dwRefresh;
4156                 wEntry   = pEntry->wHashEntry;
4157             }
4158         }
4159         pEntry = pEntry->pNextEntry;
4160     }
4161 
4162     // No entry to release, sanity checks
4163     pHashEntry += wEntry;
4164     if (!pOldest ||
4165         wEntry == 0 ||
4166         pHashEntry->pCacheEntry != pOldest)
4167     {
4168         VP_RENDER_ASSERT(false);
4169         return false;
4170     }
4171 
4172     // Release hash and cache entries
4173     KernelDll_ReleaseHashEntry(pHashTable, wEntry);
4174     KernelDll_ReleaseCacheEntry(pCache, pOldest);
4175 
4176     return true;
4177 }
4178 
4179 //--------------------------------------------------------------
4180 // KernelDll_AllocateCacheEntry - Allocate cache entry for a given size
4181 //--------------------------------------------------------------
4182 Kdll_CacheEntry *
KernelDll_AllocateCacheEntry(Kdll_KernelCache * pCache,int32_t iSize)4183 KernelDll_AllocateCacheEntry(Kdll_KernelCache *pCache, int32_t iSize)
4184 {
4185     Kdll_CacheEntry *pEntry          = pCache->pCacheEntries;
4186     uint8_t *pCacheBinary               = nullptr;
4187     Kdll_CacheEntry *pCacheNextEntry = nullptr;
4188     int32_t i, j;
4189 
4190     VP_RENDER_FUNCTION_ENTER;
4191 
4192     // Check size
4193     if (iSize > DL_CACHE_BLOCK_SIZE)
4194     {
4195         return nullptr;
4196     }
4197 
4198     // Search empty entry
4199     j = pCache->iCacheMaxEntries;
4200     for (i = 0; i < j; i++)
4201     {
4202         if (pEntry->iKCID == -1)
4203         {
4204             break;
4205         }
4206         pEntry = pEntry->pNextEntry;
4207     }
4208     if (i == j)
4209     {
4210         // Try to allocate more cache entries
4211         pEntry = KernelDll_AllocateAdditionalCacheEntries(pCache);
4212         if(! pEntry)
4213         {
4214             return nullptr;
4215         }
4216     }
4217 
4218     // Reset entry
4219     pCacheBinary    = pEntry->pBinary;
4220     pCacheNextEntry = pEntry->pNextEntry;
4221     MOS_ZeroMemory(pEntry, sizeof(Kdll_CacheEntry));
4222     pEntry->iSize   = iSize;
4223     pEntry->pBinary    = pCacheBinary;
4224     pEntry->pNextEntry = pCacheNextEntry;
4225 
4226     // Increment entries
4227     pCache->iCacheEntries++;
4228     return pEntry;
4229 }
4230 
4231 //--------------------------------------------------------------
4232 // KernelDll_AllocateAdditionalCacheEntries - Allocate more kernel cache entries
4233 //--------------------------------------------------------------
4234 Kdll_CacheEntry *
KernelDll_AllocateAdditionalCacheEntries(Kdll_KernelCache * pCache)4235 KernelDll_AllocateAdditionalCacheEntries(Kdll_KernelCache *pCache)
4236 {
4237     Kdll_CacheEntry *pNewEntry = nullptr;
4238     Kdll_CacheEntry *pChcheEntry;
4239     int i, j;
4240 
4241     VP_RENDER_FUNCTION_ENTER;
4242 
4243     // Check num
4244     if (pCache->iCacheEntries + DL_NEW_COMBINED_KERNELS > DL_MAX_COMBINED_KERNELS)
4245     {
4246         VP_RENDER_ASSERTMESSAGE("KernelDll_AllocateAdditionalCacheEntries: Can't allocate more kernel cache entries\n");
4247         return nullptr;
4248     }
4249 
4250     // Allocate the new entires
4251     i = (sizeof(Kdll_CacheEntry) + DL_CACHE_BLOCK_SIZE) * DL_NEW_COMBINED_KERNELS;
4252     pNewEntry = (Kdll_CacheEntry *)MOS_AllocAndZeroMemory(i);
4253     if (!pNewEntry)
4254     {
4255         VP_RENDER_ASSERTMESSAGE("KernelDll_AllocateAdditionalCacheEntries: Failed to allocate kernel cache entries\n");
4256         return nullptr;
4257     }
4258 
4259     // Update the cache entires
4260     pChcheEntry = pCache->pCacheEntries;
4261     for(j = 0; j < pCache->iCacheMaxEntries - 1; j++)
4262     {
4263         pChcheEntry = pChcheEntry->pNextEntry;
4264     }
4265     pChcheEntry->pNextEntry = pNewEntry;
4266     for(j = 0; j < DL_NEW_COMBINED_KERNELS; j++, pNewEntry++)
4267     {
4268         pNewEntry->iKUID   = -1;
4269         pNewEntry->iKCID   = -1;
4270         pNewEntry->pBinary = (uint8_t *)(pNewEntry + DL_NEW_COMBINED_KERNELS - j) + j * DL_CACHE_BLOCK_SIZE;
4271         if(j != DL_NEW_COMBINED_KERNELS - 1)
4272         {
4273             pNewEntry->pNextEntry = pNewEntry + 1;
4274         }
4275         else
4276         {
4277             pNewEntry->pNextEntry = nullptr;
4278         }
4279     }
4280 
4281     pCache->iCacheMaxEntries += DL_NEW_COMBINED_KERNELS;
4282     pCache->iCacheSize       += DL_NEW_COMBINED_KERNELS * DL_CACHE_BLOCK_SIZE;
4283     pCache->iCacheFree       += DL_NEW_COMBINED_KERNELS * DL_CACHE_BLOCK_SIZE;
4284     return (Kdll_CacheEntry *)(pNewEntry - DL_NEW_COMBINED_KERNELS);
4285 }
4286 
4287 //--------------------------------------------------------------
4288 // KernelDll_AddKernel - Add kernel into hash table and kernel cache
4289 //--------------------------------------------------------------
4290 Kdll_CacheEntry *
KernelDll_AddKernel(Kdll_State * pState,Kdll_SearchState * pSearchState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t dwHash)4291 KernelDll_AddKernel(Kdll_State       *pState,           // Kernel Dll state
4292                     Kdll_SearchState *pSearchState,     // Search state
4293                     Kdll_FilterEntry *pFilter,          // Original filter
4294                     int32_t           iFilterSize,      // Original filter size
4295                     uint32_t          dwHash)
4296 {
4297     Kdll_CacheEntry      *pCacheEntry;
4298     Kdll_KernelHashTable *pHashTable;
4299     Kdll_KernelHashEntry *pHashEntry;
4300     uint16_t entry;
4301     int32_t size;
4302     uint8_t *ptr;
4303 
4304     VP_RENDER_FUNCTION_ENTER;
4305 
4306     // Check kernel
4307     if (pSearchState->KernelSize <= 0)
4308     {
4309         return nullptr;
4310     }
4311 
4312     // Get hash table
4313     pHashTable = &pState->KernelHashTable;
4314     pHashEntry = &pHashTable->HashEntry[0] - 1;  // all indices are 1 based (0 = null)
4315 
4316     // allocate space in kernel cache to store the kernel, filter, CSC parameters
4317     size  = pSearchState->KernelSize +                                  // Kernel
4318             pSearchState->iFilterSize * sizeof(Kdll_FilterEntry) * 2 +  // Original + Modified Filter
4319             sizeof(Kdll_CSC_Params) +                                   // CSC parameters
4320             sizeof(VPHAL_CSPACE);                                       // Intermediate Color Space for colorfill
4321 
4322     // Run garbage collection, create space for new kernel and metadata
4323     KernelDll_GarbageCollection(pState, size);
4324 
4325     // Get new kernel cache entry
4326     pCacheEntry = KernelDll_AllocateCacheEntry(&pState->KernelCache, size);
4327     if (!pCacheEntry)
4328     {
4329         VP_RENDER_ASSERTMESSAGE("Failed to allocate cache space for new kernel.");
4330         return nullptr;
4331     }
4332 
4333     // Get hash entry
4334     entry = KernelDll_AllocateHashEntry(pHashTable, dwHash);
4335     if (!entry)
4336     {
4337         VP_RENDER_ASSERTMESSAGE("Failed to allocate hash entry for new kernel.");
4338         KernelDll_ReleaseCacheEntry(&pState->KernelCache, pCacheEntry);
4339         return nullptr;
4340     }
4341 
4342     // Setup cache entry, copy kernel
4343     pCacheEntry->iKUID       = -1;
4344     pCacheEntry->iKCID       = pState->KernelCache.iCacheID;  // Create new kernel cache id (KCID)
4345     pCacheEntry->dwRefresh   = pState->dwRefresh++;
4346     pCacheEntry->wHashEntry  = entry;
4347 
4348     // Save kernel
4349     pCacheEntry->iSize = pSearchState->KernelSize;
4350     MOS_SecureMemcpy(pCacheEntry->pBinary, pSearchState->KernelSize, (void *)pSearchState->Kernel, pSearchState->KernelSize);
4351     ptr = pCacheEntry->pBinary + pSearchState->KernelSize;
4352 
4353     // Save modified filter
4354     pCacheEntry->iFilterSize = pSearchState->iFilterSize;
4355     pCacheEntry->pFilter     = (Kdll_FilterEntry *) (ptr);
4356     MOS_SecureMemcpy(ptr, pSearchState->iFilterSize * sizeof(Kdll_FilterEntry), (void *)pSearchState->Filter, pSearchState->iFilterSize * sizeof(Kdll_FilterEntry));
4357     ptr += pSearchState->iFilterSize * sizeof(Kdll_FilterEntry);
4358 
4359     // Save CSC parameters associated with the kernel
4360     pCacheEntry->pCscParams = (Kdll_CSC_Params *) (ptr);
4361     MOS_SecureMemcpy(ptr, sizeof(Kdll_CSC_Params), (void *)&pSearchState->CscParams, sizeof(Kdll_CSC_Params));
4362     ptr += sizeof(Kdll_CSC_Params);
4363     // Save intermediate color space for colorfill
4364     pCacheEntry->colorfill_cspace = pState->colorfill_cspace;
4365     ptr += sizeof(VPHAL_CSPACE);
4366 
4367     // increment KCID (Range = 0x00010000 - 0x7fffffff)
4368     pState->KernelCache.iCacheID = 0x00010000 + (pState->KernelCache.iCacheID - 0x0000ffff) % 0x7fff0000;
4369 
4370     // Setup hash entry, copy filter
4371     pHashEntry += entry;
4372     pHashEntry->pCacheEntry = pCacheEntry;
4373 
4374     // Save original filter for search purposes - modified filter is used for rendering
4375     pHashEntry->iFilter     = iFilterSize;
4376     pHashEntry->pFilter     = (Kdll_FilterEntry *) (ptr);
4377     MOS_SecureMemcpy(ptr, iFilterSize * sizeof(Kdll_FilterEntry), (void *)pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
4378 
4379     return pCacheEntry;
4380 }
4381 
4382 //--------------------------------------------------------------
4383 // KernelDll_ReleaseHashEntry - Release hash table entry
4384 //--------------------------------------------------------------
KernelDll_ReleaseHashEntry(Kdll_KernelHashTable * pHashTable,uint16_t entry)4385 void KernelDll_ReleaseHashEntry(Kdll_KernelHashTable *pHashTable, uint16_t entry)
4386 {
4387     Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1;
4388     uint32_t folded_hash;
4389     uint16_t next;
4390 
4391     VP_RENDER_FUNCTION_ENTER;
4392 
4393     if (entry == 0)
4394     {
4395         return;
4396     }
4397 
4398     // unlink entry
4399     next = pHashEntry[entry].next;
4400     pHashEntry[entry].next = 0;
4401 
4402     // remove references to entry from hash table
4403     FOLD_HASH(folded_hash, pHashEntry[entry].dwHash);
4404     if (pHashTable->wHashTable[folded_hash] == entry)
4405     {
4406         pHashTable->wHashTable[folded_hash] = next;
4407     }
4408     else
4409     {
4410         uint16_t prev = pHashTable->wHashTable[folded_hash];
4411 
4412         while (prev != 0 &&
4413                pHashEntry[prev].next != entry)
4414         {
4415             prev = pHashEntry[prev].next;
4416         }
4417 
4418         if (prev)
4419         {
4420             pHashEntry[prev].next = next;
4421         }
4422     }
4423 
4424     // return entry to pool
4425     if (pHashTable->pool == 0)
4426     {
4427         pHashTable->pool = entry;
4428     }
4429     else
4430     {
4431         pHashEntry[pHashTable->last].next = entry;
4432     }
4433     pHashTable->last = entry;
4434 }
4435 
4436 //--------------------------------------------------------------
4437 // KernelDll_ReleaseCacheEntry - Release cache entry
4438 //--------------------------------------------------------------
KernelDll_ReleaseCacheEntry(Kdll_KernelCache * pCache,Kdll_CacheEntry * pEntry)4439 void KernelDll_ReleaseCacheEntry(Kdll_KernelCache *pCache,
4440                                  Kdll_CacheEntry  *pEntry)
4441 {
4442     pEntry->iKUID = -1;
4443     pEntry->iKCID = -1;
4444     pCache->iCacheEntries--;
4445 }
4446 
4447 //---------------------------------------------------------------------------------------
4448 // KernelDll_SetupFunctionPointers - Setup Function pointers based on platform
4449 //
4450 // Parameters:
4451 //    KdllState  *pState    - [in/out] Kernel Dll state
4452 //
4453 // Output: true  - Function pointers are set
4454 //         false - Failed to setup function pointers (invalid platform)
4455 //-----------------------------------------------------------------------------------------
KernelDll_SetupFunctionPointers_Ext(Kdll_State * pState)4456 bool KernelDll_SetupFunctionPointers_Ext(
4457     Kdll_State *pState)
4458 {
4459     VP_RENDER_FUNCTION_ENTER;
4460 
4461     if (pState && pState->bEnableCMFC)
4462     {
4463         pState->pfnBuildKernel = KernelDll_BuildKernel_CmFc;
4464     }
4465 
4466     return true;
4467 }
4468 
4469 #ifdef __cplusplus
4470 }
4471 #endif  // __cplusplus
4472