1 /*
2 * Copyright (c) 2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file hal_kerneldll_next.c
24 //! \brief Kernel Dynamic Linking/Loading routines for FC
25 //!
26
27 #ifndef VPHAL_LIB
28
29 #if IMOLA
30 #include <stdlib.h>
31 #endif // IMOLA
32 #include <math.h> //for sin & cos
33 #endif // VPHAL_LIB
34
35 #if EMUL || VPHAL_LIB
36 #include <math.h>
37 #include "support.h"
38 #elif LINUX
39 #else // !(EMUL | VPHAL_LIB) && !LINUX
40
41 #endif // EMUL | VPHAL_LIB
42
43 #include "hal_kerneldll_next.h"
44 #include "vp_utils.h"
45
// Define the _DEBUG symbol for the KDLL Release build before including "vpkrnheader.h".
// This is necessary to get full kernel names in both Release and Debug versions of the KDLL app.
48 #if EMUL || VPHAL_LIB
49 #ifndef _DEBUG
50 #define _DEBUG 2
51 #endif // _DEBUG
52 #endif // EMUL || VPHAL_LIB
53
54 // Kernel IDs and Kernel Names
55 #include "vpkrnheader.h" // IDR_VP_TOTAL_NUM_KERNELS
56
// Undefine the _DEBUG symbol for the remainder of the KDLL Release build
58 #if _DEBUG == 2
59 #undef _DEBUG
60 #endif // _DEBUG
61
62
63 #ifndef PI
64 #define PI 3.1415926535897932f
65 #endif // PI
66
67 #ifdef __cplusplus
68 extern "C" {
69 #endif // __cplusplus
70
//!
//! \brief    Fold a hash value down to 8 bits
//! \details  Step 1 XORs byte 0 with byte 1 and byte 2 with byte 3 of \a hash
//!           and stores both partial results in \a folded_hash (bits 0-7 and
//!           bits 16-23). Step 2 XORs those two partial results together and
//!           keeps the low 8 bits.
//!           The intermediate value is stored in \a folded_hash, so that
//!           variable must be wide enough to hold bits 16-23 if all four bytes
//!           are to contribute. Both arguments are evaluated more than once.
//! \param    folded_hash
//!           [out] lvalue receiving the folded value
//! \param    hash
//!           [in] value to fold
//!
#define FOLD_HASH(folded_hash, hash)                                        \
    {                                                                       \
        (folded_hash) = ((hash) ^ ((hash) >> 8)) & 0x00ff00ff;              \
        (folded_hash) = ((folded_hash) ^ ((folded_hash) >> 16)) & 0xff;     \
    }
76
77 const bool g_cIsFormatYUV[Format_Count] =
78 {
79 false, // Format_Any
80 false, // Format_A8R8G8B8
81 false, // Format_X8R8G8B8
82 false, // Format_A8B8G8R8
83 false, // Format_X8B8G8R8
84 false, // Format_A16B16G16R16
85 false, // Format_A16R16G16B16
86 false, // Format_R5G6B5
87 false, // Format_R32U
88 false, // Format_R32F
89 false, // Format_R8G8B8
90 false, // Format_RGBP
91 false, // Format_BGRP
92 true, // Format_YUY2
93 true, // Format_YUYV
94 true, // Format_YVYU
95 true, // Format_UYVY
96 true, // Format_VYUY
97 true, // Format_Y216
98 true, // Format_Y210
99 true, // Format_Y416
100 true, // Format_AYUV
101 true, // Format_AUYV
102 true, // Format_Y410
103 true, // Format_400P
104 true, // Format_NV12
105 true, // Format_NV12_UnAligned
106 true, // Format_NV21
107 true, // Format_NV11
108 true, // Format_NV11_UnAligned
109 true, // Format_P208
110 true, // Format_P208_UnAligned
111 true, // Format_IMC1
112 true, // Format_IMC2
113 true, // Format_IMC3
114 true, // Format_IMC4
115 true, // Format_422H
116 true, // Format_422V
117 true, // Format_444P
118 true, // Format_411P
119 true, // Format_411R
120 true, // Format_I420
121 true, // Format_IYUV
122 true, // Format_YV12
123 true, // Format_YVU9
124 true, // Format_AI44 (YUV originally, palette may be converted to RGB)
125 true, // Format_IA44 (same as above)
126 false, // Format_P8 (using RGB since P8 is uncommon in FC)
127 false, // Format_A8P8 (same as above)
128 false, // Format_A8
129 false, // Format_L8
130 false, // Format_A4L4
131 false, // Format_A8L8
132 true, // Format_IRW0
133 true, // Format_IRW1
134 true, // Format_IRW2
135 true, // Format_IRW3
136 true, // Format_IRW4
137 true, // Format_IRW5
138 true, // Format_IRW6
139 true, // Format_IRW7
140 false, // Format_STMM
141 false, // Format_Buffer
142 false, // Format_Buffer_2D
143 false, // Format_V8U8
144 false, // Format_R32S
145 false, // Format_R8U
146 false, // Format_R8G8UN
147 false, // Format_R8G8SN
148 false, // Format_G8R8_G8B8
149 false, // Format_R16U
150 false, // Format_R16S
151 false, // Format_R16UN
152 false, // Format_RAW
153 false, // Format_Y8
154 false, // Format_Y1
155 false, // Format_Y16U
156 false, // Format_Y16S
157 false, // Format_L16
158 false, // Format_D16
159 false, // Format_R10G10B10A2
160 false, // Format_B10G10R10A2
161 true, // Format_P016
162 true, // Format_P010
163 true // Format_YV12_Planar
164 };
165
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsYUVFormat
| Purpose   : Look up whether a format is classified as YUV in g_cIsFormatYUV
| Return    : The table entry for formats in [Format_Any, Format_Count);
|             false for any format outside that range
\---------------------------------------------------------------------------*/
bool KernelDll_IsYUVFormat(MOS_FORMAT format)
{
    // Formats outside the table's index range have no entry to consult
    if (format < Format_Any || format >= Format_Count)
    {
        return false;
    }

    return g_cIsFormatYUV[format];
}
177
178 /*----------------------------------------------------------------------------
179 | Purpose : Group common color spaces into one
180 | Returns : Return the representative color space of the group
181 \---------------------------------------------------------------------------*/
KernelDll_TranslateCspace(VPHAL_CSPACE cspace)182 VPHAL_CSPACE KernelDll_TranslateCspace(VPHAL_CSPACE cspace)
183 {
184 switch (cspace)
185 {
186 case CSpace_BT709:
187 case CSpace_xvYCC709:
188 return CSpace_BT709;
189
190 case CSpace_BT601:
191 case CSpace_xvYCC601:
192 return CSpace_BT601;
193
194 case CSpace_BT601_FullRange:
195 return CSpace_BT601_FullRange;
196
197 case CSpace_BT709_FullRange:
198 return CSpace_BT709_FullRange;
199
200 case CSpace_RGB:
201 case CSpace_sRGB:
202 return CSpace_sRGB;
203
204 case CSpace_stRGB:
205 return CSpace_stRGB;
206
207 case CSpace_Gray:
208 case CSpace_BT601Gray:
209 return CSpace_BT601Gray;
210
211 case CSpace_BT601Gray_FullRange:
212 return CSpace_BT601Gray_FullRange;
213
214 case CSpace_BT2020:
215 return CSpace_BT2020;
216
217 case CSpace_BT2020_FullRange:
218 return CSpace_BT2020_FullRange;
219
220 case CSpace_BT2020_RGB:
221 return CSpace_BT2020_RGB;
222
223 case CSpace_BT2020_stRGB:
224 return CSpace_BT2020_stRGB;
225
226 default:
227 return CSpace_None;
228 }
229 }
230
/*----------------------------------------------------------------------------
| Name      : KernelDll_MatrixProduct
| Purpose   : Multiply two 3x4 matrices stored row-major in 12 floats, where
|             columns 0-2 are the 3x3 part and column 3 is a constant term:
|
|               dest(3x3)   = m1(3x3) * m2(3x3)
|               dest(col 3) = m1(3x3) * m2(col 3) + m1(col 3)
|
|             dest may be the same pointer as m1, m2 or both, which allows
|               dest = dest * m2
|               dest = m1 * dest
|               dest = dest * dest
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_MatrixProduct(
    float *      dest,
    const float *m1,
    const float *m2)
{
    float        snapshot[12];
    const float *lhs = m1;
    const float *rhs = m2;
    int32_t      i;
    int32_t      row;

    // When an input is also the output, work from a private copy of it so the
    // rows written below cannot disturb operands that are still to be read.
    if ((m1 == dest) || (m2 == dest))
    {
        for (i = 0; i < 12; i++)
        {
            snapshot[i] = dest[i];
        }

        if (m1 == dest)
        {
            lhs = snapshot;
        }
        if (m2 == dest)
        {
            rhs = snapshot;
        }
    }

    // One output row per iteration
    for (row = 0; row < 3; row++)
    {
        const float *a   = lhs + 4 * row;
        float *      out = dest + 4 * row;

        out[0] = a[0] * rhs[0] + a[1] * rhs[4] + a[2] * rhs[8];
        out[1] = a[0] * rhs[1] + a[1] * rhs[5] + a[2] * rhs[9];
        out[2] = a[0] * rhs[2] + a[1] * rhs[6] + a[2] * rhs[10];
        out[3] = a[0] * rhs[3] + a[1] * rhs[7] + a[2] * rhs[11] + a[3];
    }
}
263
/*----------------------------------------------------------------------------
| Name      : KernelDll_UpdateCscCoefficients
| Purpose   : Build the 3x4 conversion matrix for pMatrix->SrcSpace ->
|             pMatrix->DstSpace, fold in the procamp adjustment when a procamp
|             entry is selected, scale the result for kernel use and pass it
|             to pState->pfnMapCSCMatrix together with pMatrix->Coeff.
| Params    : pState  - [in]     KDLL state; pProcamp, iProcampSize and
|                                pfnMapCSCMatrix are read
|             pMatrix - [in/out] CSC entry; SrcSpace, DstSpace and iProcampID
|                                are read, iProcampVersion is updated when a
|                                procamp entry is applied, Coeff is handed to
|                                pfnMapCSCMatrix
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_UpdateCscCoefficients(Kdll_State *pState,
    Kdll_CSC_Matrix *                            pMatrix)
{
    float          csc[12];      // matrix applied after procamp  (csc  * pa)
    float          icsc[12];     // matrix applied before procamp (pa * icsc)
    float          m[12];        // matrix to be normalized for the kernel
    float          matrix[12];   // procamp product, then normalized output
    Kdll_CSCType   csctype;
    Kdll_Procamp * pProcamp = nullptr;
    VPHAL_CSPACE   src      = pMatrix->SrcSpace;
    VPHAL_CSPACE   dst      = pMatrix->DstSpace;
    bool           bCSC     = false;
    bool           bICSC    = false;
    int32_t        i;

    MOS_ZeroMemory(m, sizeof(m));
    MOS_ZeroMemory(csc, sizeof(csc));
    MOS_ZeroMemory(icsc, sizeof(icsc));

    // Select procamp parameters: the ID must be enabled and inside the table
    if (pState->pProcamp != nullptr &&
        pMatrix->iProcampID > DL_PROCAMP_DISABLED &&
        pMatrix->iProcampID < pState->iProcampSize)
    {
        pProcamp = &pState->pProcamp[pMatrix->iProcampID];
    }

    if (src != dst)
    {
        // Conversions that end in sRGB/stRGB, other than the direct
        // sRGB <-> stRGB ones, use the "csc" slot so that procamp (if any)
        // is applied before them.
        const bool bIntoRgb =
            ((dst == CSpace_sRGB) && (src != CSpace_stRGB)) ||
            ((dst == CSpace_stRGB) && (src != CSpace_sRGB));

        if (bIntoRgb)
        {
            KernelDll_GetCSCMatrix(src, dst, csc);
            MOS_SecureMemcpy(m, sizeof(csc), (void *)csc, sizeof(csc));
            bCSC    = true;
            csctype = CSC_YUV_RGB;
        }
        else
        {
            // Everything else uses the "icsc" slot (procamp applied after it)
            KernelDll_GetCSCMatrix(src, dst, icsc);
            MOS_SecureMemcpy(m, sizeof(icsc), (void *)icsc, sizeof(icsc));
            bICSC = true;

            if ((KernelDll_IsCspace(src, CSpace_RGB) && !KernelDll_IsCspace(dst, CSpace_RGB)) ||
                (KernelDll_IsCspace(src, CSpace_BT2020_RGB) && KernelDll_IsCspace(dst, CSpace_BT2020)))
            {
                csctype = CSC_RGB_YUV;
            }
            else if (KernelDll_IsCspace(src, CSpace_BT2020) && KernelDll_IsCspace(dst, CSpace_BT2020_RGB))
            {
                csctype = CSC_YUV_RGB;
            }
            else if (KernelDll_IsCspace(src, CSpace_BT2020_RGB) && KernelDll_IsCspace(dst, CSpace_BT2020_RGB))
            {
                csctype = CSC_RGB_RGB;

                // Kernel params didn't support 10bit, so the constant terms
                // are transformed from 10bit to 8bit (value/4).
                for (i = 3; i < 12; i += 4)
                {
                    m[i] = ROUND_FLOAT(m[i], 0.25f);
                }
            }
            else
            {
                csctype = CSC_YUV_YUV;
            }
        }
    }
    else if (pProcamp && ((dst == CSpace_sRGB) || (dst == CSpace_stRGB)))
    {
        // Procamp on an sRGB/stRGB surface without a color space change:
        // go through BT709 and back so that procamp is applied in between.
        KernelDll_GetCSCMatrix(dst, CSpace_BT709, icsc);
        KernelDll_GetCSCMatrix(CSpace_BT709, dst, csc);
        bICSC   = true;
        bCSC    = true;
        csctype = CSC_RGB_RGB;
    }
    else
    {
        // No conversion required: start from the identity matrix
        MOS_SecureMemcpy(m, sizeof(g_cCSC_Identity), (void *)g_cCSC_Identity, sizeof(g_cCSC_Identity));
        csctype = CSC_YUV_YUV;
    }

    // Product only happens if Procamp is present
    // Otherwise use the original matrix
    if (pProcamp)
    {
        // Procamp parameters (hue is converted from degrees to radians)
        const float b = pProcamp->fBrightness;
        const float c = pProcamp->fContrast;
        const float h = pProcamp->fHue * (PI / 180.0f);
        const float s = pProcamp->fSaturation;

        const float fCosTerm = (float)cos(h) * c * s;
        const float fSinTerm = (float)sin(h) * c * s;

        // procamp matrix
        //
        // [Y']   [ c            0          0  ] [Y]   [ 16  - 16 * c + b              ]
        // [U'] = [ 0   c*s*cos(h)  c*s*sin(h) ] [U] + [ 128 - 128*c*s*(cos(h)+sin(h)) ]
        // [V']   [ 0  -c*s*sin(h)  c*s*cos(h) ] [V]   [ 128 - 128*c*s*(cos(h)-sin(h)) ]

        matrix[0]  = c;
        matrix[1]  = 0.0f;
        matrix[2]  = 0.0f;
        matrix[3]  = 16.0f - 16.0f * c + b;

        matrix[4]  = 0.0f;
        matrix[5]  = fCosTerm;
        matrix[6]  = fSinTerm;
        matrix[7]  = 128.0f * (1.0f - fCosTerm - fSinTerm);

        matrix[8]  = 0.0f;
        matrix[9]  = -fSinTerm;
        matrix[10] = fCosTerm;
        matrix[11] = 128.0f * (1.0f - fCosTerm + fSinTerm);

        // Calculate final CSC matrix (csc * pa * icsc)
        if (bICSC)
        {
            // [pa] * [icsc]
            KernelDll_MatrixProduct(matrix, matrix, icsc);
        }

        if (bCSC)
        {
            // [csc] * [pa] (if no icsc) or [csc] * [pa] * [icsc]
            KernelDll_MatrixProduct(matrix, csc, matrix);
        }

        // Update procamp version
        pMatrix->iProcampVersion = pProcamp->iProcampVersion;

        // Use the output matrix to generate kernel CSC parameters
        MOS_SecureMemcpy(m, sizeof(m), (void *)matrix, sizeof(m));
    }

    // Normalize for kernel use: columns 0-2 are scaled by 128 (annotated
    // "9.7" in the original code), the constant column 3 is halved
    // (annotated "16.0 (value/2)").
    for (i = 0; i < 12; i++)
    {
        if ((i % 4) == 3)
        {
            matrix[i] = ROUND_FLOAT(m[i], 0.5f);
        }
        else
        {
            matrix[i] = ROUND_FLOAT(m[i], 128.0f);
        }
    }

    // Save matrix as kernel CSC coefficients
    pState->pfnMapCSCMatrix(csctype, matrix, pMatrix->Coeff);
}
428
429 //---------------------------------------------------------------------------------------
430 // KernelDll_StartKernelSearch_Next - Starts kernel search
431 //
432 // Parameters:
433 // Kdll_State *pState - [in] Dynamic Linking State
434 // Kdll_FilterEntry *pFilter - [in] Search filter (array of search entries)
435 // int iFilterSize - [in] Search filter size
436 // Kdll_SearchState *pSearchState - [in/out] Kernel search state
437 //
438 // Output: none
439 //---------------------------------------------------------------------------------------
KernelDll_StartKernelSearch_Next(Kdll_State * pState,Kdll_SearchState * pSearchState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t uiIs64BInstrEnabled)440 void KernelDll_StartKernelSearch_Next(
441 Kdll_State * pState,
442 Kdll_SearchState *pSearchState,
443 Kdll_FilterEntry *pFilter,
444 int32_t iFilterSize,
445 uint32_t uiIs64BInstrEnabled)
446 {
447 int32_t nLayer;
448
449 VP_RENDER_FUNCTION_ENTER;
450
451 // Reset all states
452 MOS_ZeroMemory(pSearchState, sizeof(Kdll_SearchState));
453
454 // Setup KDLL state
455 pSearchState->pKdllState = pState; // KDLL state
456
457 // Cleanup kernel table
458 pSearchState->KernelCount = 0; // # of kernels
459
460 // Cleanup patch data
461 memset(pSearchState->Patches, 0, sizeof(pSearchState->Patches));
462 memset(pSearchState->PatchID, -1, sizeof(pSearchState->PatchID));
463 pSearchState->PatchCount = 0;
464
465 // Copy original filter; filter will be modified as part of the search
466 if (pFilter && iFilterSize > 0)
467 {
468 MOS_SecureMemcpy(pSearchState->Filter, iFilterSize * sizeof(Kdll_FilterEntry), pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
469 pSearchState->pFilter = pSearchState->Filter;
470 pSearchState->iFilterSize = iFilterSize;
471
472 // Copy the render target format
473 pSearchState->target_format = pSearchState->pFilter[iFilterSize - 1].format;
474
475 // Copy the render target tile type
476 pSearchState->target_tiletype = pSearchState->pFilter[iFilterSize - 1].tiletype;
477
478 // Indicate whether to use 64B save kernel for render target surface
479 if (uiIs64BInstrEnabled &&
480 ((pSearchState->target_tiletype == MOS_TILE_X) ||
481 (pSearchState->target_tiletype == MOS_TILE_LINEAR)))
482 {
483 pSearchState->b64BSaveEnabled = true;
484 }
485 }
486 }
487
/*----------------------------------------------------------------------------
| Name      : KernelDll_ModifyFunctionPointers_Next
| Purpose   : Install KernelDll_StartKernelSearch_Next as the
|             pfnStartKernelSearch callback of the given KDLL state
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_ModifyFunctionPointers_Next(Kdll_State *pState)
{
    pState->pfnStartKernelSearch = KernelDll_StartKernelSearch_Next;
}
492
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsCspace
| Purpose   : Check whether a color space matches a color space or one of the
|             group selectors CSpace_RGB, CSpace_YUV, CSpace_Gray, CSpace_Any,
|             CSpace_BT2020 and CSpace_BT2020_RGB
| Return    : true if cspace belongs to the group selected by match, or is
|             equal to match when match is not a group selector
\---------------------------------------------------------------------------*/
bool KernelDll_IsCspace(VPHAL_CSPACE cspace, VPHAL_CSPACE match)
{
    bool bMatch;

    switch (match)
    {
        case CSpace_RGB:
            bMatch = (cspace == CSpace_sRGB) || (cspace == CSpace_stRGB);
            break;

        case CSpace_YUV:
            bMatch = (cspace == CSpace_BT601) ||
                     (cspace == CSpace_BT709) ||
                     (cspace == CSpace_BT601_FullRange) ||
                     (cspace == CSpace_BT709_FullRange) ||
                     (cspace == CSpace_xvYCC601) ||
                     (cspace == CSpace_xvYCC709);
            break;

        case CSpace_Gray:
            bMatch = (cspace == CSpace_BT601Gray) || (cspace == CSpace_BT601Gray_FullRange);
            break;

        case CSpace_Any:
            // Everything except "no color space" qualifies
            bMatch = (cspace != CSpace_None);
            break;

        case CSpace_BT2020:
            bMatch = (cspace == CSpace_BT2020) || (cspace == CSpace_BT2020_FullRange);
            break;

        case CSpace_BT2020_RGB:
            bMatch = (cspace == CSpace_BT2020_RGB) || (cspace == CSpace_BT2020_stRGB);
            break;

        default:
            // Not a group selector: require an exact match
            bMatch = (cspace == match);
            break;
    }

    return bMatch;
}
530
531 /*----------------------------------------------------------------------------
532 | Name : KernelDll_GetYuvRangeAndOffset
533 | Purpose : Get the YUV offset and excursion for the input color space
534 | Return : true if success else false
535 \---------------------------------------------------------------------------*/
KernelDll_GetYuvRangeAndOffset(Kdll_CSpace cspace,float * pLumaOffset,float * pLumaExcursion,float * pChromaZero,float * pChromaExcursion)536 bool KernelDll_GetYuvRangeAndOffset(
537 Kdll_CSpace cspace,
538 float * pLumaOffset,
539 float * pLumaExcursion,
540 float * pChromaZero,
541 float * pChromaExcursion)
542 {
543 bool res = true;
544
545 switch (cspace)
546 {
547 case CSpace_BT601_FullRange:
548 case CSpace_BT709_FullRange:
549 case CSpace_BT601Gray_FullRange:
550 case CSpace_BT2020_FullRange:
551 *pLumaOffset = 0.0f;
552 *pLumaExcursion = 255.0f;
553 *pChromaZero = 128.0f;
554 *pChromaExcursion = 255.0f;
555 break;
556
557 case CSpace_BT601:
558 case CSpace_BT709:
559 case CSpace_xvYCC601: // since matrix is the same as 601, use the same range
560 case CSpace_xvYCC709: // since matrix is the same as 709, use the same range
561 case CSpace_BT601Gray:
562 case CSpace_BT2020:
563 *pLumaOffset = 16.0f;
564 *pLumaExcursion = 219.0f;
565 *pChromaZero = 128.0f;
566 *pChromaExcursion = 224.0f;
567 break;
568
569 default:
570 res = false;
571 break;
572 }
573
574 return res;
575 }
576
577 /*----------------------------------------------------------------------------
578 | Name : KernelDll_GetRgbRangeAndOffset
579 | Purpose : Get the RGB offset and excursion for the input color space
580 | Return : true if success else false
581 \---------------------------------------------------------------------------*/
KernelDll_GetRgbRangeAndOffset(Kdll_CSpace cspace,float * pRgbOffset,float * pRgbExcursion)582 bool KernelDll_GetRgbRangeAndOffset(
583 Kdll_CSpace cspace,
584 float * pRgbOffset,
585 float * pRgbExcursion)
586 {
587 bool res = true;
588
589 switch (cspace)
590 {
591 case CSpace_sRGB:
592 case CSpace_BT2020_RGB:
593 *pRgbOffset = 0.0f;
594 *pRgbExcursion = 255.0f;
595 break;
596
597 case CSpace_stRGB:
598 case CSpace_BT2020_stRGB:
599 *pRgbOffset = 16.0f;
600 *pRgbExcursion = 219.0f;
601 break;
602
603 default:
604 res = false;
605 break;
606 }
607
608 return res;
609 }
610
611 /*----------------------------------------------------------------------------
612 | Name : KernelDll_CalcYuvToRgbMatrix
613 | Purpose : Given the YUV->RGB transfer matrix, get the final matrix after
614 | applying offsets and excursions.
615 |
616 | [R'] [R_o] [R_e/Y_e 0 0 ] [Y' - Y_o]
617 | [G'] = [R_o] + [YUVtoRGBCoeff (3x3 matrix)]. [ 0 R_e/C_e 0 ]. [Cb' - C_z]
618 | [B'] [R_o] [ 0 0 R_e/C_e]. [Cr' - C_z]
619 |
620 | [R'] = [C0 C1 C2] [Y' ] [C3] {Out pMatrix}
621 | [G'] = [C4 C5 C6].[Cb'] + [C7]
622 | [B'] = [C8 C9 C10] [Cr'] + [C11]
623 |
624 | Return : true if success else false
625 \---------------------------------------------------------------------------*/
KernelDll_CalcYuvToRgbMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pTransferMatrix,float * pOutMatrix)626 bool KernelDll_CalcYuvToRgbMatrix(
627 Kdll_CSpace src, // [in] YUV Color space
628 Kdll_CSpace dst, // [in] RGB Color space
629 float * pTransferMatrix, // [in] Transfer matrix (3x3)
630 float * pOutMatrix) // [out] Conversion matrix (3x4)
631 {
632 bool res;
633 float Y_o, Y_e, C_z, C_e;
634 float R_o, R_e;
635
636 res = true;
637
638 res = KernelDll_GetRgbRangeAndOffset(dst, &R_o, &R_e);
639 if (res == false)
640 {
641 goto finish;
642 }
643
644 res = KernelDll_GetYuvRangeAndOffset(src, &Y_o, &Y_e, &C_z, &C_e);
645 if (res == false)
646 {
647 goto finish;
648 }
649
650 // after + (3x3)(3x3)
651 pOutMatrix[0] = pTransferMatrix[0] * R_e / Y_e;
652 pOutMatrix[4] = pTransferMatrix[3] * R_e / Y_e;
653 pOutMatrix[8] = pTransferMatrix[6] * R_e / Y_e;
654 pOutMatrix[1] = pTransferMatrix[1] * R_e / C_e;
655 pOutMatrix[5] = pTransferMatrix[4] * R_e / C_e;
656 pOutMatrix[9] = pTransferMatrix[7] * R_e / C_e;
657 pOutMatrix[2] = pTransferMatrix[2] * R_e / C_e;
658 pOutMatrix[6] = pTransferMatrix[5] * R_e / C_e;
659 pOutMatrix[10] = pTransferMatrix[8] * R_e / C_e;
660
661 // (3x1) - (3x3)(3x3)(3x1)
662 pOutMatrix[3] = R_o - (pOutMatrix[0] * Y_o + pOutMatrix[1] * C_z + pOutMatrix[2] * C_z);
663 pOutMatrix[7] = R_o - (pOutMatrix[4] * Y_o + pOutMatrix[5] * C_z + pOutMatrix[6] * C_z);
664 pOutMatrix[11] = R_o - (pOutMatrix[8] * Y_o + pOutMatrix[9] * C_z + pOutMatrix[10] * C_z);
665
666 finish:
667 return res;
668 }
669
670 /*----------------------------------------------------------------------------
671 | Name : KernelDll_CalcRgbToYuvMatrix
672 | Purpose : Given the RGB->YUV transfer matrix, get the final matrix after
673 | applying offsets and excursions.
674 |
675 | [Y' ] [Y_o - Y_e.R_o/R_e] [Y_e/R_e 0 0 ] [ RGB to YUV ] [R']
676 | [Cb'] = [C_z] + [ 0 C_e/R_e 0 ]. [Transfer matrix]. [G']
677 | [Cr'] [C_z] [ 0 0 C_e/R_e] [ 3x3 matrix ] [B']
678 |
679 | [Y' ] = [C0 C1 C2] [R'] [C3] {Out pMatrix}
680 | [Cb'] = [C4 C5 C6].[G'] + [C7]
681 | [Cr'] = [C8 C9 C10] [B'] + [C11]
682 |
683 | Return : true if success else false
684 \---------------------------------------------------------------------------*/
KernelDll_CalcRgbToYuvMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pTransferMatrix,float * pOutMatrix)685 bool KernelDll_CalcRgbToYuvMatrix(
686 Kdll_CSpace src, // [in] RGB Color space
687 Kdll_CSpace dst, // [in] YUV Color space
688 float * pTransferMatrix, // [in] Transfer matrix (3x3)
689 float * pOutMatrix) // [out] Conversion matrix (3x4)
690 {
691 bool res;
692 float Y_o, Y_e, C_z, C_e;
693 float R_o, R_e;
694
695 res = true;
696
697 res = KernelDll_GetRgbRangeAndOffset(src, &R_o, &R_e);
698 if (res == false)
699 {
700 goto finish;
701 }
702
703 res = KernelDll_GetYuvRangeAndOffset(dst, &Y_o, &Y_e, &C_z, &C_e);
704 if (res == false)
705 {
706 goto finish;
707 }
708
709 // multiplication of + onwards
710 pOutMatrix[0] = pTransferMatrix[0] * Y_e / R_e;
711 pOutMatrix[1] = pTransferMatrix[1] * Y_e / R_e;
712 pOutMatrix[2] = pTransferMatrix[2] * Y_e / R_e;
713 pOutMatrix[4] = pTransferMatrix[3] * C_e / R_e;
714 pOutMatrix[5] = pTransferMatrix[4] * C_e / R_e;
715 pOutMatrix[6] = pTransferMatrix[5] * C_e / R_e;
716 pOutMatrix[8] = pTransferMatrix[6] * C_e / R_e;
717 pOutMatrix[9] = pTransferMatrix[7] * C_e / R_e;
718 pOutMatrix[10] = pTransferMatrix[8] * C_e / R_e;
719
720 // before +
721 pOutMatrix[3] = Y_o - Y_e * R_o / R_e;
722 pOutMatrix[7] = C_z;
723 pOutMatrix[11] = C_z;
724
725 finish:
726 return res;
727 }
728
729 /*----------------------------------------------------------------------------
730 | Name : KernelDll_CalcGrayCoeffs
731 | Purpose : Given CSC matrix, calculate the new matrix making Chroma zero.
732 | Chroma will be read from the surface, but we need to factor in C_z
733 | by adjusting this in the constant.
734 |
735 | [R'] = [C0 C1 C2] [Y' ] [C3] {Out pMatrix}
736 | [G'] = [C4 C5 C6].[C_z] + [C7]
737 | [B'] = [C8 C9 C10] [C_z] [C11]
738 |
739 | New C3 = C1 * C_z + C2 * C_z + C3
740 |
741 | Return : true if success else false
742 \---------------------------------------------------------------------------*/
KernelDll_CalcGrayCoeffs(Kdll_CSpace src,float * pMatrix)743 bool KernelDll_CalcGrayCoeffs(
744 Kdll_CSpace src, // [in] YUV source Color space
745 float * pMatrix) // [in/out] Conversion matrix (3x4)
746 {
747 float Y_o, Y_e, C_z, C_e;
748 bool res;
749
750 res = true;
751
752 res = KernelDll_GetYuvRangeAndOffset(src, &Y_o, &Y_e, &C_z, &C_e);
753 if (res == false)
754 {
755 goto finish;
756 }
757
758 // Calculate the constant offset by factoring in C_z
759 pMatrix[3] = pMatrix[1] * C_z + pMatrix[2] * C_z + pMatrix[3];
760 pMatrix[7] = pMatrix[5] * C_z + pMatrix[6] * C_z + pMatrix[7];
761 pMatrix[11] = pMatrix[9] * C_z + pMatrix[10] * C_z + pMatrix[11];
762
763 // Nullify the effect of chroma read
764 pMatrix[1] = pMatrix[2] = 0;
765 pMatrix[5] = pMatrix[6] = 0;
766 pMatrix[9] = pMatrix[10] = 0;
767
768 finish:
769 return res;
770 }
771
/*----------------------------------------------------------------------------
| Name      : KernelDll_3x3MatrixProduct
| Purpose   : Given two [3x4] input matrices, calculate [3x3]x[3x3] ignoring
|             the last column in both inputs. The last column of dest
|             (elements 3, 7 and 11) is not written.
|             Results are written to dest row by row while m1 and m2 are
|             still being read; no temporary copy is made.
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_3x3MatrixProduct(
    float *      dest,
    const float *m1,
    const float *m2)
{
    int32_t row;
    int32_t col;

    for (row = 0; row < 3; row++)
    {
        const float *lhsRow = m1 + 4 * row;
        float *      outRow = dest + 4 * row;

        for (col = 0; col < 3; col++)
        {
            // Row of m1 times column of m2 (row stride is 4 in both)
            outRow[col] = lhsRow[0] * m2[col] + lhsRow[1] * m2[4 + col] + lhsRow[2] * m2[8 + col];
        }
    }
}
795
796 /*----------------------------------------------------------------------------
797 | Name : KernelDll_CalcYuvToYuvMatrix
798 | Purpose : Calculate the matrix equation for converting b/w YUV color spaces.
799 | 1. Get conversion matrix from Source YUV to sRGB
800 | 2. Get conversion matrix from sRGB to Destination YUV
801 | 3. Apply the transformation below to get the final matrix
802 |
803 | [Y'dst] = [C0 C1 C2] [C0 C1 C2][Y'src] [C0 C1 C2] [C3] [C3]
804 | [U'] = [C4 C5 C6].[C4 C5 C6][C_z] + [C4 C5 C6].[C7] + [C7]
805 | [V'] = [C8 C9 C10] [C8 C9 C10][C_z] [C8 C9 C10] [C11] [C11]
806 | dst matrix src matrix dst matrix src dst
807 |
808 | [Y'dst] = [C0 C1 C2] [Y'src] [C3] {Out pMatrix}
809 | [U'] = [C4 C5 C6].[C_z] + [C7]
810 | [V'] = [C8 C9 C10] [C_z] [C11]
811 |
812 | Return : true if success else false
813 \---------------------------------------------------------------------------*/
KernelDll_CalcYuvToYuvMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pOutMatrix)814 bool KernelDll_CalcYuvToYuvMatrix(
815 Kdll_CSpace src, // [in] YUV Color space
816 Kdll_CSpace dst, // [in] YUV Color space
817 float * pOutMatrix) // [out] Conversion matrix (3x4)
818 {
819 float fYuvToRgb[12] = {0};
820 float fRgbToYuv[12] = {0};
821 bool res;
822
823 res = true;
824
825 // 1. Get conversion matrix from Source YUV to sRGB
826 if (IS_BT601_CSPACE(src))
827 {
828 res = KernelDll_CalcYuvToRgbMatrix(src, CSpace_sRGB, (float *)g_cCSC_BT601_YUV_RGB, fYuvToRgb);
829 }
830 else if(IS_COLOR_SPACE_BT2020_YUV(src))
831 {
832 switch (src)
833 {
834 case CSpace_BT2020:
835 res = KernelDll_CalcYuvToRgbMatrix(CSpace_BT2020, CSpace_sRGB, (float *)g_cCSC_BT2020_LimitedYUV_RGB, fYuvToRgb);
836 break;
837 case CSpace_BT2020_FullRange:
838 res = KernelDll_CalcYuvToRgbMatrix(CSpace_BT2020_FullRange, CSpace_sRGB, (float *)g_cCSC_BT2020_YUV_RGB, fYuvToRgb);
839 break;
840 default:
841 res = false;
842 break;
843 }
844 }
845 else
846 {
847 res = KernelDll_CalcYuvToRgbMatrix(src, CSpace_sRGB, (float *)g_cCSC_BT709_YUV_RGB, fYuvToRgb);
848 }
849 if (res == false)
850 {
851 goto finish;
852 }
853
854 // 2. Get conversion matrix from sRGB to Destination YUV
855 if (IS_BT601_CSPACE(dst))
856 {
857 res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT601_RGB_YUV, fRgbToYuv);
858 }
859 else if (IS_COLOR_SPACE_BT2020_YUV(dst))
860 {
861 switch (dst)
862 {
863 case CSpace_BT2020_FullRange:
864 res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT2020_RGB_YUV, fRgbToYuv);
865 break;
866 case CSpace_BT2020:
867 res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT2020_RGB_LimitedYUV, fRgbToYuv);
868 break;
869 default:
870 res = false;
871 break;
872 }
873 }
874 else
875 {
876 res = KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, (float *)g_cCSC_BT709_RGB_YUV, fRgbToYuv);
877 }
878 if (res == false)
879 {
880 goto finish;
881 }
882
883 // 3. Multiply the 2 matrices above
884 KernelDll_3x3MatrixProduct(pOutMatrix, fRgbToYuv, fYuvToRgb);
885
886 // Perform [3x3][3x1] matrix multiply + [3x1] matrix
887 pOutMatrix[3] = fRgbToYuv[0] * fYuvToRgb[3] + fRgbToYuv[1] * fYuvToRgb[7] +
888 fRgbToYuv[2] * fYuvToRgb[11] + fRgbToYuv[3];
889 pOutMatrix[7] = fRgbToYuv[4] * fYuvToRgb[3] + fRgbToYuv[5] * fYuvToRgb[7] +
890 fRgbToYuv[6] * fYuvToRgb[11] + fRgbToYuv[7];
891 pOutMatrix[11] = fRgbToYuv[8] * fYuvToRgb[3] + fRgbToYuv[9] * fYuvToRgb[7] +
892 fRgbToYuv[10] * fYuvToRgb[11] + fRgbToYuv[11];
893
894 finish:
895 return res;
896 }
897
898 /*----------------------------------------------------------------------------
899 | Name : KernelDll_GetCSCMatrix
900 | Purpose : Get the required matrix for the given CSC conversion
901 | Return :
902 \---------------------------------------------------------------------------*/
KernelDll_GetCSCMatrix(Kdll_CSpace src,Kdll_CSpace dst,float * pCSC_Matrix)903 void KernelDll_GetCSCMatrix(
904 Kdll_CSpace src, // [in] Source Color space
905 Kdll_CSpace dst, // [in] Destination Color space
906 float * pCSC_Matrix) // [out] CSC matrix to use
907 {
908 bool bMatrix;
909 bool bSrcGray;
910 Kdll_CSpace temp;
911 int32_t i;
912
913 bMatrix = false;
914 bSrcGray = KernelDll_IsCspace(src, CSpace_Gray);
915
916 // convert gray color spaces to its equivalent non-gray cpsace
917 switch (src)
918 {
919 case CSpace_BT601Gray:
920 temp = CSpace_BT601;
921 break;
922 case CSpace_BT601Gray_FullRange:
923 temp = CSpace_BT601_FullRange;
924 break;
925 default:
926 temp = src;
927 break;
928 }
929
930 // BT601/709 YUV to sRGB/stRGB conversion
931 if (KernelDll_IsCspace(temp, CSpace_YUV) || KernelDll_IsCspace(temp, CSpace_Gray))
932 {
933 if (KernelDll_IsCspace(dst, CSpace_RGB))
934 {
935 if (IS_BT601_CSPACE(temp))
936 {
937 KernelDll_CalcYuvToRgbMatrix(temp, dst, (float *)g_cCSC_BT601_YUV_RGB, pCSC_Matrix);
938 bMatrix = true;
939 }
940 else // if (IS_BT709_CSPACE(temp))
941 {
942 KernelDll_CalcYuvToRgbMatrix(temp, dst, (float *)g_cCSC_BT709_YUV_RGB, pCSC_Matrix);
943 bMatrix = true;
944 }
945 }
946 }
947 // sRGB/stRGB to BT601/709 YUV conversion
948 else if (KernelDll_IsCspace(temp, CSpace_RGB))
949 {
950 if (KernelDll_IsCspace(dst, CSpace_YUV))
951 {
952 if (IS_BT601_CSPACE(dst))
953 {
954 KernelDll_CalcRgbToYuvMatrix(temp, dst, (float *)g_cCSC_BT601_RGB_YUV, pCSC_Matrix);
955 bMatrix = true;
956 }
957 else // if (IS_BT709_CSPACE(temp))
958 {
959 KernelDll_CalcRgbToYuvMatrix(temp, dst, (float *)g_cCSC_BT709_RGB_YUV, pCSC_Matrix);
960 bMatrix = true;
961 }
962 }
963 }
964 // BT2020 YUV to RGB conversion
965 else if (KernelDll_IsCspace(temp, CSpace_BT2020))
966 {
967 if (KernelDll_IsCspace(dst, CSpace_BT2020_RGB))
968 {
969 KernelDll_CalcYuvToRgbMatrix(temp, dst, (float *)g_cCSC_BT2020_YUV_RGB, pCSC_Matrix);
970 bMatrix = true;
971 }
972 }
973 // BT2020 RGB to YUV conversion
974 else if (KernelDll_IsCspace(temp, CSpace_BT2020_RGB))
975 {
976 if (KernelDll_IsCspace(dst, CSpace_BT2020))
977 {
978 KernelDll_CalcRgbToYuvMatrix(temp, dst, (float *)g_cCSC_BT2020_RGB_YUV, pCSC_Matrix);
979 bMatrix = true;
980 }
981 }
982
983 // If matrix has not been derived yet, its one of the below special cases
984 if (!bMatrix)
985 {
986 if (temp == dst) // Check if its identity matrix
987 {
988 MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_Identity), (void *)g_cCSC_Identity, sizeof(g_cCSC_Identity));
989 }
990 else if (KernelDll_IsCspace(temp, CSpace_RGB)) // sRGB to stRGB inter-conversions
991 {
992 if (temp == CSpace_sRGB)
993 {
994 MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_sRGB_stRGB), (void *)g_cCSC_sRGB_stRGB, sizeof(g_cCSC_sRGB_stRGB));
995 }
996 else //temp == CSpace_stRGB
997 {
998 MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_stRGB_sRGB), (void *)g_cCSC_stRGB_sRGB, sizeof(g_cCSC_stRGB_sRGB));
999 }
1000 }
1001 else if (KernelDll_IsCspace(temp, CSpace_YUV)) // 601 to 709 inter-conversions
1002 {
1003 KernelDll_CalcYuvToYuvMatrix(temp, dst, pCSC_Matrix);
1004 }
1005 else if (KernelDll_IsCspace(temp, CSpace_BT2020_RGB))
1006 {
1007 if (temp == CSpace_BT2020_RGB) //BT2020_RGB to BT2020_limited_RGB conversions
1008 {
1009 MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_BT2020RGB_BT2020stRGB), (void *)g_cCSC_BT2020RGB_BT2020stRGB, sizeof(g_cCSC_BT2020RGB_BT2020stRGB));
1010 }
1011 else if (temp == CSpace_BT2020_stRGB) //BT2020_limited_RGB to BT2020_RGB conversions
1012 {
1013 MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_BT2020stRGB_BT2020RGB), (void *)g_cCSC_BT2020stRGB_BT2020RGB, sizeof(g_cCSC_BT2020stRGB_BT2020RGB));
1014 }
1015 }
1016 else if (KernelDll_IsCspace(temp, CSpace_BT2020)) // BT2020 limited_YUV to BT2020_FullRange_YUV conversions
1017 {
1018 KernelDll_CalcYuvToYuvMatrix(temp, dst, pCSC_Matrix);
1019 }
1020 else
1021 {
1022 VP_RENDER_ASSERTMESSAGE("Not supported color space conversion(from %d to %d)", src, dst);
1023 MT_ERR2(MT_VP_KERNEL_CSC, MT_VP_KERNEL_CSPACE, src, MT_VP_KERNEL_CSPACE, dst);
1024 }
1025 }
1026
1027 // Calculate the Gray transformation matrix now
1028 if (bSrcGray)
1029 {
1030 KernelDll_CalcGrayCoeffs(src, pCSC_Matrix);
1031 }
1032
1033 VP_RENDER_NORMALMESSAGE("");
1034 for (i = 0; i < 3; i++)
1035 {
1036 VP_RENDER_NORMALMESSAGE("%f\t%f\t%f\t%f",
1037 pCSC_Matrix[4 * i],
1038 pCSC_Matrix[4 * i + 1],
1039 pCSC_Matrix[4 * i + 2],
1040 pCSC_Matrix[4 * i + 3]);
1041 }
1042 }
1043
KernelDll_MapCSCMatrix(Kdll_CSCType csctype,const float * matrix,short * coeff)1044 bool KernelDll_MapCSCMatrix(
1045 Kdll_CSCType csctype,
1046 const float *matrix,
1047 short * coeff)
1048 {
1049 // Unified kernel architecture requires that the color space
1050 // conversion coefficients programmed in specific orders, depends on the
1051 // type of the color space conversion.
1052 //
1053 // M (matrix) ---> C (coeff)
1054
1055 switch (csctype)
1056 {
1057 case CSC_YUV_RGB:
1058 // direct mapping from matrix to coeff
1059 coeff[0] = FLOAT_TO_SHORT(matrix[0]); // M0 --> C0
1060 coeff[1] = FLOAT_TO_SHORT(matrix[1]); // M1 --> C1
1061 coeff[2] = FLOAT_TO_SHORT(matrix[2]); // M2 --> C2
1062 coeff[3] = FLOAT_TO_SHORT(matrix[3]); // M3 --> C3
1063 coeff[4] = FLOAT_TO_SHORT(matrix[4]); // M4 --> C4
1064 coeff[5] = FLOAT_TO_SHORT(matrix[5]); // M5 --> C5
1065 coeff[6] = FLOAT_TO_SHORT(matrix[6]); // M6 --> C6
1066 coeff[7] = FLOAT_TO_SHORT(matrix[7]); // M7 --> C7
1067 coeff[8] = FLOAT_TO_SHORT(matrix[8]); // M8 --> C8
1068 coeff[9] = FLOAT_TO_SHORT(matrix[9]); // M9 --> C9
1069 coeff[10] = FLOAT_TO_SHORT(matrix[10]); // M10 --> C10
1070 coeff[11] = FLOAT_TO_SHORT(matrix[11]); // M11 --> C11
1071 break;
1072
1073 case CSC_RGB_YUV:
1074 coeff[6] = FLOAT_TO_SHORT(matrix[0]); // M0 --> C6
1075 coeff[4] = FLOAT_TO_SHORT(matrix[1]); // M1 --> C4
1076 coeff[5] = FLOAT_TO_SHORT(matrix[2]); // M2 --> C5
1077 coeff[7] = FLOAT_TO_SHORT(matrix[3]); // M3 --> C7
1078 coeff[10] = FLOAT_TO_SHORT(matrix[4]); // M4 --> C10
1079 coeff[8] = FLOAT_TO_SHORT(matrix[5]); // M5 --> C8
1080 coeff[9] = FLOAT_TO_SHORT(matrix[6]); // M6 --> C9
1081 coeff[11] = FLOAT_TO_SHORT(matrix[7]); // M7 --> C11
1082 coeff[2] = FLOAT_TO_SHORT(matrix[8]); // M8 --> C2
1083 coeff[0] = FLOAT_TO_SHORT(matrix[9]); // M9 --> C0
1084 coeff[1] = FLOAT_TO_SHORT(matrix[10]); // M10 --> C1
1085 coeff[3] = FLOAT_TO_SHORT(matrix[11]); // M11 --> C3
1086 break;
1087
1088 case CSC_YUV_YUV:
1089 coeff[4] = FLOAT_TO_SHORT(matrix[0]); // M0 --> C4
1090 coeff[5] = FLOAT_TO_SHORT(matrix[1]); // M1 --> C5
1091 coeff[6] = FLOAT_TO_SHORT(matrix[2]); // M2 --> C6
1092 coeff[7] = FLOAT_TO_SHORT(matrix[3]); // M3 --> C7
1093 coeff[8] = FLOAT_TO_SHORT(matrix[4]); // M4 --> C8
1094 coeff[9] = FLOAT_TO_SHORT(matrix[5]); // M5 --> C9
1095 coeff[10] = FLOAT_TO_SHORT(matrix[6]); // M6 --> C10
1096 coeff[11] = FLOAT_TO_SHORT(matrix[7]); // M7 --> C11
1097 coeff[0] = FLOAT_TO_SHORT(matrix[8]); // M8 --> C0
1098 coeff[1] = FLOAT_TO_SHORT(matrix[9]); // M9 --> C1
1099 coeff[2] = FLOAT_TO_SHORT(matrix[10]); // M10 --> C2
1100 coeff[3] = FLOAT_TO_SHORT(matrix[11]); // M11 --> C3
1101 break;
1102
1103 default:
1104 //CSC_RGB_RGB
1105 coeff[0] = FLOAT_TO_SHORT(matrix[1]); // M1 --> C0
1106 coeff[1] = FLOAT_TO_SHORT(matrix[2]); // M2 --> C1
1107 coeff[2] = FLOAT_TO_SHORT(matrix[0]); // M0 --> C2
1108 coeff[3] = FLOAT_TO_SHORT(matrix[3]); // M3 --> C3
1109 coeff[4] = FLOAT_TO_SHORT(matrix[5]); // M5 --> C4
1110 coeff[5] = FLOAT_TO_SHORT(matrix[6]); // M6 --> C5
1111 coeff[6] = FLOAT_TO_SHORT(matrix[4]); // M4 --> C6
1112 coeff[7] = FLOAT_TO_SHORT(matrix[7]); // M7 --> C7
1113 coeff[8] = FLOAT_TO_SHORT(matrix[9]); // M9 --> C8
1114 coeff[9] = FLOAT_TO_SHORT(matrix[10]); // M10 --> C9
1115 coeff[10] = FLOAT_TO_SHORT(matrix[8]); // M8 --> C10
1116 coeff[11] = FLOAT_TO_SHORT(matrix[11]); // M11 --> C11
1117 break;
1118 }
1119
1120 return true;
1121 }
1122
KernelDll_IsFormat(MOS_FORMAT format,VPHAL_CSPACE cspace,MOS_FORMAT match)1123 bool KernelDll_IsFormat(
1124 MOS_FORMAT format,
1125 VPHAL_CSPACE cspace,
1126 MOS_FORMAT match)
1127 {
1128 switch (match)
1129 {
1130 case Format_Any:
1131 return (format != Format_None);
1132 break;
1133
1134 case Format_RGB_Swap:
1135 return (IS_RGB_SWAP(format));
1136
1137 case Format_RGB_No_Swap:
1138 return (IS_RGB_NO_SWAP(format));
1139
1140 case Format_RGB:
1141 if (IS_PAL_FORMAT(format))
1142 {
1143 return (KernelDll_IsCspace(cspace, CSpace_RGB));
1144 }
1145 else
1146 {
1147 return (IS_RGB_FORMAT(format) && !IS_PL3_RGB_FORMAT(format));
1148 }
1149
1150 case Format_RGB32:
1151 return (IS_RGB32_FORMAT(format));
1152
1153 case Format_PA:
1154 if (IS_PAL_FORMAT(format))
1155 {
1156 return (KernelDll_IsCspace(cspace, CSpace_YUV));
1157 }
1158 else
1159 {
1160 return (IS_PA_FORMAT(format) ||
1161 format == Format_AUYV);
1162 }
1163
1164 case Format_PL2:
1165 return (IS_PL2_FORMAT(format));
1166
1167 case Format_PL2_UnAligned:
1168 return (IS_PL2_FORMAT_UnAligned(format));
1169
1170 case Format_PL3:
1171 return (IS_PL3_FORMAT(format));
1172
1173 case Format_PL3_RGB:
1174 return (IS_PL3_RGB_FORMAT(format));
1175
1176 case Format_AYUV:
1177 return (format == Format_AYUV);
1178
1179 case Format_PAL:
1180 return (IS_PAL_FORMAT(format));
1181
1182 default:
1183 return (format == match);
1184 }
1185
1186 return false;
1187 }
1188
1189 //---------------------------------------------------------------------------------------
1190 // KernelDll_SetupProcampParameters - Setup Kernel Procamp Parameters
1191 //
1192 // Parameters:
1193 // Kdll_State *pState - [in] Kernel dll State to release
1194 // Kdll_Procamp *pProcamp - [in] Pointer to array of Procamp Parameters
1195 // int32_t iProcampSize - [in] Size of the array
1196 //
1197 // Output: Pointer to allocated Kernel dll state
1198 // nullptr - Failed to allocate Kernel dll state
1199 //-----------------------------------------------------------------------------------------
KernelDll_SetupProcampParameters(Kdll_State * pState,Kdll_Procamp * pProcamp,int32_t iProcampSize)1200 void KernelDll_SetupProcampParameters(Kdll_State *pState,
1201 Kdll_Procamp *pProcamp,
1202 int32_t iProcampSize)
1203 {
1204 VP_RENDER_FUNCTION_ENTER;
1205
1206 // Setup pointer to procamp parameters
1207 pState->pProcamp = pProcamp;
1208 pState->iProcampSize = iProcampSize;
1209 }
1210
//--------------------------------------------------------------
// Fowler/Noll/Vo FNV-1a hash algorithm (32-bit) - public domain
//
// Parameters:
//    void    *pData - [in] Buffer to hash (not modified)
//    int32_t iSize  - [in] Number of bytes to hash
//
// Output: 32-bit FNV-1a hash of the buffer. The FNV offset basis
//         (0x811c9dc5) is returned when pData is null or iSize <= 0.
//--------------------------------------------------------------
uint32_t KernelDll_SimpleHash(void *pData, int32_t iSize)
{
    static const uint32_t k    = 0x1000193;   // FNV prime
    uint32_t              hash = 0x811c9dc5;  // FNV offset basis

    // Bytes must be read as unsigned octets: with plain (possibly signed)
    // char, values >= 0x80 would be sign-extended before the XOR, giving a
    // result that is neither FNV-1a nor the same across platforms.
    const unsigned char *p = (const unsigned char *)pData;

    if (!p)
    {
        return hash;
    }

    for (; iSize > 0; iSize--)
    {
        hash ^= (uint32_t)(*p++);
        hash *= k;
    }

    return hash;
}
1228
1229 //--------------------------------------------------------------
1230 // KernelDll_GetCombinedKernel - Search combined kernel
1231 //--------------------------------------------------------------
KernelDll_GetCombinedKernel(Kdll_State * pState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t dwHash)1232 Kdll_CacheEntry *KernelDll_GetCombinedKernel(
1233 Kdll_State *pState,
1234 Kdll_FilterEntry *pFilter,
1235 int32_t iFilterSize,
1236 uint32_t dwHash)
1237 {
1238 Kdll_KernelHashTable *pHashTable;
1239 Kdll_KernelHashEntry *entries, *curr, *next;
1240 uint32_t folded_hash;
1241 uint16_t entry;
1242
1243 VP_RENDER_FUNCTION_ENTER;
1244
1245 // Get hash table
1246 pHashTable = &pState->KernelHashTable;
1247
1248 // fold hash from 32 to 8 bit :-)
1249 FOLD_HASH(folded_hash, dwHash)
1250
1251 // No entries
1252 entry = pHashTable->wHashTable[folded_hash];
1253 if (entry == 0 || entry > DL_MAX_COMBINED_KERNELS ) return nullptr;
1254
1255 entries = (&pHashTable->HashEntry[0]) - 1; // all indices are 1 based (0 means null)
1256 curr = &entries[entry];
1257 for (; (curr != nullptr); curr = next)
1258 {
1259 // match 32-bit hash, then compare filter
1260 if (curr->dwHash == dwHash &&
1261 curr->iFilter == iFilterSize)
1262 {
1263 if (memcmp(curr->pFilter, pFilter, iFilterSize * sizeof(Kdll_FilterEntry)) == 0)
1264 {
1265 break;
1266 }
1267 }
1268
1269 // Next entry with the same 8-bit folded hash
1270 next = (curr->next) ? (&entries[curr->next]) : nullptr;
1271 }
1272
1273 if (curr)
1274 { // Kernel already cached
1275 curr->pCacheEntry->dwRefresh = pState->dwRefresh++;
1276 return (curr->pCacheEntry);
1277 }
1278 else
1279 { // Kernel must be built
1280 return nullptr;
1281 }
1282 }
1283
1284 /*----------------------------------------------------------------------------
1285 | Name : KernelDll_FindRule
1286 | Purpose : Find a rule that matches the current search/input state
1287 |
1288 | Input : pState - Kernel Dll state
1289 | pSearchState - current DL search state
1290 |
1291 | Return :
1292 \---------------------------------------------------------------------------*/
KernelDll_FindRule(Kdll_State * pState,Kdll_SearchState * pSearchState)1293 bool KernelDll_FindRule(
1294 Kdll_State * pState,
1295 Kdll_SearchState *pSearchState)
1296 {
1297 uint32_t parser_state = (uint32_t)pSearchState->state;
1298 Kdll_RuleEntrySet * pRuleSet;
1299 const Kdll_RuleEntry *pRuleEntry;
1300 int32_t iRuleCount;
1301 int32_t iMatchCount;
1302 bool bLayerFormatMatched;
1303 bool bSrc0FormatMatched;
1304 bool bSrc1FormatMatched;
1305 bool bTargetFormatMatched;
1306 bool bSrc0SampingMatched;
1307
1308 VP_RENDER_FUNCTION_ENTER;
1309
1310 // All Custom states are handled as a single group
1311 if (parser_state >= Parser_Custom)
1312 {
1313 parser_state = Parser_Custom;
1314 }
1315
1316 pRuleSet = pState->pDllRuleTable[parser_state];
1317 iRuleCount = pState->iDllRuleCount[parser_state];
1318
1319 if (pRuleSet == nullptr || iRuleCount == 0)
1320 {
1321 VP_RENDER_NORMALMESSAGE("Search rules undefined.");
1322 pSearchState->pMatchingRuleSet = nullptr;
1323 return false;
1324 }
1325
1326 // Search matching entry
1327 for (; iRuleCount > 0; iRuleCount--, pRuleSet++)
1328 {
1329 // Points to the first rule, get number of matches
1330 pRuleEntry = pRuleSet->pRuleEntry;
1331 iMatchCount = pRuleSet->iMatchCount;
1332
1333 // Initialize for each Ruleset
1334 bLayerFormatMatched = false;
1335 bSrc0FormatMatched = false;
1336 bSrc1FormatMatched = false;
1337 bTargetFormatMatched = false;
1338 bSrc0SampingMatched = false;
1339
1340 // Match all rules within the same RuleSet
1341 for (; iMatchCount > 0; iMatchCount--, pRuleEntry++)
1342 {
1343 switch (pRuleEntry->id)
1344 {
1345 // Match current Parser State
1346 case RID_IsParserState:
1347 if (pSearchState->state == (Kdll_ParserState)pRuleEntry->value)
1348 {
1349 continue;
1350 }
1351 else
1352 {
1353 break;
1354 }
1355
1356 // Match render method
1357 case RID_IsRenderMethod:
1358 if (pSearchState->pFilter->RenderMethod == (Kdll_RenderMethod)pRuleEntry->value)
1359 {
1360 continue;
1361 }
1362 else
1363 {
1364 break;
1365 }
1366
1367 // Match target color space
1368 case RID_IsTargetCspace:
1369 if (KernelDll_IsCspace(pSearchState->cspace, (VPHAL_CSPACE)pRuleEntry->value))
1370 {
1371 continue;
1372 }
1373 else
1374 {
1375 break;
1376 }
1377
1378 // Match current layer ID
1379 case RID_IsLayerID:
1380 if (pSearchState->pFilter->layer == (Kdll_Layer)pRuleEntry->value)
1381 {
1382 continue;
1383 }
1384 else
1385 {
1386 break;
1387 }
1388
1389 // Match current layer format
1390 case RID_IsLayerFormat:
1391 if (pRuleEntry->logic == Kdll_Or && bLayerFormatMatched)
1392 {
1393 // Already found matching format in the ruleset
1394 continue;
1395 }
1396 else
1397 {
1398 // Check if the layer format matches the rule
1399 if (KernelDll_IsFormat(pSearchState->pFilter->format,
1400 pSearchState->pFilter->cspace,
1401 (MOS_FORMAT)pRuleEntry->value))
1402 {
1403 bLayerFormatMatched = true;
1404 }
1405
1406 if (pRuleEntry->logic == Kdll_None && !bLayerFormatMatched)
1407 {
1408 // Last entry and No matching format was found
1409 break;
1410 }
1411 else
1412 {
1413 continue;
1414 }
1415 }
1416
1417 // Match shuffling requirement
1418 case RID_IsShuffling:
1419 if (pSearchState->ShuffleSamplerData == (Kdll_Shuffling)pRuleEntry->value)
1420 {
1421 continue;
1422 }
1423 else
1424 {
1425 break;
1426 }
1427
1428 // Check if RT rotates
1429 case RID_IsRTRotate:
1430 if (pSearchState->bRTRotate == (pRuleEntry->value ? true : false))
1431 {
1432 continue;
1433 }
1434 else
1435 {
1436 break;
1437 }
1438
1439 // Match current layer rotation
1440 case RID_IsLayerRotation:
1441 if (pSearchState->pFilter->rotation == (VPHAL_ROTATION)pRuleEntry->value)
1442 {
1443 continue;
1444 }
1445 else
1446 {
1447 break;
1448 }
1449
1450 // Match Src0 source format (surface)
1451 case RID_IsSrc0Format:
1452 if (pRuleEntry->logic == Kdll_Or && bSrc0FormatMatched)
1453 {
1454 // Already found matching format in the ruleset
1455 continue;
1456 }
1457 else
1458 {
1459 // Check if the source 0 format matches the rule
1460 // The intermediate colorspace is used to determine
1461 // if palettized input is given in RGB or YUV format.
1462 if (KernelDll_IsFormat(pSearchState->src0_format,
1463 pSearchState->cspace,
1464 (MOS_FORMAT)pRuleEntry->value))
1465 {
1466 bSrc0FormatMatched = true;
1467 }
1468
1469 if (pRuleEntry->logic == Kdll_None && !bSrc0FormatMatched)
1470 {
1471 // Last entry and No matching format was found
1472 break;
1473 }
1474 else
1475 {
1476 continue;
1477 }
1478 }
1479
1480 // Match Src0 sampling mode
1481 case RID_IsSrc0Sampling:
1482 // Check if the layer format matches the rule
1483 if (pSearchState->src0_sampling == (Kdll_Sampling)pRuleEntry->value)
1484 {
1485 bSrc0SampingMatched = true;
1486 continue;
1487 }
1488 else if (bSrc0SampingMatched || pRuleEntry->logic == Kdll_Or)
1489 {
1490 continue;
1491 }
1492 else if ((Kdll_Sampling)pRuleEntry->value == Sample_Any &&
1493 pSearchState->src0_sampling != Sample_None)
1494 {
1495 continue;
1496 }
1497 else
1498 {
1499 break;
1500 }
1501
1502 // Match Src0 rotation
1503 case RID_IsSrc0Rotation:
1504 if (pSearchState->src0_rotation == (VPHAL_ROTATION)pRuleEntry->value)
1505 {
1506 continue;
1507 }
1508 else
1509 {
1510 break;
1511 }
1512
1513 // Match Src0 Colorfill
1514 case RID_IsSrc0ColorFill:
1515 if (pSearchState->src0_colorfill == (int32_t)pRuleEntry->value)
1516 {
1517 continue;
1518 }
1519 else
1520 {
1521 break;
1522 }
1523
1524 // Match Src0 Luma Key
1525 case RID_IsSrc0LumaKey:
1526 if (pSearchState->src0_lumakey == (int32_t)pRuleEntry->value)
1527 {
1528 continue;
1529 }
1530 else
1531 {
1532 break;
1533 }
1534
1535 // Match Src0 Procamp
1536 case RID_IsSrc0Procamp:
1537 if (pSearchState->pFilter->procamp == (int32_t)pRuleEntry->value)
1538 {
1539 continue;
1540 }
1541 else
1542 {
1543 break;
1544 }
1545
1546 // Match Src0 CSC coefficients
1547 case RID_IsSrc0Coeff:
1548 if (pSearchState->src0_coeff == (Kdll_CoeffID)pRuleEntry->value)
1549 {
1550 continue;
1551 }
1552 else if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Any &&
1553 pSearchState->src0_coeff != CoeffID_None)
1554 {
1555 continue;
1556 }
1557 else
1558 {
1559 break;
1560 }
1561
1562 // Match Src0 CSC coefficients setting mode
1563 case RID_IsSetCoeffMode:
1564 if (pSearchState->pFilter->SetCSCCoeffMode == (Kdll_SetCSCCoeffMethod)pRuleEntry->value)
1565 {
1566 continue;
1567 }
1568 else
1569 {
1570 break;
1571 }
1572
1573 // Match Src0 processing mode
1574 case RID_IsSrc0Processing:
1575 if (pSearchState->src0_process == (Kdll_Processing)pRuleEntry->value)
1576 {
1577 continue;
1578 }
1579 if ((Kdll_Processing)pRuleEntry->value == Process_Any &&
1580 pSearchState->src0_process != Process_None)
1581 {
1582 continue;
1583 }
1584 else
1585 {
1586 break;
1587 }
1588
1589 // Match Src0 chromasiting mode
1590 case RID_IsSrc0Chromasiting:
1591 if (pSearchState->Filter->chromasiting == (int32_t)pRuleEntry->value)
1592 {
1593 continue;
1594 }
1595 else
1596 {
1597 break;
1598 }
1599
1600 // Match Src1 source format (surface)
1601 case RID_IsSrc1Format:
1602 if (pRuleEntry->logic == Kdll_Or && bSrc1FormatMatched)
1603 {
1604 // Already found matching format in the ruleset
1605 continue;
1606 }
1607 else
1608 {
1609 // Check if the source 1 format matches the rule
1610 // The intermediate colorspace is used to determine
1611 // if palettized input is given in RGB or YUV format.
1612 if (KernelDll_IsFormat(pSearchState->src1_format,
1613 pSearchState->cspace,
1614 (MOS_FORMAT)pRuleEntry->value))
1615 {
1616 bSrc1FormatMatched = true;
1617 }
1618
1619 if (pRuleEntry->logic == Kdll_None && !bSrc1FormatMatched)
1620 {
1621 // Last entry and No matching format was found
1622 break;
1623 }
1624 else
1625 {
1626 continue;
1627 }
1628 }
1629 // Match Src1 sampling mode
1630 case RID_IsSrc1Sampling:
1631 if (pSearchState->src1_sampling == (Kdll_Sampling)pRuleEntry->value)
1632 {
1633 continue;
1634 }
1635 else if ((Kdll_Sampling)pRuleEntry->value == Sample_Any &&
1636 pSearchState->src1_sampling != Sample_None)
1637 {
1638 continue;
1639 }
1640 else
1641 {
1642 break;
1643 }
1644
1645 // Match Src1 Luma Key
1646 case RID_IsSrc1LumaKey:
1647 if (pSearchState->src1_lumakey == (int32_t)pRuleEntry->value)
1648 {
1649 continue;
1650 }
1651 else
1652 {
1653 break;
1654 }
1655
1656 // Match Src1 Sampler LumaKey
1657 case RID_IsSrc1SamplerLumaKey:
1658 if (pSearchState->src1_samplerlumakey == (int32_t)pRuleEntry->value)
1659 {
1660 continue;
1661 }
1662 else
1663 {
1664 break;
1665 }
1666
1667 // Match Src1 Procamp
1668 case RID_IsSrc1Procamp:
1669 if (pSearchState->pFilter->procamp == (int32_t)pRuleEntry->value)
1670 {
1671 continue;
1672 }
1673 else
1674 {
1675 break;
1676 }
1677
1678 // Match Src1 CSC coefficients
1679 case RID_IsSrc1Coeff:
1680 if (pSearchState->src1_coeff == (Kdll_CoeffID)pRuleEntry->value)
1681 {
1682 continue;
1683 }
1684 else if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Any &&
1685 pSearchState->src1_coeff != CoeffID_None)
1686 {
1687 continue;
1688 }
1689 else
1690 {
1691 break;
1692 }
1693
1694 // Match Src1 processing mode
1695 case RID_IsSrc1Processing:
1696 if (pSearchState->src1_process == (Kdll_Processing)pRuleEntry->value)
1697 {
1698 continue;
1699 }
1700 if ((Kdll_Processing)pRuleEntry->value == Process_Any &&
1701 pSearchState->src1_process != Process_None)
1702 {
1703 continue;
1704 }
1705 else
1706 {
1707 break;
1708 }
1709
1710 // Match Src1 chromasiting mode
1711 case RID_IsSrc1Chromasiting:
1712 //pSearchState->pFilter is pointed to the real sub layer
1713 if (pSearchState->pFilter->chromasiting == (int32_t)pRuleEntry->value)
1714 {
1715 continue;
1716 }
1717 else
1718 {
1719 break;
1720 }
1721
1722 // Match Layer number
1723 case RID_IsLayerNumber:
1724 if (pSearchState->layer_number == (int32_t)pRuleEntry->value)
1725 {
1726 continue;
1727 }
1728 else
1729 {
1730 break;
1731 }
1732
1733 // Match quadrant
1734 case RID_IsQuadrant:
1735 if (pSearchState->quadrant == (int32_t)pRuleEntry->value)
1736 {
1737 continue;
1738 }
1739 else
1740 {
1741 break;
1742 }
1743
1744 // Set CSC flag before Mix
1745 case RID_IsCSCBeforeMix:
1746 if (pSearchState->bCscBeforeMix == (pRuleEntry->value ? true : false))
1747 {
1748 continue;
1749 }
1750 else
1751 {
1752 break;
1753 }
1754
1755 case RID_IsDualOutput:
1756 if (pSearchState->pFilter->dualout == (pRuleEntry->value ? true : false))
1757 {
1758 continue;
1759 }
1760 else
1761 {
1762 break;
1763 }
1764
1765 case RID_IsTargetFormat:
1766 if (pRuleEntry->logic == Kdll_Or && bTargetFormatMatched)
1767 {
1768 // Already found matching format in the ruleset
1769 continue;
1770 }
1771 else
1772 {
1773 if (pSearchState->target_format == (MOS_FORMAT)pRuleEntry->value)
1774 {
1775 bTargetFormatMatched = true;
1776 }
1777
1778 if (pRuleEntry->logic == Kdll_None && !bTargetFormatMatched)
1779 {
1780 // Last entry and No matching format was found
1781 break;
1782 }
1783 else
1784 {
1785 continue;
1786 }
1787 }
1788
1789 case RID_Is64BSaveEnabled:
1790 if (pSearchState->b64BSaveEnabled == (pRuleEntry->value ? true : false))
1791 {
1792 continue;
1793 }
1794 else
1795 {
1796 break;
1797 }
1798
1799 case RID_IsTargetTileType:
1800 if (pRuleEntry->logic == Kdll_None &&
1801 pSearchState->target_tiletype == (MOS_TILE_TYPE)pRuleEntry->value)
1802 {
1803 continue;
1804 }
1805 else if (pRuleEntry->logic == Kdll_Not &&
1806 pSearchState->target_tiletype != (MOS_TILE_TYPE)pRuleEntry->value)
1807 {
1808 continue;
1809 }
1810 else
1811 {
1812 break;
1813 }
1814
1815 case RID_IsProcampEnabled:
1816 if (pSearchState->bProcamp == (pRuleEntry->value ? true : false))
1817 {
1818 continue;
1819 }
1820 else
1821 {
1822 break;
1823 }
1824
1825 case RID_IsConstOutAlpha:
1826 if (pSearchState->pFilter->bFillOutputAlphaWithConstant == (pRuleEntry->value ? true : false))
1827 {
1828 continue;
1829 }
1830 else
1831 {
1832 break;
1833 }
1834
1835 case RID_IsDitherNeeded:
1836 if (pSearchState->pFilter->bIsDitherNeeded == (pRuleEntry->value ? true : false))
1837 {
1838 continue;
1839 }
1840 else
1841 {
1842 break;
1843 }
1844 // Undefined search rule will fail
1845 default:
1846 VP_RENDER_ASSERTMESSAGE("Invalid rule %d @ layer %d, state %d.", pRuleEntry->id, pSearchState->layer_number, pSearchState->state);
1847 MT_ERR3(MT_VP_KERNEL_RULE, MT_VP_KERNEL_RULE_ID, pRuleEntry->id, MT_VP_KERNEL_RULE_LAYERNUM, pSearchState->layer_number, MT_VP_KERNEL_RULE_SEARCH_STATE, pSearchState->state);
1848 break;
1849 } // End of switch to deal with all matching rule IDs
1850
1851 // Rule didn't match - try another RuleSet
1852 break;
1853 } // End of file loop to test all rules for the current RuleSet
1854
1855 // Match
1856 if (iMatchCount == 0)
1857 {
1858 pSearchState->pMatchingRuleSet = pRuleSet;
1859 return true;
1860 }
1861 } // End of for loop to test all RuleSets for the current parser state
1862
1863 // Failed to find a matching rule -> kernel search will fail
1864 VP_RENDER_NORMALMESSAGE("Fail to find a matching rule @ layer %d, state %d.", pSearchState->layer_number, pSearchState->state);
1865 MT_ERR2(MT_VP_KERNEL_RULE, MT_VP_KERNEL_RULE_LAYERNUM, pSearchState->layer_number, MT_VP_KERNEL_RULE_SEARCH_STATE, pSearchState->state);
1866
1867 // No match -> return
1868 pSearchState->pMatchingRuleSet = nullptr;
1869 return false;
1870 }
1871
1872 //--------------------------------------------------------------
1873 // Append kernel, include symbols to resolve
1874 //--------------------------------------------------------------
Kdll_AppendKernel(Kdll_KernelCache * pKernelCache,Kdll_SearchState * pSearchState,int32_t iKUID,Kdll_PatchData * pKernelPatch)1875 bool Kdll_AppendKernel(Kdll_KernelCache *pKernelCache,
1876 Kdll_SearchState * pSearchState,
1877 int32_t iKUID,
1878 Kdll_PatchData * pKernelPatch)
1879 {
1880 Kdll_State * pState;
1881 Kdll_Symbol * pSymbols;
1882 Kdll_CacheEntry *kernels;
1883 Kdll_LinkData * link;
1884 Kdll_LinkData * liSearch_reloc;
1885 uint8_t * kernel;
1886 int * size;
1887 int * left;
1888 int dwSize;
1889 int i;
1890 int base;
1891 bool bInline;
1892 bool res;
1893
1894 VP_RENDER_FUNCTION_ENTER;
1895
1896 res = false;
1897
1898 // Check if Kernel ID is valid
1899 if (iKUID >= pKernelCache->iCacheEntries)
1900 {
1901 VP_RENDER_NORMALMESSAGE("invalid Kernel ID %d.", iKUID);
1902 goto cleanup;
1903 }
1904
1905 // Get KDLL state
1906 pState = pSearchState->pKdllState;
1907
1908 // Get current combined kernel
1909 kernel = pSearchState->Kernel;
1910 size = &pSearchState->KernelSize;
1911 left = &pSearchState->KernelLeft;
1912 pSymbols = &pSearchState->KernelLink;
1913 base = (*size) >> 2;
1914
1915 // Find selected kernel and kernel size; check if there is enough space
1916 kernels = &pKernelCache->pCacheEntries[iKUID];
1917 dwSize = kernels->iSize;
1918 if (*left < dwSize)
1919 {
1920 VP_RENDER_NORMALMESSAGE("exceeded maximum kernel size.");
1921 goto cleanup;
1922 }
1923
1924 // Check if there is enough space for symbols
1925 if (pSymbols->dwCount + kernels->nLink >= pSymbols->dwSize)
1926 {
1927 VP_RENDER_NORMALMESSAGE("exceeded maximum numbers of symbols to resolve.");
1928 goto cleanup;
1929 }
1930
1931 #if EMUL || VPHAL_LIB
1932 VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
1933
1934 if (pState->pfnCbListKernel)
1935 {
1936 pState->pfnCbListKernel(pState->pToken, kernels->szName);
1937 }
1938 #elif _DEBUG // EMUL || VPHAL_LIB
1939 VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
1940 #endif // _DEBUG
1941
1942 // Append symbols to resolve, relocate symbols
1943 link = kernels->pLink;
1944 liSearch_reloc = pSymbols->pLink + pSymbols->dwCount;
1945
1946 bInline = false;
1947 if (link)
1948 {
1949 for (i = kernels->nLink; i > 0; i--, link++)
1950 {
1951 if (link->bInline)
1952 {
1953 // Inline code included
1954 if (!link->bExport)
1955 {
1956 bInline = true;
1957 }
1958 }
1959 else
1960 {
1961 *liSearch_reloc = *link;
1962 liSearch_reloc->dwOffset += base;
1963 liSearch_reloc++;
1964
1965 pSymbols->dwCount++;
1966 }
1967 }
1968 }
1969
1970 // Append kernel
1971 MOS_SecureMemcpy(&kernel[*size], dwSize, (void *)kernels->pBinary, dwSize);
1972
1973 // Patch kernel
1974 if (pKernelPatch)
1975 {
1976 uint8_t *pSource = pKernelPatch->Data;
1977 uint8_t *pDestination = kernel + (*size);
1978 int32_t i;
1979
1980 Kdll_PatchBlock *pBlock = pKernelPatch->Patch;
1981 for (i = pKernelPatch->nPatches; i > 0; i--, pBlock++)
1982 {
1983 MOS_SecureMemcpy(pDestination + pBlock->DstOffset, pBlock->BlockSize, (void *)(pSource + pBlock->SrcOffset), pBlock->BlockSize);
1984 }
1985 }
1986
1987 res = true;
1988 *size += dwSize;
1989 *left -= dwSize;
1990
1991 // Insert inline code
1992 if (bInline)
1993 {
1994 for (link = kernels->pLink, i = kernels->nLink; (i > 0) && (res); i--, link++)
1995 {
1996 if (link->bInline && (!link->bExport))
1997 {
1998 iKUID = pKernelCache->pExports[link->iLabelID].iKUID;
1999 res &= Kdll_AppendKernel(pKernelCache, pSearchState, iKUID, pKernelPatch);
2000 }
2001 }
2002 }
2003
2004 cleanup:
2005 return res;
2006 }
2007
2008 //--------------------------------------------------------------
2009 // Resolve kernel dependencies and perform patching
2010 //--------------------------------------------------------------
Kdll_ResolveKernelDependencies(Kdll_State * pState,Kdll_SearchState * pSearchState)2011 bool Kdll_ResolveKernelDependencies(
2012 Kdll_State * pState,
2013 Kdll_SearchState *pSearchState)
2014 {
2015 Kdll_KernelCache *cache = &pState->ComponentKernelCache;
2016 uint8_t * kernel = pSearchState->Kernel;
2017 Kdll_Symbol * pSymbols = &pSearchState->KernelLink;
2018 uint32_t nExports = cache->nExports;
2019 Kdll_LinkData * pExports = cache->pExports;
2020 Kdll_LinkData * pLink;
2021 int32_t iKUID;
2022 int32_t iOffset;
2023 uint32_t dwResolveOffset[DL_MAX_EXPORT_COUNT];
2024 bool bResolveDone;
2025 int32_t i;
2026 uint32_t * d;
2027
2028 VP_RENDER_FUNCTION_ENTER;
2029
2030 MOS_ZeroMemory(dwResolveOffset, sizeof(dwResolveOffset));
2031
2032 do
2033 {
2034 // Update exports
2035 for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
2036 {
2037 if (pLink->bExport)
2038 {
2039 dwResolveOffset[pLink->iLabelID] = pLink->dwOffset;
2040 }
2041 }
2042
2043 bResolveDone = true;
2044 for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
2045 {
2046 // validate label
2047 if (pLink->iLabelID > nExports || // invalid label
2048 pExports[pLink->iLabelID].bExport == 0) // label not in the export table
2049 {
2050 VP_RENDER_ASSERTMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
2051 return false;
2052 }
2053
2054 // load dependencies
2055 if (!pLink->bExport && !dwResolveOffset[pLink->iLabelID])
2056 {
2057 // set flag for another pass as newly loaded
2058 // kernels may contain dependencies of their own
2059 bResolveDone = false;
2060
2061 // Load dependencies
2062 iKUID = pExports[pLink->iLabelID].iKUID;
2063 Kdll_AppendKernel(cache, pSearchState, iKUID, nullptr);
2064
2065 // Restart
2066 break;
2067 }
2068 } // for
2069 } while (!bResolveDone);
2070
2071 // All modules must be loaded by now, start patching
2072 for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
2073 {
2074 iOffset = (int32_t)dwResolveOffset[pLink->iLabelID] - 4;
2075 iOffset -= pLink->dwOffset;
2076
2077 d = ((uint32_t *)kernel) + pLink->dwOffset;
2078
2079 // Patch offset
2080 if (!pLink->bExport && !pLink->bInline)
2081 {
2082 d[3] = iOffset << 2; // jmpi - index * 8 bits
2083 }
2084 }
2085
2086 return true;
2087 }
2088
2089 //---------------------------------------------------------------------------------------
2090 // Kdll_SearchKernel - Performs full kernel search, including selection of best match
2091 // Search state must be initialized by KernelDll_StartKernelSearch
2092 //
2093 // Parameters:
2094 // Kdll_State *pState - [in] Dynamic Linking state
2095 // Kdll_SearchState *pSearchState - [in/out] Kernel search state
2096 //
2097 // Output: true if suceeded, false otherwise
2098 //---------------------------------------------------------------------------------------
KernelDll_SearchKernel(Kdll_State * pState,Kdll_SearchState * pSearchState)2099 bool KernelDll_SearchKernel(Kdll_State *pState,
2100 Kdll_SearchState * pSearchState)
2101 {
2102 VP_RENDER_FUNCTION_ENTER;
2103
2104 // Check parameters
2105 if ((!pSearchState) || pSearchState->iFilterSize < 1)
2106 {
2107 VP_RENDER_NORMALMESSAGE("Search is empty, must contain 2 or more layers.");
2108 return false;
2109 }
2110
2111 // Setup CSC; allocate and calculate CSC matrices
2112 if (!pState->pfnSetupCSC(pState, pSearchState))
2113 {
2114 VP_RENDER_NORMALMESSAGE("CSC setup failed.");
2115 return false;
2116 }
2117
2118 // Initial search states
2119 pSearchState->bCscBeforeMix = false;
2120 pSearchState->state = Parser_Begin;
2121 pSearchState->cspace = pSearchState->CscParams.ColorSpace;
2122 pSearchState->quadrant = 0;
2123 pSearchState->layer_number = 0;
2124
2125 pSearchState->pMatchingRuleSet = nullptr;
2126
2127 // Reset Src0 state
2128 pSearchState->src0_format = Format_None;
2129 pSearchState->src0_sampling = Sample_None;
2130 pSearchState->src0_colorfill = false;
2131 pSearchState->src0_lumakey = LumaKey_False;
2132 pSearchState->src0_coeff = CoeffID_None;
2133
2134 // Reset Src1 state
2135 pSearchState->src1_format = Format_None;
2136 pSearchState->src1_sampling = Sample_None;
2137 pSearchState->src1_lumakey = LumaKey_False;
2138 pSearchState->src1_samplerlumakey = LumaKey_False;
2139 pSearchState->src1_coeff = CoeffID_None;
2140 pSearchState->src1_process = Process_None;
2141
2142 // Search loop
2143 while (pSearchState->state != Parser_End)
2144 {
2145 #if EMUL || VPHAL_LIB
2146 if (pState->pfnCbSearchSate)
2147 {
2148 pState->pfnCbSearchSate(pState->pToken, CB_REASON_BEGIN_SEARCH, pSearchState);
2149 }
2150 #endif
2151 // Find rule that matches
2152 if (!pState->pfnFindRule(pState, pSearchState))
2153 {
2154 #if EMUL || VPHAL_LIB
2155 if (pState->pfnCbSearchSate)
2156 {
2157 pState->pfnCbSearchSate(pState->pToken, CB_REASON_SEARCH_FAILED, pSearchState);
2158 }
2159 #endif
2160 return false;
2161 }
2162
2163 #if EMUL || VPHAL_LIB
2164 if (pState->pfnCbSearchSate)
2165 {
2166 pState->pfnCbSearchSate(pState->pToken, CB_REASON_BEGIN_UPDATE, pSearchState);
2167 }
2168 #endif
2169 // Update state
2170 if (!pState->pfnUpdateState(pState, pSearchState))
2171 {
2172 #if EMUL || VPHAL_LIB
2173 if (pState->pfnCbSearchSate)
2174 {
2175 pState->pfnCbSearchSate(pState->pToken, CB_REASON_UPDATE_FAILED, pSearchState);
2176 }
2177 #endif
2178 return false;
2179 }
2180 }
2181
2182 #if EMUL || VPHAL_LIB
2183 if (pState->pfnCbSearchSate)
2184 {
2185 pState->pfnCbSearchSate(pState->pToken, CB_REASON_END_SEARCH, pSearchState);
2186 }
2187 #endif
2188
2189 VP_RENDER_VERBOSEMESSAGE("Search completed successfully.");
2190 return true;
2191 }
2192
2193 //--------------------------------------------------------------
2194 // KernelDll_BuildKernel - build kernel
2195 //--------------------------------------------------------------
KernelDll_BuildKernel(Kdll_State * pState,Kdll_SearchState * pSearchState)2196 bool KernelDll_BuildKernel(Kdll_State *pState, Kdll_SearchState *pSearchState)
2197 {
2198 Kdll_KernelCache *pKernelCache = &pState->ComponentKernelCache;
2199 Kdll_KernelCache *pCustomCache = pState->pCustomKernelCache;
2200 Kdll_PatchData * pKernelPatch;
2201 bool res;
2202 int32_t offset = 0;
2203 int32_t * pKernelID, *pGroupID, *pPatchID;
2204
2205 VP_RENDER_FUNCTION_ENTER;
2206
2207 pSearchState->KernelLink.dwSize = DL_MAX_SYMBOLS;
2208 pSearchState->KernelLink.dwCount = 0;
2209 pSearchState->KernelLink.pLink = pSearchState->LinkArray;
2210 pSearchState->KernelSize = 0;
2211 pSearchState->KernelLeft = sizeof(pSearchState->Kernel);
2212 pSearchState->KernelLink.dwCount = 0;
2213
2214 #if EMUL || VPHAL_LIB || _DEBUG || _RELEASE_INTERNAL
2215 VP_RENDER_NORMALMESSAGE("Component Kernels:");
2216 #endif // EMUL || VPHAL_LIB || _DEBUG
2217
2218 pKernelID = pSearchState->KernelID;
2219 pGroupID = pSearchState->KernelGrp;
2220 pPatchID = pSearchState->PatchID;
2221
2222 for (offset = 0; offset < pSearchState->KernelCount; offset++, pKernelID++, pGroupID++, pPatchID++)
2223 {
2224 // Get patch information associated with the kernel
2225 pKernelPatch = (*pPatchID >= 0) ? &(pSearchState->Patches[*pPatchID]) : nullptr;
2226
2227 // Append/Patch kernel from custom cache
2228 if (*pGroupID == GROUP_CUSTOM)
2229 {
2230 res = Kdll_AppendKernel(pCustomCache, pSearchState, *pKernelID, pKernelPatch);
2231 }
2232 // Append/Patch kernel from internal cache
2233 else
2234 {
2235 res = Kdll_AppendKernel(pKernelCache, pSearchState, *pKernelID, pKernelPatch);
2236 }
2237
2238 if (!res)
2239 {
2240 VP_RENDER_ASSERTMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
2241 return false;
2242 }
2243 else
2244 {
2245 Kdll_CacheEntry *kernels = (*pGroupID == GROUP_CUSTOM) ? &pCustomCache->pCacheEntries[*pKernelID] : &pKernelCache->pCacheEntries[*pKernelID];
2246 VP_RENDER_NORMALMESSAGE("Component kernels [%d]: %s", *pKernelID, kernels->szName);
2247 }
2248 }
2249
2250 // Resolve kernel dependencies
2251 res = Kdll_ResolveKernelDependencies(pState, pSearchState);
2252 if (!res)
2253 {
2254 VP_RENDER_ASSERTMESSAGE("Failed to resolve symbols.");
2255 return false;
2256 }
2257
2258 return true;
2259 }
2260
2261 //---------------------------------------------------------------------------------------
2262 // KernelDll_StartKernelSearch - Starts kernel search
2263 //
2264 // Parameters:
2265 // Kdll_State *pState - [in] Dynamic Linking State
2266 // Kdll_FilterEntry *pFilter - [in] Search filter (array of search entries)
2267 // int iFilterSize - [in] Search filter size
2268 // Kdll_SearchState *pSearchState - [in/out] Kernel search state
2269 //
2270 // Output: none
2271 //---------------------------------------------------------------------------------------
KernelDll_StartKernelSearch(Kdll_State * pState,Kdll_SearchState * pSearchState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t uiIs64BInstrEnabled)2272 void KernelDll_StartKernelSearch(
2273 Kdll_State * pState,
2274 Kdll_SearchState *pSearchState,
2275 Kdll_FilterEntry *pFilter,
2276 int32_t iFilterSize,
2277 uint32_t uiIs64BInstrEnabled)
2278 {
2279 int32_t nLayer;
2280
2281 VP_RENDER_FUNCTION_ENTER;
2282
2283 // Reset all states
2284 MOS_ZeroMemory(pSearchState, sizeof(Kdll_SearchState));
2285
2286 // Setup KDLL state
2287 pSearchState->pKdllState = pState; // KDLL state
2288
2289 // Cleanup kernel table
2290 pSearchState->KernelCount = 0; // # of kernels
2291
2292 // Cleanup patch data
2293 memset(pSearchState->Patches, 0, sizeof(pSearchState->Patches));
2294 memset(pSearchState->PatchID, -1, sizeof(pSearchState->PatchID));
2295 pSearchState->PatchCount = 0;
2296
2297 // Copy original filter; filter will be modified as part of the search
2298 if (pFilter && iFilterSize > 0)
2299 {
2300 MOS_SecureMemcpy(pSearchState->Filter, iFilterSize * sizeof(Kdll_FilterEntry), pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
2301 pSearchState->pFilter = pSearchState->Filter;
2302 pSearchState->iFilterSize = iFilterSize;
2303
2304 for (nLayer = 0; nLayer < iFilterSize; nLayer++)
2305 {
2306 // DScale Kernels are enabled for all gen9 stepping.
2307 //For Gen9+, kernel don't support sublayer DScale+rotation
2308 //Sampler_unorm does not support Y410/RGB10, we need to use sampler_16 to support Y410/RGB10
2309 if (!pFilter[nLayer].bEnableDscale &&
2310 (!pFilter[nLayer].bWaEnableDscale ||
2311 (pFilter[nLayer].layer == Layer_SubVideo &&
2312 pFilter[nLayer].rotation != VPHAL_ROTATION_IDENTITY)))
2313 {
2314 if (pFilter[nLayer].sampler == Sample_Scaling_034x)
2315 {
2316 pSearchState->pFilter[nLayer].sampler = Sample_Scaling;
2317 }
2318 else if (pFilter[nLayer].sampler == Sample_iScaling_034x)
2319 {
2320 pSearchState->pFilter[nLayer].sampler = Sample_iScaling;
2321 }
2322 else if (pFilter[nLayer].sampler == Sample_iScaling_AVS)
2323 {
2324 pSearchState->pFilter[nLayer].sampler = Sample_iScaling_AVS;
2325 }
2326 }
2327 }
2328
2329 // Copy the render target format
2330 pSearchState->target_format = pSearchState->pFilter[iFilterSize - 1].format;
2331
2332 // Copy the render target tile type
2333 pSearchState->target_tiletype = pSearchState->pFilter[iFilterSize - 1].tiletype;
2334
2335 // Indicate whether to use 64B save kernel for render target surface
2336 if (uiIs64BInstrEnabled &&
2337 ((pSearchState->target_tiletype == MOS_TILE_X) ||
2338 (pSearchState->target_tiletype == MOS_TILE_LINEAR)))
2339 {
2340 pSearchState->b64BSaveEnabled = true;
2341 }
2342 }
2343 }
2344
2345 /*----------------------------------------------------------------------------
2346 | Name : KernelDll_SetupCSC
2347 | Purpose : Defines CSC conversions necessary for a given filter
2348 |
2349 | Input : pState - Kernel Dll state
2350 | pSearchState - current DL search state
2351 |
2352 | Return :
2353 \---------------------------------------------------------------------------*/
KernelDll_SetupCSC(Kdll_State * pState,Kdll_SearchState * pSearchState)2354 bool KernelDll_SetupCSC(
2355 Kdll_State * pState,
2356 Kdll_SearchState *pSearchState)
2357 {
2358 int i, m; // Integer iterators
2359
2360 bool bCoeffID_0_Used = false;
2361
2362 VPHAL_CSPACE cspace = CSpace_None; // Current ColorSpace
2363 VPHAL_CSPACE out_cspace = CSpace_None; // Render Target CS
2364 VPHAL_CSPACE main_cspace = CSpace_None; // Main video CS
2365 VPHAL_CSPACE sel_cspace = CSpace_Any; // Selected CS
2366
2367 Kdll_FilterEntry *pFilter; // Current Filter information
2368 int iFilterSize = pSearchState->iFilterSize;
2369 Kdll_CSC_Params * pCSC = &pSearchState->CscParams;
2370
2371 int csc_count; // Number of CSC operations
2372 int matrix_count; // Number of Matrices in use
2373 int procamp_count = 0; // Number of PA operations
2374 int sel_csc_count = -1; // Minimum number of CSC operations
2375 int iCoeffID; // coeffID for layers other than main video
2376 uint8_t cspace_in_use[CSpace_Count]; // Color Spaces in use
2377
2378 Kdll_CSC_Matrix curr_matrix;
2379 Kdll_CSC_Matrix *matrix = pCSC->Matrix; // Color Space conversion matrix
2380 uint8_t * matrixID = pCSC->MatrixID; // CSC coefficient allocation table
2381 bool forceToTargetColorSpace = false;
2382
2383 // Clear all CSC matrices
2384 MOS_ZeroMemory(matrix, sizeof(pCSC->Matrix));
2385 memset(matrixID, DL_CSC_DISABLED, sizeof(pCSC->MatrixID));
2386 memset(pCSC->PatchMatrixID, DL_CSC_DISABLED, sizeof(pCSC->PatchMatrixID));
2387 pCSC->PatchMatrixNum = 0;
2388
2389 // Clear array of color spaces in use
2390 MOS_ZeroMemory(cspace_in_use, sizeof(cspace_in_use));
2391
2392 //---------------------------------------------------------------//
2393 // Collect information about Color Spaces in use
2394 // Get Primary Video and Render Target Color Spaces
2395 // Force xvYCC passthrough if enabled
2396 //---------------------------------------------------------------//
2397 for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
2398 {
2399 if (pFilter->forceToTargetColorSpace)
2400 {
2401 forceToTargetColorSpace = true;
2402 }
2403 // Disable Procamp for all layers except Main Video
2404 // Disable Procamp if source is RGB
2405 if (pFilter->layer != Layer_MainVideo ||
2406 pFilter->cspace == CSpace_sRGB ||
2407 pFilter->cspace == CSpace_stRGB)
2408 {
2409 pFilter->procamp = DL_PROCAMP_DISABLED;
2410 }
2411
2412 // Count number of procamp operations (limited by number of independent coefficients)
2413 // Ignore layers with palletized/constant colors
2414 if (pFilter->procamp != DL_PROCAMP_DISABLED &&
2415 pFilter->cspace != CSpace_Any)
2416 {
2417 procamp_count++;
2418 }
2419
2420 // Set xvYCC passthrough mode
2421 if (pFilter->cspace == CSpace_xvYCC709 ||
2422 pFilter->cspace == CSpace_xvYCC601)
2423 {
2424 sel_cspace = pFilter->cspace;
2425 }
2426
2427 // Get Main Video color space
2428 if (pFilter->layer == Layer_MainVideo)
2429 {
2430 main_cspace = pFilter->cspace;
2431 }
2432
2433 // Get Render Target color space
2434 if (pFilter->layer == Layer_RenderTarget)
2435 {
2436 // Target is sRGB/stRGB
2437 if (!KernelDll_IsYUVFormat(pFilter->format))
2438 {
2439 // Disable xvYCC passthrough (sRGB cannot have extended gamut)
2440 sel_cspace = CSpace_Any;
2441 }
2442 out_cspace = pFilter->cspace;
2443 }
2444
2445 // Mark color spaces in use for search that follows
2446 if (pFilter->cspace > CSpace_Any && pFilter->cspace < CSpace_Count)
2447 {
2448 cspace_in_use[pFilter->cspace] = 1;
2449 }
2450 }
2451
2452 // Check max number of procamp operations
2453 if (procamp_count > DL_PROCAMP_MAX)
2454 {
2455 return false;
2456 }
2457
2458 //---------------------------------------------------------------//
2459 // Search Color Space that provides minimum number of CSC conversions
2460 // If there are multiple solutions, select main video cspace (quality)
2461 //---------------------------------------------------------------//
2462 if (sel_cspace == CSpace_Any)
2463 {
2464 if (forceToTargetColorSpace)
2465 {
2466 sel_cspace = out_cspace;
2467 }
2468 else
2469 {
2470 int cs;
2471 for (cs = (CSpace_Any + 1); cs < CSpace_Count; cs++)
2472 {
2473 // Skip color spaces not in use
2474 cspace = (VPHAL_CSPACE)cs;
2475 if (!cspace_in_use[cspace])
2476 {
2477 continue;
2478 }
2479
2480 // xvYCC and BT are treated as same for CSC considerations (BT.x to xvYCC.x matrix is I)
2481 cspace = KernelDll_TranslateCspace(cspace);
2482
2483 // Count # of CS conversions and matrices
2484 csc_count = 0;
2485 for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
2486 {
2487 // Ignore layers where the Color Space may be set in software (colorfill, palletized)
2488 if (pFilter->cspace == CSpace_Any)
2489 {
2490 continue;
2491 }
2492
2493 // Check if CSC/PA is required
2494 if (KernelDll_TranslateCspace(pFilter->cspace) != cspace ||
2495 pFilter->procamp != DL_PROCAMP_DISABLED)
2496 {
2497 csc_count++;
2498 }
2499 }
2500
2501 // Save best choice as requiring minimum number of CSC operations
2502 if ((sel_csc_count < 0) || // Initial value
2503 (csc_count < sel_csc_count) || // Minimum number of CSC operations
2504 (csc_count == sel_csc_count && cs == main_cspace)) // Use main cspace as default if same CSC count
2505 {
2506 sel_cspace = cspace;
2507 sel_csc_count = csc_count;
2508 }
2509 }
2510 }
2511 }
2512
2513 // Due to put the colorfill behind CSC, so Src0 cspace needs to change
2514 // to selspace in order to fill colorfill values correctly.
2515 pState->colorfill_cspace = sel_cspace;
2516
2517 // color space is selected by now... setup CSC matrices
2518 matrix_count = 0;
2519 iCoeffID = 1;
2520 for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
2521 {
2522 // Setup CSC for palettized/colorfill layers
2523 if (pFilter->cspace == CSpace_Any)
2524 {
2525 // Set Color Space and format (for software)
2526 if (pFilter->format == Format_Any)
2527 {
2528 pFilter->format = KernelDll_IsCspace(sel_cspace, CSpace_YUV) ? Format_AYUV : Format_A8R8G8B8;
2529 }
2530
2531 pFilter->cspace = sel_cspace;
2532 pFilter->matrix = DL_CSC_DISABLED;
2533 }
2534 else
2535 {
2536 // Setup CSC parameters: SrcSpace is the layer color space,
2537 // DstSpace is the internal color space selected
2538 curr_matrix.SrcSpace = KernelDll_TranslateCspace(pFilter->cspace);
2539 curr_matrix.DstSpace = KernelDll_TranslateCspace(sel_cspace);
2540 curr_matrix.iProcampID = pFilter->procamp;
2541
2542 // Check if CSC is necessary
2543 if (curr_matrix.SrcSpace == curr_matrix.DstSpace &&
2544 curr_matrix.iProcampID == DL_PROCAMP_DISABLED)
2545 {
2546 pFilter->matrix = DL_CSC_DISABLED;
2547 curr_matrix.iCoeffID = CoeffID_None;
2548 continue;
2549 }
2550
2551 // Reserve CoeffID_0 for main video - CoeffID_0 gets CSC coeff from static parameters
2552 // If main video doesn't use CoeffID_0, assign to RT
2553 if ((pFilter->layer == Layer_MainVideo) ||
2554 (pFilter->layer == Layer_RenderTarget))
2555 {
2556 if (bCoeffID_0_Used)
2557 {
2558 curr_matrix.iCoeffID = (Kdll_CoeffID)iCoeffID++;
2559 }
2560 else
2561 {
2562 curr_matrix.iCoeffID = CoeffID_0;
2563 bCoeffID_0_Used = true;
2564 }
2565 }
2566 else
2567 {
2568 curr_matrix.iCoeffID = (Kdll_CoeffID)iCoeffID++;
2569 }
2570
2571 // CSC at the target layer is from internal cspace (SrcSpace)
2572 // to external cspace (DstCspace)
2573 if (pFilter->layer == Layer_RenderTarget)
2574 {
2575 VPHAL_CSPACE aux = curr_matrix.SrcSpace;
2576 curr_matrix.SrcSpace = curr_matrix.DstSpace;
2577 curr_matrix.DstSpace = aux;
2578 }
2579
2580 // Search CSC matrix - avoid duplicated CSC matrices
2581 for (m = 0; m < matrix_count; m++)
2582 {
2583 if (curr_matrix.SrcSpace == matrix[m].SrcSpace &&
2584 curr_matrix.DstSpace == matrix[m].DstSpace &&
2585 curr_matrix.iProcampID == matrix[m].iProcampID)
2586 {
2587 break;
2588 }
2589 }
2590
2591 // Check limit
2592 if (m == matrix_count)
2593 {
2594 // Exceeded number of CSC matrices allowed
2595 if (matrix_count == DL_CSC_MAX)
2596 {
2597 VP_RENDER_ASSERTMESSAGE("CSC matrix count %d exceeded number of CSC matrices allowed!", matrix_count);
2598 return false;
2599 }
2600
2601 matrix[m].bInUse = true;
2602 matrix[m].SrcSpace = curr_matrix.SrcSpace;
2603 matrix[m].DstSpace = curr_matrix.DstSpace;
2604 matrix[m].iProcampID = curr_matrix.iProcampID;
2605 matrix[m].iCoeffID = curr_matrix.iCoeffID;
2606
2607 // Calculate coefficients for the first time
2608 KernelDll_UpdateCscCoefficients(pState, &matrix[m]);
2609
2610 // Next matrix
2611 matrix_count++;
2612 }
2613
2614 // point to the matrix
2615 pFilter->matrix = m;
2616 }
2617 }
2618
2619 // Link matrices to kernel coefficients (and vice-versa)
2620 matrix = pCSC->Matrix;
2621 for (m = 0; m < matrix_count; m++, matrix++)
2622 {
2623 // Coefficient table points to matrix index
2624 matrixID[matrix->iCoeffID] = (uint8_t)m;
2625 }
2626
2627 // Save selected color space
2628 pCSC->ColorSpace = sel_cspace;
2629
2630 return true;
2631 }
2632
2633 /*----------------------------------------------------------------------------
2634 | Name : KernelDll_GetPatchData
2635 | Purpose : Get binary data block to be used for kernel patching
2636 |
2637 | Input : pState - [in] Current DL state
2638 | pSearchState - [in] Current DL search state
2639 | iPatchKind - [in] Patch kind
2640 | pSize - [out] Data block Size
2641 |
2642 | Return : nullptr - Unsupported patch data kind
2643 | <>nullptr - Pointer to data block
2644 \---------------------------------------------------------------------------*/
KernelDll_GetPatchData(Kdll_State * pState,Kdll_SearchState * pSearchState,int32_t iPatchKind,int32_t * pSize)2645 static uint8_t *KernelDll_GetPatchData(
2646 Kdll_State * pState,
2647 Kdll_SearchState *pSearchState,
2648 int32_t iPatchKind,
2649 int32_t * pSize)
2650 {
2651 MOS_UNUSED(pState);
2652
2653 VP_RENDER_FUNCTION_ENTER;
2654
2655 if (iPatchKind == PatchKind_CSC_Coeff_Src0 ||
2656 iPatchKind == PatchKind_CSC_Coeff_Src1)
2657 {
2658 Kdll_CoeffID coeffID = CoeffID_None;
2659 uint8_t matrixID = DL_CSC_DISABLED;
2660
2661 // Get matrix id
2662 if (iPatchKind == PatchKind_CSC_Coeff_Src0)
2663 {
2664 coeffID = pSearchState->src0_coeff;
2665 }
2666 else
2667 {
2668 coeffID = pSearchState->src1_coeff;
2669 }
2670
2671 // Get matrix associated with the coefficient ID
2672 if (coeffID > CoeffID_None)
2673 {
2674 matrixID = pSearchState->CscParams.MatrixID[coeffID];
2675 }
2676
2677 // Found matrix
2678 if (matrixID < DL_CSC_MAX)
2679 {
2680 Kdll_CSC_Matrix *pMatrix = &(pSearchState->CscParams.Matrix[matrixID]);
2681
2682 *pSize = 12 * sizeof(uint16_t);
2683
2684 if (pState->bEnableCMFC)
2685 {
2686 if (pSearchState->CscParams.PatchMatrixNum < DL_CSC_MAX)
2687 {
2688 pSearchState->CscParams.PatchMatrixID[pSearchState->CscParams.PatchMatrixNum] = matrixID;
2689 pSearchState->CscParams.PatchMatrixNum++;
2690 }
2691 else
2692 {
2693 VP_RENDER_ASSERTMESSAGE("Patch CSC coefficient number %d exceed limitation %d!", pSearchState->CscParams.PatchMatrixNum, DL_CSC_MAX);
2694 return nullptr;
2695 }
2696 }
2697
2698 return ((uint8_t *)pMatrix->Coeff);
2699 }
2700 }
2701 else
2702 {
2703 VP_RENDER_ASSERTMESSAGE("Invalid patch kind %d.", iPatchKind);
2704 }
2705
2706 return nullptr;
2707 }
2708
2709 /*----------------------------------------------------------------------------
2710 | Name : KernelDll_UpdateState
2711 | Purpose : Update search state using current matching rule
2712 |
2713 | Input : pState - Kernel Dll state
2714 | pSearchState - current DL search state
2715 |
2716 | Return :
2717 \---------------------------------------------------------------------------*/
KernelDll_UpdateState(Kdll_State * pState,Kdll_SearchState * pSearchState)2718 bool KernelDll_UpdateState(
2719 Kdll_State * pState,
2720 Kdll_SearchState *pSearchState)
2721 {
2722 Kdll_RuleEntrySet * pRuleSet = pSearchState->pMatchingRuleSet;
2723 const Kdll_RuleEntry *pRuleEntry;
2724 int32_t iSetCount;
2725
2726 VP_RENDER_FUNCTION_ENTER;
2727
2728 // Ensures that we have a matching rule
2729 if (pRuleSet == nullptr)
2730 {
2731 return false;
2732 }
2733
2734 // Get rule entry and number of state update ("Set") rules; validate
2735 pRuleEntry = pRuleSet->pRuleEntry;
2736 iSetCount = pRuleSet->iSetCount;
2737 if (pRuleEntry == nullptr || iSetCount < 1)
2738 {
2739 VP_RENDER_NORMALMESSAGE("Invalid rule set.");
2740 return false;
2741 }
2742
2743 // Jump to set rules (skip match rules)
2744 pRuleEntry += pRuleSet->iMatchCount;
2745
2746 // Apply state update rules
2747 for (; iSetCount > 0; iSetCount--, pRuleEntry++)
2748 {
2749 switch (pRuleEntry->id)
2750 {
2751 // Add kernel to the Dynamic Linking array
2752 case RID_SetKernel:
2753 if (pSearchState->KernelCount < DL_MAX_KERNELS)
2754 {
2755 int32_t i = pSearchState->KernelCount++;
2756 pSearchState->KernelID[i] = pRuleEntry->value;
2757 pSearchState->KernelGrp[i] = pRuleSet->iGroup; // Group associated with the kernel ID
2758 }
2759 else
2760 {
2761 VP_RENDER_ASSERTMESSAGE("reached maximum number of component kernels.");
2762 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2763 return false;
2764 }
2765 break;
2766
2767 // Set Parser State
2768 case RID_SetParserState:
2769 pSearchState->state = (Kdll_ParserState)pRuleEntry->value;
2770 break;
2771
2772 // Move to Next/Prev Layer
2773 case RID_SetNextLayer:
2774 if (pRuleEntry->value == -1)
2775 {
2776 pSearchState->layer_number--;
2777 pSearchState->pFilter--;
2778 }
2779 else if (pRuleEntry->value == -2) // jump to layer main video
2780 {
2781 do
2782 {
2783 pSearchState->layer_number--;
2784 pSearchState->pFilter--;
2785 if (pSearchState->pFilter == nullptr || pSearchState->layer_number < 0)
2786 {
2787 return false;
2788 }
2789 } while (pSearchState->pFilter->layer != Layer_MainVideo);
2790 }
2791 else if (pRuleEntry->value == 2) // jump to target layer
2792 {
2793 while (pSearchState->pFilter->layer < Layer_RenderTarget)
2794 {
2795 pSearchState->layer_number++;
2796 pSearchState->pFilter++;
2797 }
2798 }
2799 else
2800 {
2801 pSearchState->layer_number++;
2802 pSearchState->pFilter++;
2803 }
2804 break;
2805
2806 // Set patch data
2807 case RID_SetPatchData: {
2808 uint8_t * pData = nullptr;
2809 int32_t iSize = 0;
2810 int32_t iKernelIndex = pSearchState->KernelCount - 1;
2811 int32_t iPatchIndex;
2812 Kdll_PatchData *pPatch;
2813
2814 // Get block of data for patching
2815 pData = KernelDll_GetPatchData(pState, pSearchState, (Kdll_PatchKind)pRuleEntry->value, &iSize);
2816 if (pData == nullptr || iSize == 0)
2817 {
2818 VP_RENDER_ASSERTMESSAGE("invalid patch.");
2819 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2820 return false;
2821 }
2822
2823 // Append to the existing patch data block
2824 iPatchIndex = pSearchState->PatchID[iKernelIndex];
2825
2826 // Allocate new patch structure
2827 if (iPatchIndex < 0)
2828 {
2829 // Fail to allocate
2830 if (pSearchState->PatchCount >= DL_MAX_PATCHES)
2831 {
2832 VP_RENDER_ASSERTMESSAGE("reached maximum number of patches.");
2833 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2834 return false;
2835 }
2836
2837 // Get new patch block
2838 iPatchIndex = pSearchState->PatchCount++;
2839 pSearchState->PatchID[iKernelIndex] = iPatchIndex;
2840
2841 // Reset new patch entry
2842 pPatch = &(pSearchState->Patches[iPatchIndex]);
2843 MOS_ZeroMemory(pPatch, sizeof(Kdll_PatchData));
2844 }
2845 else
2846 {
2847 // Get Patch entry already in use
2848 pPatch = &(pSearchState->Patches[iPatchIndex]);
2849 }
2850
2851 // Check if data can be appended
2852 if (pPatch->iPatchDataSize + iSize > DL_MAX_PATCH_DATA_SIZE)
2853 {
2854 VP_RENDER_ASSERTMESSAGE("exceeded maximum patch size.");
2855 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2856 return false;
2857 }
2858
2859 // Append patch data
2860 MOS_SecureMemcpy(pPatch->Data + pPatch->iPatchDataSize, iSize, (void *)pData, iSize);
2861 pPatch->iPatchDataSize += iSize;
2862 }
2863 break;
2864
2865 // Set patch operation
2866 case RID_SetPatch: {
2867 int32_t iKernelIndex = pSearchState->KernelCount - 1;
2868 int32_t iPatchIndex = pSearchState->PatchID[iKernelIndex];
2869 Kdll_PatchData * pPatch;
2870 uint8_t * pPatchRule;
2871 Kdll_PatchBlock *pPatchBlock;
2872 int32_t nPatches;
2873
2874 // No patch associated with the current kernel
2875 if (iPatchIndex < 0)
2876 {
2877 return false;
2878 }
2879
2880 // Get Patch entry
2881 pPatch = &(pSearchState->Patches[iPatchIndex]);
2882
2883 // Get number of patches and pointer to first rule extension (patch rule)
2884 nPatches = pRuleEntry->value;
2885 pPatchRule = (uint8_t *)(pRuleEntry + 1);
2886
2887 // Check if rules can be applied
2888 if (nPatches + pPatch->nPatches > DL_MAX_PATCH_BLOCKS)
2889 {
2890 VP_RENDER_ASSERTMESSAGE("exceeded number of patch blocks.");
2891 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
2892 return false;
2893 }
2894
2895 // Set Patches
2896 pPatchBlock = &(pPatch->Patch[pPatch->nPatches]);
2897 for (; nPatches > 0; nPatches--, pPatchBlock++, pPatch->nPatches++)
2898 {
2899 pPatchBlock->BlockSize = ((Kdll_PatchRuleEntry *)pPatchRule)->Size;
2900 pPatchBlock->SrcOffset = ((Kdll_PatchRuleEntry *)pPatchRule)->Source;
2901 pPatchBlock->DstOffset = ((Kdll_PatchRuleEntry *)pPatchRule)->Dest;
2902 pPatchRule += sizeof(Kdll_RuleEntry);
2903 }
2904
2905 // Skip rule extensions
2906 iSetCount -= pRuleEntry->value;
2907 pRuleEntry += pRuleEntry->value;
2908 }
2909 break;
2910
2911 // Set destination colorspace
2912 case RID_SetTargetCspace:
2913 if ((VPHAL_CSPACE)pRuleEntry->value == CSpace_Source)
2914 {
2915 pSearchState->cspace = pSearchState->pFilter->cspace;
2916 }
2917 else
2918 {
2919 pSearchState->cspace = (VPHAL_CSPACE)pRuleEntry->value;
2920 }
2921 break;
2922
2923 // Set Src0 source format
2924 case RID_SetSrc0Format:
2925 if ((MOS_FORMAT)pRuleEntry->value == Format_Source)
2926 {
2927 pSearchState->src0_format = pSearchState->pFilter->format;
2928 }
2929 else
2930 {
2931 pSearchState->src0_format = (MOS_FORMAT)pRuleEntry->value;
2932 }
2933 break;
2934
2935 // Set Src0 sampling mode
2936 case RID_SetSrc0Sampling:
2937 if ((Kdll_Sampling)pRuleEntry->value == Sample_Source)
2938 {
2939 pSearchState->src0_sampling = pSearchState->pFilter->sampler;
2940 }
2941 else
2942 {
2943 pSearchState->src0_sampling = (Kdll_Sampling)pRuleEntry->value;
2944 }
2945 break;
2946
2947 // Set Src0 Rotation
2948 case RID_SetSrc0Rotation:
2949 pSearchState->src0_rotation = pSearchState->pFilter->rotation;
2950 break;
2951
2952 // Set Src0 Colorfill
2953 case RID_SetSrc0ColorFill:
2954 if ((int32_t)pRuleEntry->value == ColorFill_Source)
2955 {
2956 pSearchState->src0_colorfill = pSearchState->pFilter->colorfill;
2957 }
2958 else
2959 {
2960 pSearchState->src0_colorfill = (int32_t)pRuleEntry->value;
2961 }
2962 break;
2963
2964 // Set Src0 luma key
2965 case RID_SetSrc0LumaKey:
2966 if (pRuleEntry->value == LumaKey_Source)
2967 {
2968 pSearchState->src0_lumakey = pSearchState->pFilter->lumakey;
2969 }
2970 else
2971 {
2972 pSearchState->src0_lumakey = (int32_t)pRuleEntry->value;
2973 }
2974 break;
2975
2976 // Set Src0 Procamp
2977 case RID_SetSrc0Procamp:
2978 if (pRuleEntry->value == Procamp_Source)
2979 {
2980 pSearchState->src0_procamp = pSearchState->pFilter->procamp;
2981 }
2982 else
2983 {
2984 pSearchState->src0_procamp = (int32_t)pRuleEntry->value;
2985 }
2986 break;
2987
2988 // Set Src0 CSC coefficients
2989 case RID_SetSrc0Coeff:
2990 if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Source)
2991 {
2992 if (pSearchState->pFilter->matrix == DL_CSC_DISABLED)
2993 {
2994 pSearchState->src0_coeff = CoeffID_None;
2995 }
2996 else
2997 {
2998 Kdll_CSC_Matrix *matrix = pSearchState->CscParams.Matrix;
2999 matrix += pSearchState->pFilter->matrix;
3000
3001 pSearchState->src0_coeff = matrix->iCoeffID;
3002 }
3003 }
3004 else
3005 {
3006 pSearchState->src0_coeff = (Kdll_CoeffID)pRuleEntry->value;
3007 }
3008 break;
3009
3010 case RID_SetSrc0Processing:
3011 if ((Kdll_Processing)pRuleEntry->value == Process_Source)
3012 {
3013 pSearchState->src0_process = pSearchState->pFilter->process;
3014 }
3015 else
3016 {
3017 pSearchState->src0_process = (Kdll_Processing)pRuleEntry->value;
3018 }
3019 break;
3020
3021 // Set Src1 source format
3022 case RID_SetSrc1Format:
3023 if ((MOS_FORMAT)pRuleEntry->value == Format_Source)
3024 {
3025 pSearchState->src1_format = pSearchState->pFilter->format;
3026 }
3027 else
3028 {
3029 pSearchState->src1_format = (MOS_FORMAT)pRuleEntry->value;
3030 }
3031 break;
3032
3033 // Set Src1 sampling mode
3034 case RID_SetSrc1Sampling:
3035 if ((Kdll_Sampling)pRuleEntry->value == Sample_Source)
3036 {
3037 pSearchState->src1_sampling = pSearchState->pFilter->sampler;
3038 }
3039 else
3040 {
3041 pSearchState->src1_sampling = (Kdll_Sampling)pRuleEntry->value;
3042 }
3043 break;
3044
3045 // Set Src1 Rotation
3046 case RID_SetSrc1Rotation:
3047 pSearchState->src1_rotation = pSearchState->pFilter->rotation;
3048 break;
3049
3050 // Set Src1 luma key
3051 case RID_SetSrc1LumaKey:
3052 if (pRuleEntry->value == LumaKey_Source)
3053 {
3054 pSearchState->src1_lumakey = pSearchState->pFilter->lumakey;
3055 }
3056 else
3057 {
3058 pSearchState->src1_lumakey = (int32_t)pRuleEntry->value;
3059 }
3060 break;
3061
3062 // Set Src1 Sampler LumaKey
3063 case RID_SetSrc1SamplerLumaKey:
3064 if (pRuleEntry->value == LumaKey_Source)
3065 {
3066 pSearchState->src1_samplerlumakey = pSearchState->pFilter->samplerlumakey;
3067 }
3068 else
3069 {
3070 pSearchState->src1_samplerlumakey = (int32_t)pRuleEntry->value;
3071 }
3072 break;
3073
3074 // Set Src1 Procamp
3075 case RID_SetSrc1Procamp:
3076 if (pRuleEntry->value == Procamp_Source)
3077 {
3078 pSearchState->src1_procamp = pSearchState->pFilter->procamp;
3079 }
3080 else
3081 {
3082 pSearchState->src1_procamp = (int32_t)pRuleEntry->value;
3083 }
3084 break;
3085
3086 // Set Src1 CSC coefficients
3087 case RID_SetSrc1Coeff:
3088 if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Source)
3089 {
3090 if (pSearchState->pFilter->matrix == DL_CSC_DISABLED)
3091 {
3092 pSearchState->src1_coeff = CoeffID_None;
3093 }
3094 else
3095 {
3096 Kdll_CSC_Matrix *matrix = pSearchState->CscParams.Matrix;
3097 matrix += pSearchState->pFilter->matrix;
3098
3099 pSearchState->src1_coeff = matrix->iCoeffID;
3100 }
3101 }
3102 else
3103 {
3104 pSearchState->src1_coeff = (Kdll_CoeffID)pRuleEntry->value;
3105 }
3106 break;
3107
3108 // Set Src1 processing mode
3109 case RID_SetSrc1Processing:
3110 if ((Kdll_Processing)pRuleEntry->value == Process_Source)
3111 {
3112 pSearchState->src1_process = pSearchState->pFilter->process;
3113 }
3114 else
3115 {
3116 pSearchState->src1_process = (Kdll_Processing)pRuleEntry->value;
3117 }
3118 break;
3119
3120 // Set current quadrant
3121 case RID_SetQuadrant:
3122 pSearchState->quadrant = (int32_t)pRuleEntry->value;
3123 break;
3124
3125 // Set CSC flag before Mix
3126 case RID_SetCSCBeforeMix:
3127 pSearchState->bCscBeforeMix = pRuleEntry->value ? true : false;
3128 break;
3129
3130 // Unsupported "Set" rule
3131 default:
3132 // Failed to find a matching rule -> kernel search will fail
3133 VP_RENDER_ASSERTMESSAGE("Invalid rule %d @ layer %d, state %d.", pRuleEntry->id, pSearchState->layer_number, pSearchState->state);
3134 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3135 return false;
3136 }
3137 }
3138
3139 // Reset matching rule
3140 pSearchState->pMatchingRuleSet = nullptr;
3141 return true;
3142 }
3143
3144 //-----------------------------------------------------------------------------------------
3145 // KernelDll_SortRuleTable - Sort master dynamic linking rule table
3146 //
3147 // Parameters:
//      Kdll_State *pState - [in] Kernel Dll state
3149 //
3150 // Output: true - Master rule table (and acceleration table) successfully created
3151 // false - Failed to setup master rule table
3152 //-----------------------------------------------------------------------------------------
KernelDll_SortRuleTable(Kdll_State * pState)3153 bool KernelDll_SortRuleTable(Kdll_State *pState)
3154 {
3155 uint8_t group;
3156 int32_t state;
3157 const Kdll_RuleEntry *pRule = nullptr;
3158 Kdll_RuleEntrySet * pRuleSet;
3159 int32_t i, j;
3160
3161 int32_t iTotal = 0;
3162 int32_t iNoOverr[Parser_Count]; // Non-overridable (enforced) rules
3163 int32_t iDefault[Parser_Count]; // Default rules
3164 int32_t iCustom[Parser_Count]; // Custom rules
3165
3166 VP_RENDER_FUNCTION_ENTER;
3167
3168 // Release previous table (rule table update)
3169 if (pState->pSortedRules)
3170 {
3171 MOS_FreeMemory(pState->pSortedRules);
3172 pState->pSortedRules = nullptr;
3173
3174 MOS_ZeroMemory(pState->pDllRuleTable, sizeof(pState->pDllRuleTable));
3175 MOS_ZeroMemory(pState->iDllRuleCount, sizeof(pState->iDllRuleCount));
3176 }
3177
3178 // Zero counters
3179 MOS_ZeroMemory(iNoOverr, sizeof(iNoOverr));
3180 MOS_ZeroMemory(iDefault, sizeof(iDefault));
3181 MOS_ZeroMemory(iCustom, sizeof(iCustom));
3182
3183 // Count number of entries for each state
3184 for (i = 0; i < 2; i++)
3185 {
3186 if (i == 0)
3187 {
3188 pRule = pState->pRuleTableDefault;
3189 }
3190 else if (i == 1)
3191 {
3192 pRule = pState->pRuleTableCustom;
3193 }
3194
3195 // Table not set - continue
3196 if (!pRule)
3197 continue;
3198
3199 for (; pRule->id != RID_Op_EOF; pRule++)
3200 {
3201 // Skip extended rules (variable lenght)
3202 if (RID_IS_EXTENDED(pRule->id))
3203 { // value contains number of entries
3204 pRule += pRule->value;
3205 }
3206 else if (pRule->id == RID_Op_NewEntry)
3207 {
3208 // Save Rule Group
3209 if (i == 0)
3210 {
3211 group = pRule->value;
3212 }
3213 else
3214 {
3215 group = RULE_CUSTOM;
3216 }
3217
3218 // Second rule must always be RID_IsParserState
3219 pRule++;
3220 if (pRule->id != RID_IsParserState)
3221 {
3222 VP_RENDER_ASSERTMESSAGE("Rule does not start with State.");
3223 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3224 return false;
3225 }
3226
3227 // Get Parser State -> validate value
3228 state = pRule->value;
3229 if (state < Parser_Begin)
3230 {
3231 VP_RENDER_ASSERTMESSAGE("Invalid State %d.", state);
3232 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3233 return false;
3234 }
3235 else if (state >= Parser_Custom)
3236 { // Custom states are set together in the same entry
3237 state = Parser_Custom;
3238 }
3239
3240 if (group == RULE_NO_OVERRIDE)
3241 {
3242 iNoOverr[state]++;
3243 }
3244 else if (group == RULE_DEFAULT)
3245 {
3246 iDefault[state]++;
3247 }
3248 else
3249 {
3250 iCustom[state]++;
3251 }
3252
3253 iTotal++;
3254 }
3255 }
3256 }
3257
3258 // Allocate rules
3259 pState->pSortedRules = (Kdll_RuleEntrySet *)MOS_AllocAndZeroMemory(iTotal * sizeof(Kdll_RuleEntrySet));
3260 if (!pState->pSortedRules)
3261 {
3262 VP_RENDER_ASSERTMESSAGE("Failed to allocate rule table.");
3263 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3264 return false;
3265 }
3266
3267 // Setup pointers to sorted rules
3268 pState->pDllRuleTable[0] = pState->pSortedRules;
3269 for (j = 0, i = 0; i < Parser_Count; i++)
3270 {
3271 // Setup start pointer and number of entries to search for each state
3272 pState->pDllRuleTable[i] = pState->pDllRuleTable[j] + pState->iDllRuleCount[j];
3273 pState->iDllRuleCount[i] = iNoOverr[i] + iCustom[i] + iDefault[i];
3274
3275 // Setup offsets to rules for sorting
3276 iDefault[i] = iNoOverr[i] + iCustom[i]; // Last set of rules
3277 iCustom[i] = iNoOverr[i]; // 2nd set of rules
3278 iNoOverr[i] = 0; // 1st set of rules
3279
3280 j = i;
3281 }
3282
3283 // Sort rules for fast access
3284 // Integrate enforced, custom, default rules into one single access table
3285 for (i = 0; i < 2; i++)
3286 {
3287 if (i == 0)
3288 {
3289 pRule = pState->pRuleTableDefault;
3290 }
3291 else if (i == 1)
3292 {
3293 pRule = pState->pRuleTableCustom;
3294 }
3295
3296 // Table not set - continue
3297 if (!pRule)
3298 continue;
3299
3300 while (pRule->id != RID_Op_EOF)
3301 {
3302 if (pRule->id != RID_Op_NewEntry)
3303 {
3304 VP_RENDER_ASSERTMESSAGE("New rule entry expected.");
3305 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3306 return false;
3307 }
3308
3309 // Save Rule Group
3310 if (i == 0)
3311 {
3312 group = pRule->value;
3313 }
3314 else
3315 {
3316 group = RULE_CUSTOM;
3317 }
3318
3319 // Get Parser State -> validate value
3320 pRule++;
3321 state = pRule->value;
3322 if (state >= Parser_Custom)
3323 { // Custom states are set together in the same entry
3324 state = Parser_Custom;
3325 }
3326 else
3327 { // Skip state check - already handled by acceleration table
3328 pRule++;
3329 }
3330
3331 // Point to sorted rule set entry
3332 if (group == RULE_NO_OVERRIDE)
3333 {
3334 j = iNoOverr[state]++;
3335 }
3336 else if (group == RULE_DEFAULT)
3337 {
3338 j = iDefault[state]++;
3339 }
3340 else
3341 {
3342 j = iCustom[state]++;
3343 }
3344
3345 // Point to sorted ruleset for the current parser state
3346 pRuleSet = pState->pDllRuleTable[state] + j;
3347
3348 // Fill RuleSet
3349 pRuleSet->pRuleEntry = pRule;
3350 pRuleSet->iGroup = group;
3351
3352 // Count number of match rules, including extended rules
3353 while (RID_IS_MATCH(pRule->id))
3354 {
3355 if (RID_IS_EXTENDED(pRule->id))
3356 {
3357 pRuleSet->iMatchCount += pRule->value;
3358 pRule += pRule->value;
3359 }
3360
3361 pRuleSet->iMatchCount++;
3362 pRule++;
3363 }
3364
3365 // Count number of set rules, including extended rules
3366 while (RID_IS_SET(pRule->id))
3367 {
3368 if (RID_IS_EXTENDED(pRule->id))
3369 {
3370 pRuleSet->iSetCount += pRule->value;
3371 pRule += pRule->value;
3372 }
3373
3374 pRuleSet->iSetCount++;
3375 pRule++;
3376 }
3377
3378 // Rule must have at least one "Set" rule
3379 if (pRuleSet->iSetCount < 1)
3380 {
3381 VP_RENDER_ASSERTMESSAGE("Ruleset must have at least one set rule.");
3382 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3383 return false;
3384 }
3385 }
3386 }
3387
3388 // Rule table is now sorted and integrated with custom rules
3389 return true;
3390 }
3391
3392 //---------------------------------------------------------------------------------------
3393 // KernelDll_AllocateStates - Allocate Kernel Dynamic Linking/Loading (Dll) States
3394 //
3395 // - Allocate DL states
3396 // - Setup export/import list for linking
3397 // - Prepare pool of search nodes
3398 // - Load component kernels from binary file
3399 // - Setup kernel cache
3400 // - Setup kernel dynamic linking rules
3401 //
3402 // Parameters: [in] pKernelBin - Pointer to Kernel binary file loaded in sys memory
3403 // [in] uKernelSize - Kernel file size
3404 // [in] pFcPatchBin - Pointer to FC patch binary file loaded in sys memory
3405 // [in] uFcPatchCacheSize - FC patch binary file size
3406 // [in] platform - Gfx platform
3407 // [in] pDefaultRules - Dynamic Linking Rules Table
3408 //
3409 // Output: Pointer to allocated Kernel dll state
3410 // nullptr - Failed to allocate Kernel dll state
3411 //-----------------------------------------------------------------------------------------
KernelDll_AllocateStates(void * pKernelBin,uint32_t uKernelSize,void * pFcPatchCache,uint32_t uFcPatchCacheSize,const Kdll_RuleEntry * pDefaultRules,void (* ModifyFunctionPointers)(PKdll_State))3412 Kdll_State *KernelDll_AllocateStates(
3413 void * pKernelBin,
3414 uint32_t uKernelSize,
3415 void * pFcPatchCache,
3416 uint32_t uFcPatchCacheSize,
3417 const Kdll_RuleEntry *pDefaultRules,
3418 void (*ModifyFunctionPointers)(PKdll_State))
3419 {
3420 Kdll_State * pState;
3421 Kdll_CacheEntry * pCacheEntry;
3422 Kdll_KernelCache * pKernelCache;
3423 Kdll_KernelHashTable *pHashTable;
3424 Kdll_KernelHashEntry *pHashEntries;
3425
3426 int32_t iSize;
3427 int32_t nExports = 0;
3428 int32_t nImports = 0;
3429 uint32_t * pLinkOffset = nullptr;
3430 Kdll_LinkData * pLinkSort = nullptr;
3431 Kdll_LinkData * pLinkData;
3432 Kdll_LinkData * pExports;
3433 Kdll_LinkFileHeader *pLinkHeader;
3434
3435 int32_t i, j;
3436 uint32_t *pOffsets;
3437 uint8_t * pBase;
3438
3439 VP_RENDER_FUNCTION_ENTER;
3440
3441 // Allocate dynamic linking states
3442 i = sizeof(Kdll_State); // Dynamic linking states
3443 i += sizeof(Kdll_CacheEntry) * IDR_VP_TOTAL_NUM_KERNELS; // Component kernel cache entries
3444 i += sizeof(Kdll_CacheEntry) * IDR_VP_TOTAL_NUM_KERNELS; // CMFC kernel patch cache entries
3445 i += sizeof(Kdll_CacheEntry) * DL_DEFAULT_COMBINED_KERNELS; // Combined kernel cache entries
3446 i += DL_COMBINED_KERNEL_CACHE_SIZE; // Combined kernel buffer
3447 i += sizeof(Kdll_LinkData) * DL_MAX_EXPORT_COUNT; // Kernel Export table
3448
3449 pState = (Kdll_State *)MOS_AllocAndZeroMemory(i);
3450 if (!pState)
3451 {
3452 VP_RENDER_ASSERTMESSAGE("Failed to allocate kernel dll states.");
3453 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3454 goto cleanup;
3455 }
3456 pState->iSize = i;
3457 pState->dwRefresh = 0;
3458 pState->pProcamp = nullptr;
3459 pState->iProcampSize = 0;
3460 pState->pSortedRules = nullptr;
3461
3462 if ((pFcPatchCache != nullptr) && (uFcPatchCacheSize != 0))
3463 {
3464 pState->bEnableCMFC = true;
3465 }
3466
3467 // Initialize platform specific function pointers
3468 if (!KernelDll_SetupFunctionPointers(pState, ModifyFunctionPointers))
3469 {
3470 VP_RENDER_ASSERTMESSAGE("Failed to setup function pointers.");
3471 MT_ERR1(MT_VP_KERNEL_RULE, MT_CODE_LINE, __LINE__);
3472 goto cleanup;
3473 }
3474
3475 pKernelCache = &pState->ComponentKernelCache;
3476
3477 // No custom kernels/rules
3478 pState->pRuleTableCustom = nullptr;
3479 pState->pCustomKernelCache = nullptr;
3480
3481 // Set Kernel DLL Rules
3482 pState->pRuleTableDefault = pDefaultRules;
3483
3484 // Integrate and sort rule tables
3485 KernelDll_SortRuleTable(pState);
3486
3487 // Setup component kernel cache
3488 pKernelCache->pCache = (uint8_t *)pKernelBin;
3489 pKernelCache->iCacheSize = (int32_t)uKernelSize;
3490 pKernelCache->iCacheFree = 0;
3491 pKernelCache->iCacheMaxEntries = IDR_VP_TOTAL_NUM_KERNELS;
3492 pKernelCache->iCacheEntries = IDR_VP_TOTAL_NUM_KERNELS;
3493 pKernelCache->pCacheEntries = (Kdll_CacheEntry *)(pState + 1);
3494
3495 pOffsets = (uint32_t *)pKernelCache->pCache;
3496 pBase = (uint8_t *)(pOffsets + IDR_VP_TOTAL_NUM_KERNELS + 1);
3497 pCacheEntry = pKernelCache->pCacheEntries;
3498 for (i = 0; i < IDR_VP_TOTAL_NUM_KERNELS; i++, pCacheEntry++)
3499 {
3500 pCacheEntry->iKUID = i;
3501 pCacheEntry->iKCID = -1;
3502 pCacheEntry->dwLoaded = 0;
3503 pCacheEntry->dwRefresh = 0;
3504 pCacheEntry->wHashEntry = 0;
3505 pCacheEntry->szName = g_cInit_ComponentNames[i];
3506 pCacheEntry->iSize = pOffsets[i + 1] - pOffsets[i];
3507 pCacheEntry->pBinary = (pCacheEntry->iSize > 0) ? (pBase + pOffsets[i]) : nullptr;
3508 }
3509
3510 // Setup CMFC kernel patch cache
3511 pKernelCache = &pState->CmFcPatchCache;
3512
3513 if (pState->bEnableCMFC && pFcPatchCache)
3514 {
3515 pKernelCache->pCache = (uint8_t *)pFcPatchCache;
3516 pKernelCache->iCacheSize = (int32_t)uFcPatchCacheSize;
3517 pKernelCache->iCacheFree = 0;
3518 pKernelCache->iCacheMaxEntries = IDR_VP_TOTAL_NUM_KERNELS;
3519 pKernelCache->iCacheEntries = IDR_VP_TOTAL_NUM_KERNELS;
3520 pKernelCache->pCacheEntries = pCacheEntry;
3521
3522 pOffsets = (uint32_t *)pKernelCache->pCache;
3523 pBase = (uint8_t *)(pOffsets + IDR_VP_TOTAL_NUM_KERNELS + 1);
3524 for (i = 0; i < IDR_VP_TOTAL_NUM_KERNELS; i++, pCacheEntry++)
3525 {
3526 pCacheEntry->iKUID = i;
3527 pCacheEntry->iKCID = -1;
3528 pCacheEntry->dwLoaded = 0;
3529 pCacheEntry->dwRefresh = 0;
3530 pCacheEntry->wHashEntry = 0;
3531 pCacheEntry->szName = g_cInit_ComponentNames[i];
3532 pCacheEntry->iSize = pOffsets[i + 1] - pOffsets[i];
3533 pCacheEntry->pBinary = (pCacheEntry->iSize > 0) ? (pBase + pOffsets[i]) : nullptr;
3534 }
3535 }
3536 else
3537 {
3538 pCacheEntry += IDR_VP_TOTAL_NUM_KERNELS;
3539 }
3540
3541 // Setup combined kernel cache
3542 pKernelCache = &pState->KernelCache;
3543 pKernelCache->iCacheMaxEntries = DL_DEFAULT_COMBINED_KERNELS;
3544 pKernelCache->iCacheEntries = 0;
3545 pKernelCache->iCacheSize = DL_COMBINED_KERNEL_CACHE_SIZE; // Size of kernel cache
3546 pKernelCache->iCacheFree = DL_COMBINED_KERNEL_CACHE_SIZE; // Free cache size
3547 pKernelCache->iCacheID = 0x00010000; // Cache ID
3548 pKernelCache->pCacheEntries = pCacheEntry; // Cached kernel entries
3549 pKernelCache->pCache = (uint8_t *)(pCacheEntry + DL_DEFAULT_COMBINED_KERNELS); // kernels
3550
3551 // reset cache entries
3552 for (i = 0; i < DL_DEFAULT_COMBINED_KERNELS; i++, pCacheEntry++)
3553 {
3554 pCacheEntry->iKUID = -1;
3555 pCacheEntry->iKCID = -1;
3556 pCacheEntry->pBinary = pKernelCache->pCache + i * DL_CACHE_BLOCK_SIZE;
3557 if (i != DL_DEFAULT_COMBINED_KERNELS - 1)
3558 {
3559 pCacheEntry->pNextEntry = pCacheEntry + 1;
3560 }
3561 else
3562 {
3563 pCacheEntry->pNextEntry = nullptr;
3564 }
3565 }
3566
3567 //------------------------------------
3568 // Setup hash table
3569 //------------------------------------
3570 pHashTable = &pState->KernelHashTable;
3571 pHashEntries = pState->KernelHashTable.HashEntry - 1;
3572
3573 pHashTable->pool = 1; // first in pool (1 based index)
3574 pHashTable->last = DL_MAX_COMBINED_KERNELS; // last in pool (for releasing)
3575
3576 for (i = 1; i <= DL_MAX_COMBINED_KERNELS; i++)
3577 {
3578 pHashEntries[i].next = i + 1;
3579 }
3580 pHashEntries[i - 1].next = 0; // last entry
3581
3582 //------------------------------------
3583 // Setup dynamic linking import/export array
3584 //------------------------------------
3585 pCacheEntry = pState->ComponentKernelCache.pCacheEntries;
3586 iSize = pCacheEntry[IDR_VP_LinkFile].iSize;
3587 if (iSize == 0)
3588 {
3589 VP_RENDER_NORMALMESSAGE("Link file is missing.");
3590 goto cleanup;
3591 }
3592
3593 // Get link file binary data
3594 pLinkHeader = (Kdll_LinkFileHeader *)pCacheEntry[IDR_VP_LinkFile].pBinary;
3595 if (pLinkHeader == nullptr ||
3596 pLinkHeader->dwVersion != IDR_VP_LINKFILE_VERSION ||
3597 sizeof(Kdll_LinkFileHeader) != IDR_VP_LINKFILE_HEADER)
3598 {
3599 VP_RENDER_ASSERTMESSAGE("Invalid link file version.");
3600 goto cleanup;
3601 }
3602 iSize = (iSize - IDR_VP_LINKFILE_HEADER) / sizeof(Kdll_LinkData);
3603
3604 // Create temporary list of sorted link data and offsets
3605 pLinkSort = (Kdll_LinkData *)MOS_AllocAndZeroMemory(iSize * sizeof(Kdll_LinkData));
3606 pLinkOffset = (uint32_t *)MOS_AllocAndZeroMemory((IDR_VP_TOTAL_NUM_KERNELS + 1) * sizeof(uint32_t));
3607 if (!pLinkSort || !pLinkOffset)
3608 {
3609 VP_RENDER_ASSERTMESSAGE("Failed to allocate temporary buffers.");
3610 goto cleanup;
3611 }
3612
3613 // Count number of imports for each component kernel
3614 pCacheEntry[0].pLink = pLinkData = (Kdll_LinkData *)(pLinkHeader + 1);
3615 for (i = iSize; i > 0; i--, pLinkData++)
3616 {
3617 if (pLinkData->iKUID < IDR_VP_TOTAL_NUM_KERNELS)
3618 {
3619 pCacheEntry[pLinkData->iKUID].nLink++;
3620 }
3621
3622 nExports += pLinkData->bExport;
3623 nImports += !pLinkData->bExport;
3624 }
3625
3626 // Sanity check
3627 if (nExports != (int32_t)pLinkHeader->dwExports ||
3628 nImports != (int32_t)pLinkHeader->dwImports)
3629 {
3630 VP_RENDER_ASSERTMESSAGE("Inconsistent header data.");
3631 goto cleanup;
3632 }
3633
3634 if (nExports > DL_MAX_EXPORT_COUNT)
3635 {
3636 VP_RENDER_ASSERTMESSAGE("Unsupported number of exports %d > %d.", nExports, DL_MAX_EXPORT_COUNT);
3637 goto cleanup;
3638 }
3639
3640 pState->ComponentKernelCache.pExports = pExports = (Kdll_LinkData *)(pKernelCache->pCache + pKernelCache->iCacheSize);
3641 pState->ComponentKernelCache.nExports = nExports;
3642
3643 // Calculate offsets for sorting
3644 pLinkOffset[0] = 0;
3645 pLinkData = pCacheEntry[0].pLink;
3646 for (i = 1; i < IDR_VP_TOTAL_NUM_KERNELS; i++)
3647 {
3648 pLinkOffset[i] = pLinkOffset[i - 1] + pCacheEntry[i - 1].nLink;
3649 pCacheEntry[i].pLink = (pCacheEntry[i].nLink) ? (pLinkData + pLinkOffset[i]) : nullptr;
3650 }
3651 pLinkOffset[i] = pLinkOffset[i - 1] + pCacheEntry[i - 1].nLink;
3652
3653 // Sort link data
3654 for (i = iSize; i > 0; i--, pLinkData++)
3655 {
3656 j = pLinkOffset[MOS_MIN(pLinkData->iKUID, IDR_VP_TOTAL_NUM_KERNELS)]++;
3657 pLinkSort[j] = *pLinkData;
3658
3659 // Add to export table
3660 if (pLinkData->bExport &&
3661 pLinkData->iLabelID < DL_MAX_EXPORT_COUNT)
3662 {
3663 pExports[pLinkData->iLabelID] = *pLinkData;
3664 }
3665 }
3666
3667 // Copy sort data
3668 pLinkData = pCacheEntry[0].pLink;
3669 MOS_SecureMemcpy(pLinkData, iSize * sizeof(Kdll_LinkData), (void *)pLinkSort, iSize * sizeof(Kdll_LinkData));
3670
3671 // Release sort buffers
3672 MOS_FreeMemory(pLinkOffset);
3673 MOS_FreeMemory(pLinkSort);
3674
3675 // Return
3676 return pState;
3677
3678 cleanup:
3679 if (pState)
3680 {
3681 MOS_FreeMemory(pState->pSortedRules);
3682 pState->pSortedRules = nullptr;
3683 }
3684
3685 // Free DL States and temporary sort buffers
3686 MOS_FreeMemory(pState);
3687 MOS_FreeMemory(pLinkSort);
3688 MOS_FreeMemory(pLinkOffset);
3689
3690 return nullptr;
3691 }
3692
3693 //--------------------------------------------------------------
3694 // KernelDll_ReleaseAdditionalCacheEntries - Release the additional kernel cache entries
3695 //--------------------------------------------------------------
KernelDll_ReleaseAdditionalCacheEntries(Kdll_KernelCache * pCache)3696 void KernelDll_ReleaseAdditionalCacheEntries(Kdll_KernelCache *pCache)
3697 {
3698 VP_RENDER_FUNCTION_ENTER;
3699 if (pCache->iCacheMaxEntries > DL_DEFAULT_COMBINED_KERNELS)
3700 {
3701 Kdll_CacheEntry *pNewEntries, *pEntries;
3702 pNewEntries = (pCache->pCacheEntries + DL_DEFAULT_COMBINED_KERNELS - 1)->pNextEntry;
3703 for (int i = 0; i < (pCache->iCacheMaxEntries - DL_DEFAULT_COMBINED_KERNELS) / DL_NEW_COMBINED_KERNELS; i++)
3704 {
3705 pEntries = (pNewEntries + DL_NEW_COMBINED_KERNELS - 1)->pNextEntry;
3706 MOS_FreeMemory(pNewEntries);
3707 pNewEntries = pEntries;
3708 }
3709 }
3710 }
3711
3712 //---------------------------------------------------------------------------------------
3713 // KernelDll_ReleaseStates - Release Kernel Dynamic Linking/Loading (Dll) States
3714 //
3715 // Parameters:
3716 // Kdll_State *pState - [in] Kernel dll State to release
3717 //
3718 // Output: Pointer to allocated Kernel dll state
3719 // nullptr - Failed to allocate Kernel dll state
3720 //-----------------------------------------------------------------------------------------
KernelDll_ReleaseStates(Kdll_State * pState)3721 void KernelDll_ReleaseStates(Kdll_State *pState)
3722 {
3723 VP_RENDER_FUNCTION_ENTER;
3724
3725 if (!pState)
3726 return;
3727 KernelDll_ReleaseAdditionalCacheEntries(&pState->KernelCache);
3728 MOS_FreeMemory(pState->ComponentKernelCache.pCache);
3729 MOS_FreeMemory(pState->CmFcPatchCache.pCache);
3730 MOS_FreeMemory(pState->pSortedRules);
3731 MOS_FreeMemory(pState);
3732 }
3733
3734 //---------------------------------------------------------------------------------------
3735 // KernelDll_SetupFunctionPointers - Setup Function pointers based on platform
3736 //
3737 // Parameters:
3738 // char *pState - [in/out] Kernel Dll state
3739 // platform - [in] platform
3740 //
3741 // Output: true - Function pointers are set
3742 // false - Failed to setup function pointers (invalid platform)
3743 //-----------------------------------------------------------------------------------------
KernelDll_SetupFunctionPointers(Kdll_State * pState,void (* ModifyFunctionPointers)(PKdll_State))3744 static bool KernelDll_SetupFunctionPointers(
3745 Kdll_State *pState,
3746 void (*ModifyFunctionPointers)(PKdll_State))
3747 {
3748 VP_RENDER_FUNCTION_ENTER;
3749
3750 pState->pfnSetupCSC = KernelDll_SetupCSC;
3751 pState->pfnMapCSCMatrix = KernelDll_MapCSCMatrix;
3752 pState->pfnFindRule = KernelDll_FindRule;
3753 pState->pfnUpdateState = KernelDll_UpdateState;
3754 pState->pfnSearchKernel = KernelDll_SearchKernel;
3755 pState->pfnBuildKernel = KernelDll_BuildKernel;
3756 pState->pfnStartKernelSearch = KernelDll_StartKernelSearch;
3757
3758 if (ModifyFunctionPointers != nullptr)
3759 {
3760 (*ModifyFunctionPointers)(pState);
3761 }
3762
3763 #if EMUL || VPHAL_LIB
3764 // Disable callbacks
3765 pState->pToken = nullptr;
3766 pState->pfnCbListKernel = nullptr;
3767 pState->pfnCbSearchSate = nullptr;
3768 #endif // EMUL || VPHAL_LIB
3769
3770 return true;
3771 }
3772
3773 //---------------------------------------------------------------------------------------
3774 // Kdll_AddKernelList - Add kernel to CM FC kernel list
3775 //
3776 // Parameters:
3777 // Kdll_KernelCache *pKernelCache - [in] Component kernel cache
3778 // Kdll_KernelCache *pCmFcPatchCache - [in] Component kernel patch data cache
3779 // Kdll_SearchState *pSearchState - [in/out] Kernel search state
3780 // Kdll_PatchData *pKernelPatch - [in] Kernel Patch data
3781 // void *pPatchDst - [in] Patch data Dst address
3782 // int32_t iKUID - [in] Kernel Unique ID
3783 // cm_fc_kernel_t *Cm_Fc_Kernels - [in/out] CM FC Kernels
3784 //
3785 // Output: true if suceeded, false otherwise
3786 //---------------------------------------------------------------------------------------
Kdll_AddKernelList(Kdll_KernelCache * pKernelCache,Kdll_KernelCache * pCmFcPatchCache,Kdll_SearchState * pSearchState,int32_t iKUID,Kdll_PatchData * pKernelPatch,void * pPatchDst,cm_fc_kernel_t * Cm_Fc_Kernels)3787 bool Kdll_AddKernelList(Kdll_KernelCache *pKernelCache,
3788 Kdll_KernelCache * pCmFcPatchCache,
3789 Kdll_SearchState * pSearchState,
3790 int32_t iKUID,
3791 Kdll_PatchData * pKernelPatch,
3792 void * pPatchDst,
3793 cm_fc_kernel_t * Cm_Fc_Kernels)
3794 {
3795 Kdll_State * pState;
3796 Kdll_Symbol * pSymbols;
3797 Kdll_CacheEntry *kernels;
3798 Kdll_CacheEntry *pPatch;
3799 Kdll_LinkData * link;
3800 Kdll_LinkData * liSearch_reloc;
3801 int * size;
3802 int * left;
3803 int dwSize;
3804 int i;
3805 int base;
3806 bool bInline;
3807 bool res;
3808
3809 VP_RENDER_FUNCTION_ENTER;
3810
3811 res = false;
3812
3813 // Check if Kernel ID is valid
3814 if (iKUID >= pKernelCache->iCacheEntries)
3815 {
3816 VP_RENDER_NORMALMESSAGE("invalid Kernel ID %d.", iKUID);
3817 goto finish;
3818 }
3819
3820 // Get KDLL state
3821 pState = pSearchState->pKdllState;
3822
3823 // Get current combined kernel
3824 size = &pSearchState->KernelSize;
3825 left = &pSearchState->KernelLeft;
3826 pSymbols = &pSearchState->KernelLink;
3827 base = (*size) >> 2;
3828
3829 // Find selected kernel/patch and kernel size; check if there is enough space
3830 kernels = &pKernelCache->pCacheEntries[iKUID];
3831 pPatch = &pCmFcPatchCache->pCacheEntries[iKUID];
3832 dwSize = kernels->iSize;
3833 if (*left < dwSize)
3834 {
3835 VP_RENDER_NORMALMESSAGE("exceeded maximum kernel size.");
3836 goto finish;
3837 }
3838
3839 // Check if there is enough space for symbols
3840 if (pSymbols->dwCount + kernels->nLink >= pSymbols->dwSize)
3841 {
3842 VP_RENDER_NORMALMESSAGE("exceeded maximum numbers of symbols to resolve.");
3843 goto finish;
3844 }
3845
3846 #if EMUL || VPHAL_LIB
3847 VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
3848
3849 if (pState->pfnCbListKernel)
3850 {
3851 pState->pfnCbListKernel(pState->pToken, kernels->szName);
3852 }
3853 #elif _DEBUG || _RELEASE_INTERNAL // EMUL || VPHAL_LIB
3854 VP_RENDER_NORMALMESSAGE("%s.", kernels->szName);
3855 #endif // _DEBUG
3856
3857 MT_LOG1(MT_VP_KERNEL_LIST_ADD, MT_NORMAL, MT_VP_KERNEL_ID, iKUID);
3858
3859 // Append symbols to resolve, relocate symbols
3860 link = kernels->pLink;
3861 liSearch_reloc = pSymbols->pLink + pSymbols->dwCount;
3862
3863 bInline = false;
3864 if (link)
3865 {
3866 for (i = kernels->nLink; i > 0; i--, link++)
3867 {
3868 if (link->bInline)
3869 {
3870 // Inline code included
3871 if (!link->bExport)
3872 {
3873 bInline = true;
3874 }
3875 }
3876 else
3877 {
3878 *liSearch_reloc = *link;
3879 liSearch_reloc->dwOffset += base;
3880 liSearch_reloc++;
3881
3882 pSymbols->dwCount++;
3883 }
3884 }
3885 }
3886
3887 *size += dwSize;
3888 *left -= dwSize;
3889 Cm_Fc_Kernels->binary_buf = (const char *)kernels->pBinary;
3890 Cm_Fc_Kernels->binary_size = kernels->iSize;
3891 Cm_Fc_Kernels->patch_buf = (const char *)pPatch->pBinary;
3892 Cm_Fc_Kernels->patch_size = pPatch->iSize;
3893 res = true;
3894
3895 finish:
3896 return res;
3897 }
3898
3899 //---------------------------------------------------------------------------------------
3900 // KernelDll_BuildKernel_CmFc - Build CM based FC combine Kernel
3901 //
3902 // Parameters: [in/out] pState - Pointer to Kernel binary file loaded in sys memory
3903 // [in/out] pSearchState - Kernel file size
3904 //
3905 // Output: bool
3906 // TRUE - Successful FALSE - Failed
3907 //-----------------------------------------------------------------------------------------
KernelDll_BuildKernel_CmFc(Kdll_State * pState,Kdll_SearchState * pSearchState)3908 bool KernelDll_BuildKernel_CmFc(Kdll_State *pState, Kdll_SearchState *pSearchState)
3909 {
3910 Kdll_KernelCache *pKernelCache = &pState->ComponentKernelCache;
3911 Kdll_KernelCache *pPatchCache = &pState->CmFcPatchCache;
3912 Kdll_KernelCache *pCustomCache = pState->pCustomKernelCache;
3913 bool res;
3914 int32_t offset = 0;
3915 int32_t * pKernelID, *pPatchID;
3916 uint8_t * pPatchData;
3917 Kdll_PatchData * pKernelPatch;
3918 uint8_t * kernel = pSearchState->Kernel;
3919 Kdll_Symbol * pSymbols = &pSearchState->KernelLink;
3920 uint32_t nExports = pKernelCache->nExports;
3921 Kdll_LinkData * pExports = pKernelCache->pExports;
3922 Kdll_LinkData * pLink;
3923 int32_t iOffset;
3924 uint32_t dwResolveOffset[DL_MAX_EXPORT_COUNT];
3925 uint32_t dwTotalKernelCount;
3926 size_t stEstimatedKernelSize;
3927 int32_t iKUID;
3928 bool bResolveDone;
3929 int32_t i;
3930 cm_fc_kernel_t Cm_Fc_kernels[DL_MAX_KERNELS];
3931
3932 VP_RENDER_FUNCTION_ENTER;
3933
3934 // Disable pop-up box window for STL assertion to avoid VM hang in auto test.
3935 #if (!LINUX && !ANDROID)
3936 uint32_t prevErrorMode = ::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
3937 #if defined(_MSC_VER)
3938 ::_set_error_mode(_OUT_TO_STDERR);
3939 _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
3940 _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
3941 _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
3942 _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
3943 _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
3944 _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
3945 #endif
3946 #endif
3947
3948 pSearchState->KernelLink.dwSize = DL_MAX_SYMBOLS;
3949 pSearchState->KernelLink.dwCount = 0;
3950 pSearchState->KernelLink.pLink = pSearchState->LinkArray;
3951 pSearchState->KernelSize = 0;
3952 pSearchState->KernelLeft = sizeof(pSearchState->Kernel);
3953 pSearchState->KernelLink.dwCount = 0;
3954
3955 MOS_ZeroMemory(Cm_Fc_kernels, sizeof(Cm_Fc_kernels));
3956 dwTotalKernelCount = 0;
3957 stEstimatedKernelSize = 0;
3958
3959 #if EMUL || VPHAL_LIB || _DEBUG
3960 VP_RENDER_NORMALMESSAGE("Component Kernels:");
3961 #endif // EMUL || VPHAL_LIB || _DEBUG
3962
3963 pKernelID = pSearchState->KernelID;
3964 pPatchID = pSearchState->PatchID;
3965 pPatchData = nullptr;
3966
3967 for (offset = 0; offset < pSearchState->KernelCount; offset++, pKernelID++, pPatchID++, dwTotalKernelCount++)
3968 {
3969 // Get patch information associated with the kernel
3970 pKernelPatch = (*pPatchID >= 0) ? &(pSearchState->Patches[*pPatchID]) : nullptr;
3971
3972 // Append/Patch kernel from internal cache
3973 res = Kdll_AddKernelList(pKernelCache, pPatchCache, pSearchState, *pKernelID, pKernelPatch, pPatchData, &Cm_Fc_kernels[dwTotalKernelCount]);
3974
3975 stEstimatedKernelSize += Cm_Fc_kernels[dwTotalKernelCount].binary_size;
3976
3977 if (*pKernelID == IDR_VP_EOT)
3978 {
3979 dwTotalKernelCount--;
3980 }
3981
3982 if (!res)
3983 {
3984 VP_RENDER_NORMALMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
3985 res = false;
3986 goto finish;
3987 }
3988 }
3989
3990 // Resolve kernel dependencies
3991 MOS_ZeroMemory(dwResolveOffset, sizeof(dwResolveOffset));
3992
3993 do
3994 {
3995 // Update exports
3996 for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
3997 {
3998 if (pLink->bExport)
3999 {
4000 dwResolveOffset[pLink->iLabelID] = pLink->dwOffset;
4001 }
4002 }
4003
4004 bResolveDone = true;
4005 for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
4006 {
4007 // validate label
4008 if (pLink->iLabelID > nExports || // invalid label
4009 pExports[pLink->iLabelID].bExport == 0) // label not in the export table
4010 {
4011 VP_RENDER_NORMALMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
4012 res = false;
4013 goto finish;
4014 }
4015
4016 // load dependencies
4017 if (!pLink->bExport && !dwResolveOffset[pLink->iLabelID])
4018 {
4019 // set flag for another pass as newly loaded
4020 // kernels may contain dependencies of their own
4021 bResolveDone = false;
4022
4023 // Add dependencies to kernel list
4024 iKUID = pExports[pLink->iLabelID].iKUID;
4025 res = Kdll_AddKernelList(pKernelCache, pPatchCache, pSearchState, iKUID, nullptr, nullptr, &Cm_Fc_kernels[dwTotalKernelCount]);
4026
4027 if (!res)
4028 {
4029 VP_RENDER_NORMALMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
4030 res = false;
4031 goto finish;
4032 }
4033
4034 dwTotalKernelCount++;
4035
4036 // Restart
4037 break;
4038 }
4039 } // for
4040 } while (!bResolveDone);
4041
4042 if (stEstimatedKernelSize > DL_MAX_KERNEL_SIZE)
4043 {
4044 res = false;
4045 VP_RENDER_NORMALMESSAGE("Kernel size exceeded kdll limitatin.");
4046 goto finish;
4047 }
4048
4049 stEstimatedKernelSize = DL_MAX_KERNEL_SIZE;
4050
4051 // Get combine kernel binary from CMFC lib
4052 if (CM_FC_OK != cm_fc_combine_kernels(dwTotalKernelCount, Cm_Fc_kernels, (char *)pSearchState->Kernel, &stEstimatedKernelSize, nullptr))
4053 {
4054 res = false;
4055 VP_RENDER_NORMALMESSAGE("cm_fc_combine_kernels() function call failed.");
4056 goto finish;
4057 }
4058
4059 // Get combine kernel binary size from CMFC lib
4060 pSearchState->KernelSize = (int)stEstimatedKernelSize;
4061
4062 res = true;
4063
4064 finish:
4065 #if (!LINUX && !ANDROID)
4066 ::SetErrorMode(prevErrorMode);
4067 #endif
4068 return res;
4069 }
4070
4071 //--------------------------------------------------------------
4072 // KernelDll_AllocateHashEntry - Allocate hash entry
4073 //--------------------------------------------------------------
KernelDll_AllocateHashEntry(Kdll_KernelHashTable * pHashTable,uint32_t hash)4074 uint16_t KernelDll_AllocateHashEntry(Kdll_KernelHashTable *pHashTable,
4075 uint32_t hash)
4076 {
4077 Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1;
4078 Kdll_KernelHashEntry *pNewEntry;
4079 uint32_t folded_hash;
4080 uint16_t entry;
4081
4082 VP_RENDER_FUNCTION_ENTER;
4083
4084 entry = pHashTable->pool;
4085 if (!entry)
4086 {
4087 return 0;
4088 }
4089
4090 // Get entry from pool
4091 pNewEntry = &pHashEntry[entry];
4092 pHashTable->pool = pNewEntry->next;
4093 if (pHashTable->last == entry)
4094 {
4095 pHashTable->last = 0;
4096 }
4097
4098 // Initialize entry, attach to the hash table
4099 FOLD_HASH(folded_hash, hash);
4100 pNewEntry->dwHash = hash;
4101 pNewEntry->next = pHashTable->wHashTable[folded_hash];
4102 pNewEntry->iFilter = 0;
4103 pNewEntry->pFilter = nullptr;
4104 pNewEntry->pCacheEntry = nullptr;
4105 pHashTable->wHashTable[folded_hash] = entry;
4106 return entry;
4107 }
4108
4109 //--------------------------------------------------------------
4110 // KernelDll_CacheGarbageCollection - performs garbage collection
4111 //--------------------------------------------------------------
KernelDll_GarbageCollection(Kdll_State * pState,int32_t size)4112 bool KernelDll_GarbageCollection(Kdll_State *pState, int32_t size)
4113 {
4114 Kdll_KernelCache *pCache = &pState->KernelCache;
4115 Kdll_CacheEntry *pEntry = pCache->pCacheEntries;
4116 Kdll_CacheEntry *pOldest = nullptr;
4117 Kdll_KernelHashTable *pHashTable = &pState->KernelHashTable;
4118 Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1;
4119 uint32_t dwOldest = (uint32_t)-1;
4120 uint16_t wEntry = 0;
4121 int32_t i;
4122
4123 MOS_UNUSED(size);
4124
4125 VP_RENDER_FUNCTION_ENTER;
4126
4127 // Adjust refresh values to avoid overflow
4128 if (pState->dwRefresh > 0xffff0000)
4129 {
4130 pState->dwRefresh -= 0x80000000;
4131 for (i = pCache->iCacheMaxEntries; i > 0; i--)
4132 {
4133 if (pEntry->dwRefresh < 0x80000000)
4134 pEntry->dwRefresh = 0;
4135 else
4136 pEntry->dwRefresh -= 0x80000000;
4137 pEntry = pEntry->pNextEntry;
4138 }
4139 }
4140
4141 // No need to deallocate old entries
4142 if (pCache->iCacheEntries < DL_MAX_COMBINED_KERNELS)
4143 {
4144 return true;
4145 }
4146
4147 for (i = pCache->iCacheMaxEntries; i > 0; i--)
4148 {
4149 // deallocate old unreferenced entries
4150 if (pEntry->iKCID != -1 && pEntry->dwLoaded == 0)
4151 {
4152 if (pEntry->dwRefresh < dwOldest)
4153 {
4154 pOldest = pEntry;
4155 dwOldest = pEntry->dwRefresh;
4156 wEntry = pEntry->wHashEntry;
4157 }
4158 }
4159 pEntry = pEntry->pNextEntry;
4160 }
4161
4162 // No entry to release, sanity checks
4163 pHashEntry += wEntry;
4164 if (!pOldest ||
4165 wEntry == 0 ||
4166 pHashEntry->pCacheEntry != pOldest)
4167 {
4168 VP_RENDER_ASSERT(false);
4169 return false;
4170 }
4171
4172 // Release hash and cache entries
4173 KernelDll_ReleaseHashEntry(pHashTable, wEntry);
4174 KernelDll_ReleaseCacheEntry(pCache, pOldest);
4175
4176 return true;
4177 }
4178
4179 //--------------------------------------------------------------
4180 // KernelDll_AllocateCacheEntry - Allocate cache entry for a given size
4181 //--------------------------------------------------------------
4182 Kdll_CacheEntry *
KernelDll_AllocateCacheEntry(Kdll_KernelCache * pCache,int32_t iSize)4183 KernelDll_AllocateCacheEntry(Kdll_KernelCache *pCache, int32_t iSize)
4184 {
4185 Kdll_CacheEntry *pEntry = pCache->pCacheEntries;
4186 uint8_t *pCacheBinary = nullptr;
4187 Kdll_CacheEntry *pCacheNextEntry = nullptr;
4188 int32_t i, j;
4189
4190 VP_RENDER_FUNCTION_ENTER;
4191
4192 // Check size
4193 if (iSize > DL_CACHE_BLOCK_SIZE)
4194 {
4195 return nullptr;
4196 }
4197
4198 // Search empty entry
4199 j = pCache->iCacheMaxEntries;
4200 for (i = 0; i < j; i++)
4201 {
4202 if (pEntry->iKCID == -1)
4203 {
4204 break;
4205 }
4206 pEntry = pEntry->pNextEntry;
4207 }
4208 if (i == j)
4209 {
4210 // Try to allocate more cache entries
4211 pEntry = KernelDll_AllocateAdditionalCacheEntries(pCache);
4212 if(! pEntry)
4213 {
4214 return nullptr;
4215 }
4216 }
4217
4218 // Reset entry
4219 pCacheBinary = pEntry->pBinary;
4220 pCacheNextEntry = pEntry->pNextEntry;
4221 MOS_ZeroMemory(pEntry, sizeof(Kdll_CacheEntry));
4222 pEntry->iSize = iSize;
4223 pEntry->pBinary = pCacheBinary;
4224 pEntry->pNextEntry = pCacheNextEntry;
4225
4226 // Increment entries
4227 pCache->iCacheEntries++;
4228 return pEntry;
4229 }
4230
4231 //--------------------------------------------------------------
4232 // KernelDll_AllocateAdditionalCacheEntries - Allocate more kernel cache entries
4233 //--------------------------------------------------------------
4234 Kdll_CacheEntry *
KernelDll_AllocateAdditionalCacheEntries(Kdll_KernelCache * pCache)4235 KernelDll_AllocateAdditionalCacheEntries(Kdll_KernelCache *pCache)
4236 {
4237 Kdll_CacheEntry *pNewEntry = nullptr;
4238 Kdll_CacheEntry *pChcheEntry;
4239 int i, j;
4240
4241 VP_RENDER_FUNCTION_ENTER;
4242
4243 // Check num
4244 if (pCache->iCacheEntries + DL_NEW_COMBINED_KERNELS > DL_MAX_COMBINED_KERNELS)
4245 {
4246 VP_RENDER_ASSERTMESSAGE("KernelDll_AllocateAdditionalCacheEntries: Can't allocate more kernel cache entries\n");
4247 return nullptr;
4248 }
4249
4250 // Allocate the new entires
4251 i = (sizeof(Kdll_CacheEntry) + DL_CACHE_BLOCK_SIZE) * DL_NEW_COMBINED_KERNELS;
4252 pNewEntry = (Kdll_CacheEntry *)MOS_AllocAndZeroMemory(i);
4253 if (!pNewEntry)
4254 {
4255 VP_RENDER_ASSERTMESSAGE("KernelDll_AllocateAdditionalCacheEntries: Failed to allocate kernel cache entries\n");
4256 return nullptr;
4257 }
4258
4259 // Update the cache entires
4260 pChcheEntry = pCache->pCacheEntries;
4261 for(j = 0; j < pCache->iCacheMaxEntries - 1; j++)
4262 {
4263 pChcheEntry = pChcheEntry->pNextEntry;
4264 }
4265 pChcheEntry->pNextEntry = pNewEntry;
4266 for(j = 0; j < DL_NEW_COMBINED_KERNELS; j++, pNewEntry++)
4267 {
4268 pNewEntry->iKUID = -1;
4269 pNewEntry->iKCID = -1;
4270 pNewEntry->pBinary = (uint8_t *)(pNewEntry + DL_NEW_COMBINED_KERNELS - j) + j * DL_CACHE_BLOCK_SIZE;
4271 if(j != DL_NEW_COMBINED_KERNELS - 1)
4272 {
4273 pNewEntry->pNextEntry = pNewEntry + 1;
4274 }
4275 else
4276 {
4277 pNewEntry->pNextEntry = nullptr;
4278 }
4279 }
4280
4281 pCache->iCacheMaxEntries += DL_NEW_COMBINED_KERNELS;
4282 pCache->iCacheSize += DL_NEW_COMBINED_KERNELS * DL_CACHE_BLOCK_SIZE;
4283 pCache->iCacheFree += DL_NEW_COMBINED_KERNELS * DL_CACHE_BLOCK_SIZE;
4284 return (Kdll_CacheEntry *)(pNewEntry - DL_NEW_COMBINED_KERNELS);
4285 }
4286
4287 //--------------------------------------------------------------
4288 // KernelDll_AddKernel - Add kernel into hash table and kernel cache
4289 //--------------------------------------------------------------
4290 Kdll_CacheEntry *
KernelDll_AddKernel(Kdll_State * pState,Kdll_SearchState * pSearchState,Kdll_FilterEntry * pFilter,int32_t iFilterSize,uint32_t dwHash)4291 KernelDll_AddKernel(Kdll_State *pState, // Kernel Dll state
4292 Kdll_SearchState *pSearchState, // Search state
4293 Kdll_FilterEntry *pFilter, // Original filter
4294 int32_t iFilterSize, // Original filter size
4295 uint32_t dwHash)
4296 {
4297 Kdll_CacheEntry *pCacheEntry;
4298 Kdll_KernelHashTable *pHashTable;
4299 Kdll_KernelHashEntry *pHashEntry;
4300 uint16_t entry;
4301 int32_t size;
4302 uint8_t *ptr;
4303
4304 VP_RENDER_FUNCTION_ENTER;
4305
4306 // Check kernel
4307 if (pSearchState->KernelSize <= 0)
4308 {
4309 return nullptr;
4310 }
4311
4312 // Get hash table
4313 pHashTable = &pState->KernelHashTable;
4314 pHashEntry = &pHashTable->HashEntry[0] - 1; // all indices are 1 based (0 = null)
4315
4316 // allocate space in kernel cache to store the kernel, filter, CSC parameters
4317 size = pSearchState->KernelSize + // Kernel
4318 pSearchState->iFilterSize * sizeof(Kdll_FilterEntry) * 2 + // Original + Modified Filter
4319 sizeof(Kdll_CSC_Params) + // CSC parameters
4320 sizeof(VPHAL_CSPACE); // Intermediate Color Space for colorfill
4321
4322 // Run garbage collection, create space for new kernel and metadata
4323 KernelDll_GarbageCollection(pState, size);
4324
4325 // Get new kernel cache entry
4326 pCacheEntry = KernelDll_AllocateCacheEntry(&pState->KernelCache, size);
4327 if (!pCacheEntry)
4328 {
4329 VP_RENDER_ASSERTMESSAGE("Failed to allocate cache space for new kernel.");
4330 return nullptr;
4331 }
4332
4333 // Get hash entry
4334 entry = KernelDll_AllocateHashEntry(pHashTable, dwHash);
4335 if (!entry)
4336 {
4337 VP_RENDER_ASSERTMESSAGE("Failed to allocate hash entry for new kernel.");
4338 KernelDll_ReleaseCacheEntry(&pState->KernelCache, pCacheEntry);
4339 return nullptr;
4340 }
4341
4342 // Setup cache entry, copy kernel
4343 pCacheEntry->iKUID = -1;
4344 pCacheEntry->iKCID = pState->KernelCache.iCacheID; // Create new kernel cache id (KCID)
4345 pCacheEntry->dwRefresh = pState->dwRefresh++;
4346 pCacheEntry->wHashEntry = entry;
4347
4348 // Save kernel
4349 pCacheEntry->iSize = pSearchState->KernelSize;
4350 MOS_SecureMemcpy(pCacheEntry->pBinary, pSearchState->KernelSize, (void *)pSearchState->Kernel, pSearchState->KernelSize);
4351 ptr = pCacheEntry->pBinary + pSearchState->KernelSize;
4352
4353 // Save modified filter
4354 pCacheEntry->iFilterSize = pSearchState->iFilterSize;
4355 pCacheEntry->pFilter = (Kdll_FilterEntry *) (ptr);
4356 MOS_SecureMemcpy(ptr, pSearchState->iFilterSize * sizeof(Kdll_FilterEntry), (void *)pSearchState->Filter, pSearchState->iFilterSize * sizeof(Kdll_FilterEntry));
4357 ptr += pSearchState->iFilterSize * sizeof(Kdll_FilterEntry);
4358
4359 // Save CSC parameters associated with the kernel
4360 pCacheEntry->pCscParams = (Kdll_CSC_Params *) (ptr);
4361 MOS_SecureMemcpy(ptr, sizeof(Kdll_CSC_Params), (void *)&pSearchState->CscParams, sizeof(Kdll_CSC_Params));
4362 ptr += sizeof(Kdll_CSC_Params);
4363 // Save intermediate color space for colorfill
4364 pCacheEntry->colorfill_cspace = pState->colorfill_cspace;
4365 ptr += sizeof(VPHAL_CSPACE);
4366
4367 // increment KCID (Range = 0x00010000 - 0x7fffffff)
4368 pState->KernelCache.iCacheID = 0x00010000 + (pState->KernelCache.iCacheID - 0x0000ffff) % 0x7fff0000;
4369
4370 // Setup hash entry, copy filter
4371 pHashEntry += entry;
4372 pHashEntry->pCacheEntry = pCacheEntry;
4373
4374 // Save original filter for search purposes - modified filter is used for rendering
4375 pHashEntry->iFilter = iFilterSize;
4376 pHashEntry->pFilter = (Kdll_FilterEntry *) (ptr);
4377 MOS_SecureMemcpy(ptr, iFilterSize * sizeof(Kdll_FilterEntry), (void *)pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
4378
4379 return pCacheEntry;
4380 }
4381
4382 //--------------------------------------------------------------
4383 // KernelDll_ReleaseHashEntry - Release hash table entry
4384 //--------------------------------------------------------------
KernelDll_ReleaseHashEntry(Kdll_KernelHashTable * pHashTable,uint16_t entry)4385 void KernelDll_ReleaseHashEntry(Kdll_KernelHashTable *pHashTable, uint16_t entry)
4386 {
4387 Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1;
4388 uint32_t folded_hash;
4389 uint16_t next;
4390
4391 VP_RENDER_FUNCTION_ENTER;
4392
4393 if (entry == 0)
4394 {
4395 return;
4396 }
4397
4398 // unlink entry
4399 next = pHashEntry[entry].next;
4400 pHashEntry[entry].next = 0;
4401
4402 // remove references to entry from hash table
4403 FOLD_HASH(folded_hash, pHashEntry[entry].dwHash);
4404 if (pHashTable->wHashTable[folded_hash] == entry)
4405 {
4406 pHashTable->wHashTable[folded_hash] = next;
4407 }
4408 else
4409 {
4410 uint16_t prev = pHashTable->wHashTable[folded_hash];
4411
4412 while (prev != 0 &&
4413 pHashEntry[prev].next != entry)
4414 {
4415 prev = pHashEntry[prev].next;
4416 }
4417
4418 if (prev)
4419 {
4420 pHashEntry[prev].next = next;
4421 }
4422 }
4423
4424 // return entry to pool
4425 if (pHashTable->pool == 0)
4426 {
4427 pHashTable->pool = entry;
4428 }
4429 else
4430 {
4431 pHashEntry[pHashTable->last].next = entry;
4432 }
4433 pHashTable->last = entry;
4434 }
4435
4436 //--------------------------------------------------------------
4437 // KernelDll_ReleaseCacheEntry - Release cache entry
4438 //--------------------------------------------------------------
KernelDll_ReleaseCacheEntry(Kdll_KernelCache * pCache,Kdll_CacheEntry * pEntry)4439 void KernelDll_ReleaseCacheEntry(Kdll_KernelCache *pCache,
4440 Kdll_CacheEntry *pEntry)
4441 {
4442 pEntry->iKUID = -1;
4443 pEntry->iKCID = -1;
4444 pCache->iCacheEntries--;
4445 }
4446
4447 //---------------------------------------------------------------------------------------
4448 // KernelDll_SetupFunctionPointers - Setup Function pointers based on platform
4449 //
4450 // Parameters:
4451 // KdllState *pState - [in/out] Kernel Dll state
4452 //
4453 // Output: true - Function pointers are set
4454 // false - Failed to setup function pointers (invalid platform)
4455 //-----------------------------------------------------------------------------------------
KernelDll_SetupFunctionPointers_Ext(Kdll_State * pState)4456 bool KernelDll_SetupFunctionPointers_Ext(
4457 Kdll_State *pState)
4458 {
4459 VP_RENDER_FUNCTION_ENTER;
4460
4461 if (pState && pState->bEnableCMFC)
4462 {
4463 pState->pfnBuildKernel = KernelDll_BuildKernel_CmFc;
4464 }
4465
4466 return true;
4467 }
4468
4469 #ifdef __cplusplus
4470 }
4471 #endif // __cplusplus
4472