xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_thread_space_rt.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2007-2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_thread_space_rt.cpp
24 //! \brief     Contains Class CmThreadSpaceRT implementations.
25 //!
26 
27 #include "cm_thread_space_rt.h"
28 
29 #include "cm_kernel_rt.h"
30 #include "cm_task_rt.h"
31 #include "cm_mem.h"
32 #include "cm_device_rt.h"
33 #include "cm_surface_2d.h"
34 #include "cm_extension_creator.h"
35 
// Three-state marker. The enumerators keep the values 0, 1 and 2; nothing in
// the visible part of this file reads them.
enum CM_TS_FLAG
{
    WHITE = 0,  // 0
    GRAY,       // 1
    BLACK       // 2
};
42 
43 static CM_DEPENDENCY waveFrontPattern =
44 {
45     3,
46     {-1, -1, 0},
47     {0, -1, -1}
48 };
49 
50 static CM_DEPENDENCY waveFront26Pattern =
51 {
52     4,
53     {-1, -1, 0, 1},
54     {0, -1, -1, -1}
55 };
56 
57 static CM_DEPENDENCY waveFront26ZPattern =
58 {
59     5,
60     {-1, -1, -1, 0, 1},
61     { 1, 0, -1, -1, -1}
62 };
63 
64 static CM_DEPENDENCY waveFront26ZIPattern =
65 {
66     7,
67     {-1, -2, -1, -1, 0, 1, 1},
68     {1, 0, 0, -1, -1, -1, 0}
69 };
70 
71 static CM_DEPENDENCY horizontalPattern =
72 {
73     1,
74     {0},
75     {-1}
76 };
77 
78 static CM_DEPENDENCY verticalPattern =
79 {
80     1,
81     {-1},
82     {0}
83 };
84 
85 static CM_DEPENDENCY waveFront26XPattern =
86 {
87     7,
88     { -1, -1, -1, 0, 0, 0, 1 },
89     { 3, 1, -1, -1, -2, -3, -3 }
90 };
91 
92 static CM_DEPENDENCY waveFront26ZIGPattern =
93 {
94     5,
95     { -1, -1, -1, 0, 1 },
96     { 1, 0, -1, -1, -1 }
97 };
98 
99 namespace CMRT_UMD
100 {
101 //*-----------------------------------------------------------------------------
102 //| Purpose:    Reset task and clear all the kernel
103 //| Returns:    Result of the operation.
104 //*-----------------------------------------------------------------------------
Create(CmDeviceRT * device,uint32_t indexTsArray,uint32_t width,uint32_t height,CmThreadSpaceRT * & threadSpace)105 int32_t CmThreadSpaceRT::Create( CmDeviceRT* device, uint32_t indexTsArray, uint32_t width, uint32_t height, CmThreadSpaceRT* & threadSpace )
106 {
107     if( (0 == width) || (0 == height) )
108     {
109         CM_ASSERTMESSAGE("Error: Invalid thread space width or height.");
110         return CM_INVALID_THREAD_SPACE;
111     }
112 
113     int32_t result = CM_SUCCESS;
114     threadSpace = new (std::nothrow) CmThreadSpaceRT( device, indexTsArray, width, height );
115     if( threadSpace )
116     {
117         device->m_memObjectCount.threadSpaceCount++;
118 
119         result = threadSpace->Initialize( );
120         if( result != CM_SUCCESS )
121         {
122             CmThreadSpaceRT::Destroy( threadSpace);
123         }
124     }
125     else
126     {
127         CM_ASSERTMESSAGE("Error: Failed to create CmThreadSpace due to out of system memory.");
128         result = CM_OUT_OF_HOST_MEMORY;
129     }
130     return result;
131 }
132 
133 //*-----------------------------------------------------------------------------
134 //| Purpose:    Destroy CM thread space
135 //| Returns:    Result of the operation.
136 //*-----------------------------------------------------------------------------
Destroy(CmThreadSpaceRT * & threadSpace)137 int32_t CmThreadSpaceRT::Destroy( CmThreadSpaceRT* &threadSpace )
138 {
139     if( threadSpace )
140     {
141         threadSpace->m_device->m_memObjectCount.threadSpaceCount--;
142         delete threadSpace;
143         threadSpace = nullptr;
144     }
145     return CM_SUCCESS;
146 }
147 
148 //*-----------------------------------------------------------------------------
149 //| Purpose:    Constructor of CmThreadSpace
150 //| Returns:    Result of the operation.
151 //*-----------------------------------------------------------------------------
CmThreadSpaceRT(CmDeviceRT * device,uint32_t indexTsArray,uint32_t width,uint32_t height)152 CmThreadSpaceRT::CmThreadSpaceRT( CmDeviceRT* device , uint32_t indexTsArray, uint32_t width, uint32_t height ):
153     m_device( device ),
154     m_width( width ),
155     m_height( height ),
156     m_colorCountMinusOne( 0 ),
157     m_26ZIBlockWidth( CM_26ZI_BLOCK_WIDTH ),
158     m_26ZIBlockHeight( CM_26ZI_BLOCK_HEIGHT ),
159     m_threadSpaceUnit(nullptr),
160     m_threadAssociated(false),
161     m_needSetKernelPointer(false),
162     m_kernel(nullptr),
163     m_dependencyPatternType(CM_NONE_DEPENDENCY),
164     m_currentDependencyPattern(CM_NONE_DEPENDENCY),
165     m_26ZIDispatchPattern(VVERTICAL_HVERTICAL_26),
166     m_current26ZIDispatchPattern(VVERTICAL_HVERTICAL_26),
167     m_boardFlag(nullptr),
168     m_boardOrderList(nullptr),
169     m_indexInList(0),
170     m_indexInThreadSpaceArray(indexTsArray),
171     m_walkingPattern(CM_WALK_DEFAULT),
172     m_mediaWalkerParamsSet(false),
173     m_dependencyVectorsSet(false),
174     m_threadSpaceOrderSet(false),
175     m_swBoardSurf(nullptr),
176     m_swBoard(nullptr),
177     m_swScoreBoardEnabled(false),
178     m_threadGroupSpace(nullptr),
179     m_dirtyStatus(nullptr),
180     m_groupSelect(CM_MW_GROUP_NONE)
181 {
182     CmSafeMemSet( &m_dependency, 0, sizeof(CM_HAL_DEPENDENCY) );
183     CmSafeMemSet( &m_wavefront26ZDispatchInfo, 0, sizeof(CM_HAL_WAVEFRONT26Z_DISPATCH_INFO) );
184     CmSafeMemSet( &m_walkingParameters, 0, sizeof(m_walkingParameters) );
185     CmSafeMemSet( &m_dependencyVectors, 0, sizeof(m_dependencyVectors) );
186 }
187 
188 //*-----------------------------------------------------------------------------
189 //| Purpose:    Destructor of CmThreadSpaceRT
190 //| Returns:    Result of the operation.
191 //*-----------------------------------------------------------------------------
~CmThreadSpaceRT(void)192 CmThreadSpaceRT::~CmThreadSpaceRT( void )
193 {
194     MosSafeDeleteArray(m_threadSpaceUnit);
195     MosSafeDeleteArray(m_boardFlag);
196     MosSafeDeleteArray(m_boardOrderList);
197     CmSafeDelete( m_dirtyStatus );
198     CmSafeDelete(m_kernel);
199 
200     if (m_wavefront26ZDispatchInfo.numThreadsInWave)
201     {
202         MOS_FreeMemory(m_wavefront26ZDispatchInfo.numThreadsInWave);
203     }
204 
205     if (m_swScoreBoardEnabled)
206     {
207         MosSafeDeleteArray(m_swBoard);
208         if (m_swBoardSurf != nullptr)
209         {
210             m_device->DestroySurface(m_swBoardSurf);
211         }
212     }
213 
214     if (m_threadGroupSpace != nullptr)
215     {
216         m_device->DestroyThreadGroupSpace(m_threadGroupSpace);
217     }
218 }
219 
220 //*-----------------------------------------------------------------------------
221 //| Purpose:    Initialize CmThreadSpaceRT
222 //| Returns:    Result of the operation.
223 //*-----------------------------------------------------------------------------
Initialize(void)224 int32_t CmThreadSpaceRT::Initialize( void )
225 {
226     m_dirtyStatus = new (std::nothrow) CM_THREAD_SPACE_DIRTY_STATUS;
227     if(m_dirtyStatus == nullptr)
228     {
229         CM_ASSERTMESSAGE("Error: Failed to initialize CmThreadSpace due to out of system memory.");
230         return CM_OUT_OF_HOST_MEMORY;
231     }
232     *m_dirtyStatus = CM_THREAD_SPACE_CLEAN;
233 
234     m_kernel = new (std::nothrow) CmKernelRT*;
235     if (m_kernel == nullptr)
236     {
237         CM_ASSERTMESSAGE("Error: Failed to initialize CmThreadSpace due to out of system memory.");
238         return CM_OUT_OF_HOST_MEMORY;
239     }
240     *m_kernel = nullptr;
241 
242     PCM_HAL_STATE cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
243     m_swScoreBoardEnabled = !(cmHalState->cmHalInterface->IsScoreboardParamNeeded());
244 
245     if (cmHalState->cmHalInterface->CheckMediaModeAvailability() == false)
246     {
247         CM_CHK_CMSTATUS_RETURN(m_device->CreateThreadGroupSpaceEx(1, 1, 1, m_width, m_height, 1, m_threadGroupSpace));
248     }
249 
250     return CM_SUCCESS;
251 }
252 
253 //*-----------------------------------------------------------------------------
254 //! Associate a thread to one uint in the 2-dimensional dependency board with default mask
255 //*-----------------------------------------------------------------------------
AssociateThread(uint32_t x,uint32_t y,CmKernel * kernel,uint32_t threadId)256 CM_RT_API int32_t CmThreadSpaceRT::AssociateThread( uint32_t x, uint32_t y, CmKernel* kernel , uint32_t threadId )
257 {
258     return AssociateThreadWithMask(x, y, kernel, threadId, CM_DEFAULT_THREAD_DEPENDENCY_MASK);
259 }
260 
261 //*-----------------------------------------------------------------------------
262 //! Associate a thread to one uint in the 2-dimensional dependency board.
263 //! If call this function twice with same x/y pair and different thread, the 2nd one will fail
264 //! Enqueue will make sure each x/y pair in the CmThreadSpaceRT object is associated with
265 //! a unique thread in the task to enqueue.Otherwise enqueue will fail.
266 //! Input :
267 //!     1) X/Y coordinats of the uint in dependency board
268 //!     2) pointer to CmKernel
269 //!     3) thread index. It is the same as the read index in
270 //!     CmKernel::SetThreadArg(uint32_t threadId, uint32_t index, size_t size, const void * pValue )
271 //! OUTPUT :
272 //!     CM_SUCCESS if the association is successful
273 //!     CM_INVALID_ARG_VALUE if the input parameters are invalid
274 //!     CM_OUT_OF_HOST_MEMORY if the necessary memory allocation is failed.
275 //*-----------------------------------------------------------------------------
AssociateThreadWithMask(uint32_t x,uint32_t y,CmKernel * kernel,uint32_t threadId,uint8_t dependencyMask)276 CM_RT_API int32_t CmThreadSpaceRT::AssociateThreadWithMask( uint32_t x, uint32_t y, CmKernel* kernel , uint32_t threadId, uint8_t dependencyMask )
277 {
278     INSERT_API_CALL_LOG(GetHalState());
279 
280     if((x >= m_width) || (y >= m_height) || (kernel == nullptr))
281     {
282         CM_ASSERTMESSAGE("Error: Invalid input arguments.");
283         return CM_INVALID_ARG_VALUE;
284     }
285 
286     //Check if the m_threadSpaceUnit is allocated, we only need allocate it once at the first time.
287     if( m_threadSpaceUnit == nullptr )
288     {
289          m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
290         if (m_threadSpaceUnit)
291         {
292             CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT) * m_height * m_width);
293         }
294         else
295         {
296             CM_ASSERTMESSAGE("Error: Out of system memory.");
297             return CM_OUT_OF_HOST_MEMORY;
298         }
299     }
300 
301     uint32_t linearOffset = y*m_width + x;
302     if( (m_threadSpaceUnit[linearOffset].kernel == kernel) &&
303         (m_threadSpaceUnit[linearOffset].threadId == threadId) &&
304         (m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x == x) &&
305         (m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y == y) )
306     {
307         if( m_threadSpaceUnit[linearOffset].dependencyMask == dependencyMask )
308         {
309             m_threadSpaceUnit[linearOffset].reset = CM_REUSE_DEPENDENCY_MASK;
310         }
311         else
312         {
313             m_threadSpaceUnit[linearOffset].dependencyMask = dependencyMask;
314             m_threadSpaceUnit[linearOffset].reset = CM_RESET_DEPENDENCY_MASK;
315         }
316         *m_dirtyStatus = CM_THREAD_SPACE_DEPENDENCY_MASK_DIRTY;
317     }
318     else
319     {
320         m_threadSpaceUnit[linearOffset].kernel = kernel;
321         m_threadSpaceUnit[linearOffset].threadId = threadId;
322         m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x = x;
323         m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y = y;
324         m_threadSpaceUnit[linearOffset].dependencyMask = dependencyMask;
325         m_threadSpaceUnit[linearOffset].reset = CM_NO_BATCH_BUFFER_REUSE;
326         *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
327     }
328 
329     if (!m_threadAssociated)
330     {
331         m_threadAssociated = true;
332     }
333 
334     CmKernelRT *kernelRT = static_cast<CmKernelRT *>(kernel);
335     kernelRT->SetAssociatedToTSFlag(true);
336 
337     return CM_SUCCESS;
338 }
339 
340 //*-----------------------------------------------------------------------------
341 //! Set the dependency pattern. There can be at most 8 dependent unit in the pattern.
342 //! Each dependent unit is indicated as the delta in X coordinat and the delta in Y coordinat
343 //! The call will fail if there is a pair of deltaX/Y with value ( 0, 0 )
344 //! By default, there is no dependent unit, i.e. count is 0.
345 //! Input :
346 //!     1) Total number of dependent units. It is <= 8.
347 //!     2) Array of deltaX. Array size is the first argument.
348 //!        Each deltaX is in the range of [-8, 7]
349 //!     3) Array of deltaY. Array size is the first argument.
350 //!        Each deltaY is in the range of [-8, 7]
351 //! OUTPUT :
352 //!     CM_SUCCESS if the pattern is set
353 //*-----------------------------------------------------------------------------
SetThreadDependencyPattern(uint32_t count,int32_t * deltaX,int32_t * deltaY)354 CM_RT_API int32_t CmThreadSpaceRT::SetThreadDependencyPattern( uint32_t count, int32_t *deltaX, int32_t *deltaY )
355 {
356     INSERT_API_CALL_LOG(GetHalState());
357 
358     if( count > CM_MAX_DEPENDENCY_COUNT )
359     {
360         CM_ASSERTMESSAGE("Error: Exceed dependency count limitation, which is 8.");
361         return CM_FAILURE;
362     }
363 
364     m_dependency.count = count;
365 
366     CmSafeMemCopy( m_dependency.deltaX, deltaX, sizeof( int32_t ) * count );
367     CmSafeMemCopy( m_dependency.deltaY, deltaY, sizeof( int32_t ) * count );
368 
369     return CM_SUCCESS;
370 }
371 
372 //*-----------------------------------------------------------------------------
373 //! Select from X predefined dependency patterns.
374 //! Input :
375 //!     1) pattern index
376 //! OUTPUT :
377 //!     CM_SUCCESS if the pattern is selected
378 //!     CM_OUT_OF_HOST_MEMORY if the necessary memory allocation is failed.
379 //!     CM_FAILURE if the input dependency pattern is not supported.
380 //*-----------------------------------------------------------------------------
SelectThreadDependencyPattern(CM_DEPENDENCY_PATTERN pattern)381 CM_RT_API int32_t CmThreadSpaceRT::SelectThreadDependencyPattern (CM_DEPENDENCY_PATTERN pattern )
382 {
383     INSERT_API_CALL_LOG(GetHalState());
384 
385     int32_t hr = CM_SUCCESS;
386 
387      //Check if the m_boardFlag and m_boardOrderList are NULL. We only need allocate it once at the first time
388     if ( m_boardFlag == nullptr )
389     {
390         m_boardFlag = MOS_NewArray(uint32_t, (m_height * m_width));
391         if ( m_boardFlag )
392         {
393             CmSafeMemSet(m_boardFlag, 0, sizeof(uint32_t) * m_height * m_width);
394         }
395         else
396         {
397             CM_ASSERTMESSAGE("Error: Out of system memory.");
398             return CM_OUT_OF_HOST_MEMORY;
399         }
400     }
401     if ( m_boardOrderList == nullptr )
402     {
403         m_boardOrderList = MOS_NewArray(uint32_t, (m_height * m_width));
404         if (m_boardOrderList )
405         {
406             CmSafeMemSet(m_boardOrderList, 0, sizeof(uint32_t) * m_height * m_width);
407         }
408         else
409         {
410             CM_ASSERTMESSAGE("Error: Out of system memory.");
411             MosSafeDeleteArray(m_boardFlag);
412             return CM_OUT_OF_HOST_MEMORY;
413         }
414     }
415 
416     if( (pattern != CM_NONE_DEPENDENCY) && (m_walkingPattern != CM_WALK_DEFAULT ) )
417     {
418         CM_ASSERTMESSAGE("Error: Only valid when no walking pattern has been selected.");
419         return CM_INVALID_DEPENDENCY_WITH_WALKING_PATTERN;
420     }
421 
422     switch (pattern)
423     {
424         case CM_VERTICAL_WAVE:
425             m_dependencyPatternType = CM_VERTICAL_WAVE;
426             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(verticalPattern.count, verticalPattern.deltaX, verticalPattern.deltaY));
427             break;
428 
429         case CM_HORIZONTAL_WAVE:
430             m_dependencyPatternType = CM_HORIZONTAL_WAVE;
431             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(horizontalPattern.count, horizontalPattern.deltaX, horizontalPattern.deltaY));
432             break;
433 
434         case CM_WAVEFRONT:
435             m_dependencyPatternType = CM_WAVEFRONT;
436             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFrontPattern.count, waveFrontPattern.deltaX, waveFrontPattern.deltaY));
437             break;
438 
439         case CM_WAVEFRONT26:
440             m_dependencyPatternType = CM_WAVEFRONT26;
441             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26Pattern.count, waveFront26Pattern.deltaX, waveFront26Pattern.deltaY));
442             break;
443 
444         case CM_WAVEFRONT26Z:
445             m_dependencyPatternType = CM_WAVEFRONT26Z;
446             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26ZPattern.count, waveFront26ZPattern.deltaX, waveFront26ZPattern.deltaY));
447             m_wavefront26ZDispatchInfo.numThreadsInWave = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t) * m_width * m_height);
448             if (m_threadSpaceUnit == nullptr && !CheckThreadSpaceOrderSet())
449             {
450                 m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
451                 if (m_threadSpaceUnit)
452                 {
453                     CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT)* m_height * m_width);
454                 }
455                 else
456                 {
457                     return CM_OUT_OF_HOST_MEMORY;
458                 }
459                 uint32_t threadId = 0;
460                 uint32_t linearOffset = 0;
461                 for (uint32_t y = 0; y < m_height; ++y)
462                 {
463                     for (uint32_t x = 0; x < m_width; ++x)
464                     {
465                         linearOffset = y*m_width + x;
466                         m_threadSpaceUnit[linearOffset].threadId = threadId++;
467                         m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x = x;
468                         m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y = y;
469                         m_threadSpaceUnit[linearOffset].dependencyMask = (1 << waveFront26ZPattern.count) - 1;
470                         m_threadSpaceUnit[linearOffset].reset = CM_NO_BATCH_BUFFER_REUSE;
471                     }
472                 }
473 
474                 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
475                 m_threadAssociated = true;
476                 m_needSetKernelPointer = true;
477             }
478             break;
479 
480         case CM_WAVEFRONT26ZI:
481             m_dependencyPatternType = CM_WAVEFRONT26ZI;
482             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26ZIPattern.count, waveFront26ZIPattern.deltaX, waveFront26ZIPattern.deltaY));
483             if (m_threadSpaceUnit == nullptr&& !CheckThreadSpaceOrderSet())
484             {
485                 m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
486                 if (m_threadSpaceUnit)
487                 {
488                     CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT)* m_height * m_width);
489                 }
490                 else
491                 {
492                     return CM_OUT_OF_HOST_MEMORY;
493                 }
494                 uint32_t threadId = 0;
495                 uint32_t linearOffset = 0;
496                 for (uint32_t y = 0; y < m_height; ++y)
497                 {
498                     for (uint32_t x = 0; x < m_width; ++x)
499                     {
500                         linearOffset = y*m_width + x;
501                         m_threadSpaceUnit[linearOffset].threadId = threadId++;
502                         m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x = x;
503                         m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y = y;
504                         m_threadSpaceUnit[linearOffset].dependencyMask = (1 << waveFront26ZIPattern.count) - 1;
505                         m_threadSpaceUnit[linearOffset].reset = CM_NO_BATCH_BUFFER_REUSE;
506                     }
507                 }
508 
509                 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
510                 m_threadAssociated = true;
511                 m_needSetKernelPointer = true;
512             }
513             break;
514 
515         case CM_WAVEFRONT26X:
516             m_dependencyPatternType = CM_WAVEFRONT26X;
517             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26XPattern.count, waveFront26XPattern.deltaX, waveFront26XPattern.deltaY));
518             break;
519 
520         case CM_WAVEFRONT26ZIG:
521             m_dependencyPatternType = CM_WAVEFRONT26ZIG;
522             CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26ZIGPattern.count, waveFront26ZIGPattern.deltaX, waveFront26ZIGPattern.deltaY));
523             break;
524 
525         case CM_NONE_DEPENDENCY:
526             m_dependencyPatternType = CM_NONE_DEPENDENCY;
527             hr = CM_SUCCESS;
528             break;
529 
530         default:
531             hr = CM_FAILURE;
532             break;
533     }
534 
535     UpdateDependency();
536 
537     if( m_dependencyPatternType != m_currentDependencyPattern )
538     {
539         *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
540     }
541 
542 finish:
543     return hr;
544 }
545 
SelectMediaWalkingPattern(CM_WALKING_PATTERN pattern)546 CM_RT_API int32_t CmThreadSpaceRT::SelectMediaWalkingPattern( CM_WALKING_PATTERN pattern )
547 {
548     INSERT_API_CALL_LOG(GetHalState());
549 
550     int result = CM_SUCCESS;
551 
552     if( m_dependencyPatternType != CM_NONE_DEPENDENCY )
553     {
554         CM_ASSERTMESSAGE("Error: Only valid when no thread dependency has been selected.");
555         return CM_INVALID_DEPENDENCY_WITH_WALKING_PATTERN;
556     }
557 
558     switch( pattern )
559     {
560         case CM_WALK_DEFAULT:
561         case CM_WALK_HORIZONTAL:
562         case CM_WALK_VERTICAL:
563         case CM_WALK_WAVEFRONT:
564         case CM_WALK_WAVEFRONT26:
565         case CM_WALK_WAVEFRONT26ZIG:
566         case CM_WALK_WAVEFRONT26X:
567         case CM_WALK_WAVEFRONT26XALT:
568         case CM_WALK_WAVEFRONT45D:
569         case CM_WALK_WAVEFRONT45XD_2:
570         case CM_WALK_WAVEFRONT26D:
571         case CM_WALK_WAVEFRONT26XD:
572             m_walkingPattern = pattern;
573             break;
574         default:
575             CM_ASSERTMESSAGE("Error: Invalid media walking pattern.");
576             result = CM_INVALID_MEDIA_WALKING_PATTERN;
577             break;
578     }
579 
580     return result;
581 }
582 
583 //*-----------------------------------------------------------------------------
584 //| Purpose:    Sets the media walker parameters for the CmThreadSpaceRT
585 //|             Used for engineering build, no error checking
586 //| Returns:    CM_SUCCESS
587 //*-----------------------------------------------------------------------------
SelectMediaWalkingParameters(CM_WALKING_PARAMETERS parameters)588 CM_RT_API int32_t CmThreadSpaceRT::SelectMediaWalkingParameters(CM_WALKING_PARAMETERS parameters)
589 {
590     INSERT_API_CALL_LOG(GetHalState());
591 
592     // [0..11] of parameters maps to DWORD5 through DWORD16
593     // No error checking here
594 
595     if( CmSafeMemCompare(&m_walkingParameters, &parameters, sizeof(m_walkingParameters)) != 0 )
596     {
597         CmSafeMemCopy(&m_walkingParameters, &parameters, sizeof(m_walkingParameters));
598         *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
599     }
600 
601     m_mediaWalkerParamsSet = true;
602 
603     return CM_SUCCESS;
604 }
605 
606 //*-----------------------------------------------------------------------------
607 //| Purpose:    Sets the thread space order for the CmThreadSpaceRT
608 //|             Used for engineering build
609 //| Returns:
610 //|     CM_SUCCESS if thread space order is successfully set
611 //|     CM_OUT_OF_HOST_MEMORY if the necessary memory allocation is failed.
612 //|     CM_INVALID_ARG_VALUE if the input arg is not correct.
613 //*-----------------------------------------------------------------------------
SetThreadSpaceOrder(uint32_t threadCount,const CM_THREAD_PARAM * threadSpaceOrder)614 CM_RT_API int32_t CmThreadSpaceRT::SetThreadSpaceOrder(uint32_t threadCount, const CM_THREAD_PARAM* threadSpaceOrder)
615 {
616     INSERT_API_CALL_LOG(GetHalState());
617 
618     if (threadCount != m_width*m_height || threadSpaceOrder == nullptr)
619     {
620         CM_ASSERTMESSAGE("Error: Thread count does not match the thread space size.");
621         return CM_INVALID_ARG_VALUE;
622     }
623     //Check if the m_threadSpaceUnit is allocated, we only need allocate it once at the first time.
624     if (m_threadSpaceUnit == nullptr)
625     {
626         m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
627         if (m_threadSpaceUnit)
628         {
629             CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT)* m_height * m_width);
630         }
631         else
632         {
633             CM_ASSERTMESSAGE("Error: Out of system memory.");
634             return CM_OUT_OF_HOST_MEMORY;
635         }
636     }
637 
638     uint32_t threadId = 0;
639 
640     for (uint32_t i = 0; i < m_width*m_height; i++)
641     {
642         m_threadSpaceUnit[i].threadId = threadId++;
643         m_threadSpaceUnit[i].scoreboardCoordinates = threadSpaceOrder[i].scoreboardCoordinates;
644         m_threadSpaceUnit[i].scoreboardColor = threadSpaceOrder[i].scoreboardColor;
645         m_threadSpaceUnit[i].sliceDestinationSelect = threadSpaceOrder[i].sliceDestinationSelect;
646         m_threadSpaceUnit[i].subSliceDestinationSelect = threadSpaceOrder[i].subSliceDestinationSelect;
647         m_threadSpaceUnit[i].dependencyMask = CM_DEFAULT_THREAD_DEPENDENCY_MASK;
648         m_threadSpaceUnit[i].reset = CM_NO_BATCH_BUFFER_REUSE;
649     }
650     m_threadAssociated = true;
651     m_needSetKernelPointer = true;
652     m_threadSpaceOrderSet = true;
653     *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
654     return CM_SUCCESS;
655 }
656 //*-----------------------------------------------------------------------------
657 //| Purpose:    Sets the dependency vectors for the CmThreadSpaceRT
658 //|             Used for engineering build, no error checking
659 //| Returns:    CM_SUCCESS
660 //*-----------------------------------------------------------------------------
SelectThreadDependencyVectors(CM_DEPENDENCY dependencyVectors)661 CM_RT_API int32_t CmThreadSpaceRT::SelectThreadDependencyVectors(CM_DEPENDENCY dependencyVectors)
662 {
663     INSERT_API_CALL_LOG(GetHalState());
664 
665     if( CmSafeMemCompare(&m_dependencyVectors, &dependencyVectors, sizeof(m_dependencyVectors)) != 0 )
666     {
667         CmSafeMemCopy(&m_dependencyVectors, &dependencyVectors, sizeof(m_dependencyVectors));
668         *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
669     }
670 
671     m_dependencyVectorsSet = true;
672 
673     return CM_SUCCESS;
674 }
675 
676 //*-----------------------------------------------------------------------------
677 //| Purpose:    Sets the color count minus one of the CmThreadSpaceRT
678 //| Returns:    CM_INVALID_ARG_VALUE if colorCount is 0 or greater than 16
679 //|             CM_SUCCESS otherwise
680 //*-----------------------------------------------------------------------------
SetThreadSpaceColorCount(uint32_t colorCount)681 CM_RT_API int32_t CmThreadSpaceRT::SetThreadSpaceColorCount(uint32_t colorCount)
682 {
683     INSERT_API_CALL_LOG(GetHalState());
684 
685     int32_t result = CM_SUCCESS;
686 
687     PCM_HAL_STATE cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
688 
689     result = cmHalState->cmHalInterface->ColorCountSanityCheck(colorCount);
690     if(result != CM_SUCCESS)
691     {
692         CM_ASSERTMESSAGE("Error: Color count sanity check failure.");
693         return result;
694     }
695 
696     m_colorCountMinusOne = colorCount - 1;
697 
698     return CM_SUCCESS;
699 }
700 
701 //*-----------------------------------------------------------------------------
702 //| Purpose:    Sets the dispatch pattern for 26ZI
703 //| Returns:    CM_SUCCESS if valid dispath pattern, CM_FAILURE otherwise
704 //*-----------------------------------------------------------------------------
Set26ZIDispatchPattern(CM_26ZI_DISPATCH_PATTERN pattern)705 CM_RT_API int32_t CmThreadSpaceRT::Set26ZIDispatchPattern( CM_26ZI_DISPATCH_PATTERN pattern )
706 {
707     INSERT_API_CALL_LOG(GetHalState());
708 
709      int result = CM_SUCCESS;
710 
711      switch( pattern )
712      {
713      case VVERTICAL_HVERTICAL_26:
714          m_26ZIDispatchPattern = VVERTICAL_HVERTICAL_26;
715          break;
716      case VVERTICAL_HHORIZONTAL_26:
717          m_26ZIDispatchPattern = VVERTICAL_HHORIZONTAL_26;
718          break;
719      case VVERTICAL26_HHORIZONTAL26:
720          m_26ZIDispatchPattern = VVERTICAL26_HHORIZONTAL26;
721          break;
722      case VVERTICAL1X26_HHORIZONTAL1X26:
723          m_26ZIDispatchPattern = VVERTICAL1X26_HHORIZONTAL1X26;
724          break;
725       default:
726         result = CM_FAILURE;
727         break;
728      }
729 
730     if( m_26ZIDispatchPattern != m_current26ZIDispatchPattern)
731     {
732         *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
733     }
734 
735      return result;
736 }
737 
738 //*-----------------------------------------------------------------------------
739 //| Purpose:    Sets the macro block size to be used to calculate 26ZI dispatch
740 //| Returns:    CM_SUCCESS
741 //*-----------------------------------------------------------------------------
Set26ZIMacroBlockSize(uint32_t width,uint32_t height)742 CM_RT_API int32_t CmThreadSpaceRT::Set26ZIMacroBlockSize( uint32_t width, uint32_t height )
743 {
744     INSERT_API_CALL_LOG(GetHalState());
745     int32_t hr = CM_SUCCESS;
746     m_26ZIBlockWidth = width;
747     m_26ZIBlockHeight = height;
748     hr = UpdateDependency();
749     return hr;
750 }
751 
752 //*-----------------------------------------------------------------------------
753 //| Purpose:    Get the color count of the CmThreadSpaceRT
754 //| Returns:    CM_SUCCESS.
755 //*-----------------------------------------------------------------------------
GetColorCountMinusOne(uint32_t & colorCount)756 int32_t CmThreadSpaceRT::GetColorCountMinusOne(uint32_t & colorCount)
757 {
758     colorCount = m_colorCountMinusOne;
759 
760     return CM_SUCCESS;
761 }
762 
763 //*-----------------------------------------------------------------------------
764 //| Purpose:    Get the width and height of CmThreadSpaceRT
765 //| Returns:    CM_SUCCESS.
766 //*-----------------------------------------------------------------------------
GetThreadSpaceSize(uint32_t & width,uint32_t & height)767 int32_t CmThreadSpaceRT::GetThreadSpaceSize(uint32_t & width, uint32_t & height)
768 {
769     width = m_width;
770     height = m_height;
771 
772     return CM_SUCCESS;
773 }
774 
775 //*-----------------------------------------------------------------------------
776 //| Purpose:    Get thread space's unit
777 //*-----------------------------------------------------------------------------
GetThreadSpaceUnit(CM_THREAD_SPACE_UNIT * & threadSpaceUnit)778 int32_t CmThreadSpaceRT::GetThreadSpaceUnit(CM_THREAD_SPACE_UNIT* &threadSpaceUnit)
779 {
780     threadSpaceUnit = m_threadSpaceUnit;
781     return CM_SUCCESS;
782 }
783 
784 //*-----------------------------------------------------------------------------
785 //| Purpose:    Get the dependency
786 //*-----------------------------------------------------------------------------
GetDependency(CM_HAL_DEPENDENCY * & dependency)787 int32_t CmThreadSpaceRT::GetDependency(CM_HAL_DEPENDENCY* &dependency)
788 {
789     dependency = &m_dependency;
790     return CM_SUCCESS;
791 }
792 
793 //*-----------------------------------------------------------------------------
794 //| Purpose:    Get its dependency type
795 //*-----------------------------------------------------------------------------
GetDependencyPatternType(CM_DEPENDENCY_PATTERN & dependencyPatternType)796 int32_t CmThreadSpaceRT::GetDependencyPatternType(CM_DEPENDENCY_PATTERN &dependencyPatternType)
797 {
798     dependencyPatternType = m_dependencyPatternType;
799 
800     return CM_SUCCESS;
801 }
802 
Get26ZIDispatchPattern(CM_26ZI_DISPATCH_PATTERN & pattern)803 int32_t CmThreadSpaceRT::Get26ZIDispatchPattern( CM_26ZI_DISPATCH_PATTERN &pattern)
804 {
805     pattern = m_26ZIDispatchPattern;
806 
807     return CM_SUCCESS;
808 }
809 
810 //*-----------------------------------------------------------------------------
811 //| Purpose:    Get walking pattern
812 //*-----------------------------------------------------------------------------
GetWalkingPattern(CM_WALKING_PATTERN & walkingPattern)813 int32_t CmThreadSpaceRT::GetWalkingPattern(CM_WALKING_PATTERN &walkingPattern)
814 {
815     walkingPattern = m_walkingPattern;
816     return CM_SUCCESS;
817 }
818 
819 //*-----------------------------------------------------------------------------
820 //| Purpose:    Get media walking parameters
821 //*-----------------------------------------------------------------------------
GetWalkingParameters(CM_WALKING_PARAMETERS & walkingParameters)822 int32_t CmThreadSpaceRT::GetWalkingParameters(CM_WALKING_PARAMETERS &walkingParameters)
823 {
824     CmSafeMemCopy(&walkingParameters, &m_walkingParameters, sizeof(m_walkingParameters));
825     return CM_SUCCESS;
826 }
827 
828 //*-----------------------------------------------------------------------------
829 //| Purpose:    Return true if media walker parameters are set, false otherwise
830 //*-----------------------------------------------------------------------------
CheckWalkingParametersSet()831 bool CmThreadSpaceRT::CheckWalkingParametersSet( )
832 {
833     return m_mediaWalkerParamsSet;
834 }
835 
836 //*-----------------------------------------------------------------------------
837 //| Purpose:    Get dependency vectors
838 //*-----------------------------------------------------------------------------
GetDependencyVectors(CM_HAL_DEPENDENCY & dependencyVectors)839 int32_t CmThreadSpaceRT::GetDependencyVectors(CM_HAL_DEPENDENCY &dependencyVectors)
840 {
841     CmSafeMemCopy(&dependencyVectors, &m_dependencyVectors, sizeof(m_dependencyVectors));
842     return CM_SUCCESS;
843 }
844 
845 //*-----------------------------------------------------------------------------
846 //| Purpose:    Return true if dependency vectors are set, false otherwise
847 //*-----------------------------------------------------------------------------
CheckDependencyVectorsSet()848 bool CmThreadSpaceRT::CheckDependencyVectorsSet( )
849 {
850     return m_dependencyVectorsSet;
851 }
852 
853 //*-----------------------------------------------------------------------------
854 //| Purpose:    Return true if thread space order is set, false otherwise
855 //*-----------------------------------------------------------------------------
CheckThreadSpaceOrderSet()856 bool CmThreadSpaceRT::CheckThreadSpaceOrderSet()
857 {
858     return m_threadSpaceOrderSet;
859 }
860 
861 //*-----------------------------------------------------------------------------
862 //| Purpose:    Get Wavefront26ZDispatchInfo
863 //*-----------------------------------------------------------------------------
GetWavefront26ZDispatchInfo(CM_HAL_WAVEFRONT26Z_DISPATCH_INFO & dispatchInfo)864 int32_t CmThreadSpaceRT::GetWavefront26ZDispatchInfo(CM_HAL_WAVEFRONT26Z_DISPATCH_INFO &dispatchInfo)
865 {
866     dispatchInfo = m_wavefront26ZDispatchInfo;
867     return CM_SUCCESS;
868 }
869 
870 //*-----------------------------------------------------------------------------
871 //| Purpose:    Check the integrity of thread space' association
872 //*-----------------------------------------------------------------------------
IntegrityCheck(CmTaskRT * task)873 bool CmThreadSpaceRT::IntegrityCheck(CmTaskRT* task)
874 {
875     CmKernelRT *kernelRT = nullptr;
876     uint32_t i;
877     uint32_t kernelCount = 0;
878     uint32_t threadNumber = 0;
879     uint32_t kernelIndex = 0;
880     uint32_t unassociated = 0;
881     int32_t hr = CM_SUCCESS;
882 
883     uint8_t **threadSpaceMapping = nullptr;
884     uint8_t *kernelInScoreboard = nullptr;
885 
886     kernelCount = task->GetKernelCount();
887     //Check if it is mult-kernel task, since no threadspace is allowed for multi-kernel tasks
888     if (kernelCount > 1)
889     {
890         CM_ASSERTMESSAGE("Error: threadSpace->IntegrityCheck Failed: ThreadSpace is not allowed in multi-kernel task.");
891         return false;
892     }
893 
894     kernelRT = task->GetKernelPointer(0);
895     CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
896 
897     //To check if the thread space size is matched with thread count
898     kernelRT->GetThreadCount(threadNumber);
899 
900     //Till now, all disallowed settings are abort, now we need check if the thread space association is correct.
901     if (this->IsThreadAssociated())
902     {
903         //For future extending to multiple kernels cases, we're using a general mechanism to check the integrity
904 
905         threadSpaceMapping = MOS_NewArray(uint8_t*, kernelCount);
906         kernelInScoreboard = MOS_NewArray(uint8_t, kernelCount);
907 
908         CM_CHK_NULL_GOTOFINISH_CMERROR(threadSpaceMapping);
909         CM_CHK_NULL_GOTOFINISH_CMERROR(kernelInScoreboard);
910 
911         CmSafeMemSet(threadSpaceMapping, 0, kernelCount*sizeof(uint8_t *));
912         CmSafeMemSet(kernelInScoreboard, 0, kernelCount*sizeof(uint8_t));
913 
914         for (i = 0; i < kernelCount; i++)
915         {
916             kernelRT = task->GetKernelPointer(i);
917             CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
918             kernelRT->GetThreadCount(threadNumber);
919             if (threadNumber == 0)
920             {
921                 threadNumber = m_width * m_height;
922             }
923             threadSpaceMapping[i] = MOS_NewArray(uint8_t, threadNumber);
924             CM_CHK_NULL_GOTOFINISH_CMERROR(threadSpaceMapping[i]);
925             CmSafeMemSet(threadSpaceMapping[i], 0, threadNumber * sizeof(uint8_t));
926             kernelInScoreboard[i] = 0;
927         }
928 
929         for (i = 0; i < m_width * m_height; i ++ )
930         {
931             kernelRT = static_cast<CmKernelRT *> (m_threadSpaceUnit[i].kernel);
932             if (kernelRT == nullptr)
933             {
934                 if (m_needSetKernelPointer)
935                 {
936                     kernelRT = *m_kernel;
937                 }
938             }
939             CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
940 
941             kernelIndex = kernelRT->GetIndexInTask();
942             threadSpaceMapping[kernelIndex][m_threadSpaceUnit[i].threadId] = 1;
943             kernelInScoreboard[kernelIndex] = 1;
944         }
945 
946         for (i = 0; i < kernelCount; i ++)
947         {
948             if(kernelInScoreboard[i])
949             {
950                 kernelRT = task->GetKernelPointer(i);
951                 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
952 
953                 kernelRT->GetThreadCount(threadNumber);
954                 if (threadNumber == 0)
955                 {
956                     threadNumber = m_width * m_height;
957                 }
958                 kernelRT->SetAssociatedToTSFlag(true);
959                 for (uint32_t j = 0; j < threadNumber; j++)
960                 {
961                     if (threadSpaceMapping[i][j] == 0)
962                     {
963                         unassociated ++;
964                         break;
965                     }
966                 }
967             }
968             MosSafeDeleteArray(threadSpaceMapping[i]);
969         }
970 
971         if (unassociated != 0)
972         {
973             CM_ASSERTMESSAGE("Error: The thread space association is not correct.");
974             hr = CM_FAILURE;
975         }
976     }
977 
978 finish:
979 
980     MosSafeDeleteArray(threadSpaceMapping);
981     MosSafeDeleteArray(kernelInScoreboard);
982 
983     return (hr == CM_SUCCESS)? true: false;
984 }
985 
986 //*-----------------------------------------------------------------------------
987 //| Purpose:    Generate Wave45 Sequence
988 //*-----------------------------------------------------------------------------
Wavefront45Sequence()989 int32_t CmThreadSpaceRT::Wavefront45Sequence()
990 {
991     if ( m_currentDependencyPattern == CM_WAVEFRONT )
992     {
993         return CM_SUCCESS;
994     }
995     m_currentDependencyPattern = CM_WAVEFRONT;
996 
997     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
998     m_indexInList = 0;
999 
1000     for (uint32_t y = 0; y < m_height; y ++)
1001     {
1002         for (uint32_t x = 0; x < m_width; x ++)
1003         {
1004             CM_COORDINATE tempCoordinate;
1005             int32_t linearOffset = y * m_width + x;
1006             if (m_boardFlag[linearOffset] == WHITE)
1007             {
1008                 m_boardOrderList[m_indexInList ++] = linearOffset;
1009                 m_boardFlag[linearOffset] = BLACK;
1010                 tempCoordinate.x = x - 1;
1011                 tempCoordinate.y = y + 1;
1012                 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1013                     (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1014                 {
1015                     if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1016                     {
1017                         m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1018                         m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1019                     }
1020                     tempCoordinate.x = tempCoordinate.x - 1;
1021                     tempCoordinate.y = tempCoordinate.y + 1;
1022                 }
1023             }
1024         }
1025     }
1026 
1027     return CM_SUCCESS;
1028 }
1029 
1030 //*-----------------------------------------------------------------------------
1031 //| Purpose:    Generate Wave26 Sequence
1032 //*-----------------------------------------------------------------------------
Wavefront26Sequence()1033 int32_t CmThreadSpaceRT::Wavefront26Sequence()
1034 {
1035     if ( m_currentDependencyPattern == CM_WAVEFRONT26 )
1036     {
1037         return CM_SUCCESS;
1038     }
1039     m_currentDependencyPattern = CM_WAVEFRONT26;
1040 
1041     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1042     m_indexInList = 0;
1043 
1044     for (uint32_t y = 0; y < m_height; y ++)
1045     {
1046         for (uint32_t x = 0; x < m_width; x ++)
1047         {
1048             CM_COORDINATE tempCoordinate;
1049             int32_t linearOffset = y * m_width + x;
1050             if (m_boardFlag[linearOffset] == WHITE)
1051             {
1052                 m_boardOrderList[m_indexInList ++] = linearOffset;
1053                 m_boardFlag[linearOffset] = BLACK;
1054                 tempCoordinate.x = x - 2;
1055                 tempCoordinate.y = y + 1;
1056                 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1057                     (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1058                 {
1059                     if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1060                     {
1061                         m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1062                         m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1063                     }
1064                     tempCoordinate.x = tempCoordinate.x - 2;
1065                     tempCoordinate.y = tempCoordinate.y + 1;
1066                 }
1067             }
1068         }
1069     }
1070 
1071    return CM_SUCCESS;
1072 }
1073 
1074 //*-----------------------------------------------------------------------------
1075 //| Purpose:    Generate Wave26Z Sequence
1076 //*-----------------------------------------------------------------------------
Wavefront26ZSequence()1077 int32_t CmThreadSpaceRT::Wavefront26ZSequence()
1078 {
1079     if ( m_currentDependencyPattern == CM_WAVEFRONT26Z )
1080     {
1081         return CM_SUCCESS;
1082     }
1083     m_currentDependencyPattern = CM_WAVEFRONT26Z;
1084 
1085     uint32_t threadsInWave = 0;
1086     uint32_t numWaves = 0;
1087 
1088     if ( ( m_height % 2 != 0 ) || ( m_width % 2 != 0 ) )
1089     {
1090         return CM_INVALID_ARG_SIZE;
1091     }
1092     CmSafeMemSet( m_boardFlag, WHITE, m_width * m_height * sizeof( uint32_t ) );
1093     m_indexInList = 0;
1094 
1095     uint32_t iX, iY, nOffset;
1096     iX = iY = nOffset = 0;
1097 
1098     uint32_t *waveFrontPosition = MOS_NewArray(uint32_t, m_width);
1099     uint32_t *waveFrontOffset = MOS_NewArray(uint32_t, m_width);
1100     if ( ( waveFrontPosition == nullptr ) || ( waveFrontOffset == nullptr ) )
1101     {
1102         MosSafeDeleteArray( waveFrontPosition );
1103         MosSafeDeleteArray( waveFrontOffset );
1104         return CM_FAILURE;
1105     }
1106     CmSafeMemSet( waveFrontPosition, 0, m_width * sizeof( int ) );
1107 
1108     // set initial value
1109     m_boardFlag[ 0 ] = BLACK;
1110     m_boardOrderList[ 0 ] = 0;
1111     waveFrontPosition[ 0 ] = 1;
1112     m_indexInList = 0;
1113 
1114     CM_COORDINATE mask[ 8 ];
1115     uint32_t nMaskNumber = 0;
1116 
1117     m_wavefront26ZDispatchInfo.numThreadsInWave[numWaves] = 1;
1118     numWaves++;
1119 
1120     while ( m_indexInList < m_width * m_height - 1 )
1121     {
1122 
1123         CmSafeMemSet( waveFrontOffset, 0, m_width * sizeof( int ) );
1124         for ( uint32_t iX = 0; iX < m_width; ++iX )
1125         {
1126             uint32_t iY = waveFrontPosition[ iX ];
1127             nOffset = iY * m_width + iX;
1128             CmSafeMemSet( mask, 0, sizeof( mask ) );
1129 
1130             if ( m_boardFlag[ nOffset ] == WHITE )
1131             {
1132                 if ( ( iX % 2 == 0 ) && ( iY % 2 == 0 ) )
1133                 {
1134                     if ( iX == 0 )
1135                     {
1136                         mask[ 0 ].x = 0;
1137                         mask[ 0 ].y = -1;
1138                         mask[ 1 ].x = 1;
1139                         mask[ 1 ].y = -1;
1140                         nMaskNumber = 2;
1141                     }
1142                     else if ( iY == 0 )
1143                     {
1144                         mask[ 0 ].x = -1;
1145                         mask[ 0 ].y = 1;
1146                         mask[ 1 ].x = -1;
1147                         mask[ 1 ].y = 0;
1148                         nMaskNumber = 2;
1149                     }
1150                     else
1151                     {
1152                         mask[ 0 ].x = -1;
1153                         mask[ 0 ].y = 1;
1154                         mask[ 1 ].x = -1;
1155                         mask[ 1 ].y = 0;
1156                         mask[ 2 ].x = 0;
1157                         mask[ 2 ].y = -1;
1158                         mask[ 3 ].x = 1;
1159                         mask[ 3 ].y = -1;
1160                         nMaskNumber = 4;
1161                     }
1162                 }
1163                 else if ( ( iX % 2 == 0 ) && ( iY % 2 == 1 ) )
1164                 {
1165                     if ( iX == 0 )
1166                     {
1167                         mask[ 0 ].x = 0;
1168                         mask[ 0 ].y = -1;
1169                         mask[ 1 ].x = 1;
1170                         mask[ 1 ].y = -1;
1171                         nMaskNumber = 2;
1172                     }
1173                     else
1174                     {
1175                         mask[ 0 ].x = -1;
1176                         mask[ 0 ].y = 0;
1177                         mask[ 1 ].x = 0;
1178                         mask[ 1 ].y = -1;
1179                         mask[ 2 ].x = 1;
1180                         mask[ 2 ].y = -1;
1181                         nMaskNumber = 3;
1182                     }
1183                 }
1184                 else if ( ( iX % 2 == 1 ) && ( iY % 2 == 0 ) )
1185                 {
1186                     if ( iY == 0 )
1187                     {
1188                         mask[ 0 ].x = -1;
1189                         mask[ 0 ].y = 0;
1190                         nMaskNumber = 1;
1191                     }
1192                     else if ( iX == m_width - 1 )
1193                     {
1194                         mask[ 0 ].x = -1;
1195                         mask[ 0 ].y = 0;
1196                         mask[ 1 ].x = 0;
1197                         mask[ 1 ].y = -1;
1198                         nMaskNumber = 2;
1199                     }
1200                     else
1201                     {
1202                         mask[ 0 ].x = -1;
1203                         mask[ 0 ].y = 0;
1204                         mask[ 1 ].x = 0;
1205                         mask[ 1 ].y = -1;
1206                         mask[ 2 ].x = 1;
1207                         mask[ 2 ].y = -1;
1208                         nMaskNumber = 3;
1209                     }
1210                 }
1211                 else
1212                 {
1213                     mask[ 0 ].x = -1;
1214                     mask[ 0 ].y = 0;
1215                     mask[ 1 ].x = 0;
1216                     mask[ 1 ].y = -1;
1217                     nMaskNumber = 2;
1218                 }
1219 
1220                 // check if all of the dependencies are in the dispatch queue
1221                 bool allInQueue = true;
1222                 for ( uint32_t i = 0; i < nMaskNumber; ++i )
1223                 {
1224                     if ( m_boardFlag[ nOffset + mask[ i ].x + mask[ i ].y * m_width ] == WHITE )
1225                     {
1226                         allInQueue = false;
1227                         break;
1228                     }
1229                 }
1230                 if ( allInQueue )
1231                 {
1232                     waveFrontOffset[ iX ] = nOffset;
1233                     if( waveFrontPosition[ iX ] < m_height - 1 )
1234                     {
1235                         waveFrontPosition[ iX ]++;
1236                     }
1237                 }
1238             }
1239         }
1240 
1241         for ( uint32_t iX = 0; iX < m_width; ++iX )
1242         {
1243             if ( ( m_boardFlag[ waveFrontOffset[ iX ] ] == WHITE ) && ( waveFrontOffset[ iX ] != 0 ) )
1244             {
1245                 m_indexInList++;
1246                 m_boardOrderList[ m_indexInList ] = waveFrontOffset[ iX ];
1247                 m_boardFlag[ waveFrontOffset[ iX ] ] = BLACK;
1248                 threadsInWave++;
1249             }
1250         }
1251 
1252         m_wavefront26ZDispatchInfo.numThreadsInWave[numWaves] = threadsInWave;
1253         threadsInWave = 0;
1254         numWaves++;
1255     }
1256 
1257     MosSafeDeleteArray( waveFrontPosition );
1258     MosSafeDeleteArray( waveFrontOffset );
1259 
1260     m_wavefront26ZDispatchInfo.numWaves = numWaves;
1261 
1262     return CM_SUCCESS;
1263 }
1264 
1265 //*-----------------------------------------------------------------------------
1266 //| Purpose:    Generate Wavefront26ZI Sequence
1267 //|             Dispatch order:
1268 //|                Vertical threads vertically in macro block
1269 //|                Horizontal threads vertically in macro block
1270 //|                Overall 26 pattern
1271 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVVHV26()1272 int32_t CmThreadSpaceRT::Wavefront26ZISeqVVHV26()
1273 {
1274     if ( m_currentDependencyPattern == CM_WAVEFRONT26ZI  &&
1275         ( m_current26ZIDispatchPattern == VVERTICAL_HVERTICAL_26 ) )
1276     {
1277         return CM_SUCCESS;
1278     }
1279 
1280     m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1281     m_current26ZIDispatchPattern = VVERTICAL_HVERTICAL_26;
1282 
1283     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1284     m_indexInList = 0;
1285 
1286     for( uint32_t y = 0; y < m_height; y = y + m_26ZIBlockHeight )
1287     {
1288         for( uint32_t x = 0; x < m_width; x = x + m_26ZIBlockWidth )
1289         {
1290             CM_COORDINATE tempCoordinateFor26;
1291             tempCoordinateFor26.x = x;
1292             tempCoordinateFor26.y = y;
1293 
1294             do
1295             {
1296                 if( m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] == WHITE )
1297                 {
1298                     m_boardOrderList[m_indexInList ++] = tempCoordinateFor26.y * m_width + tempCoordinateFor26.x;
1299                     m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] = BLACK;
1300 
1301                     // do vertical edges
1302                     for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount = widthCount + 2 )
1303                     {
1304                         CM_COORDINATE tempCoordinate;
1305                         uint32_t localHeightCounter = 0;
1306 
1307                         tempCoordinate.x = tempCoordinateFor26.x + widthCount;
1308                         tempCoordinate.y = tempCoordinateFor26.y;
1309                         while( (tempCoordinate.x >= 0) && (tempCoordinate.y >=0) &&
1310                             (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1311                             (localHeightCounter < m_26ZIBlockHeight))
1312                         {
1313                             if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1314                             {
1315                                 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1316                                 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1317                             }
1318                             tempCoordinate.y = tempCoordinate.y + 1;
1319                             localHeightCounter++;
1320                         }
1321                     } // vertical edges
1322 
1323                      // do horizontal edges
1324                     for( uint32_t widthCount = 1; widthCount < m_26ZIBlockWidth; widthCount = widthCount + 2 )
1325                     {
1326                         CM_COORDINATE tempCoordinate;
1327                         uint32_t localHeightCounter = 0;
1328 
1329                         tempCoordinate.x = tempCoordinateFor26.x + widthCount;
1330                         tempCoordinate.y = tempCoordinateFor26.y;
1331                         while( (tempCoordinate.x >= 0) && (tempCoordinate.y >=0) &&
1332                             (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1333                             (localHeightCounter < m_26ZIBlockHeight))
1334                         {
1335                             if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1336                             {
1337                                 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1338                                 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1339                             }
1340                             tempCoordinate.y = tempCoordinate.y + 1;
1341                             localHeightCounter++;
1342                         }
1343                     } // horizontal edges
1344                 }
1345 
1346                 tempCoordinateFor26.x = tempCoordinateFor26.x - (2 * m_26ZIBlockWidth);
1347                 tempCoordinateFor26.y = tempCoordinateFor26.y + (1 * m_26ZIBlockHeight);
1348 
1349             } while( ( tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0)
1350                 && (tempCoordinateFor26.x < (int32_t)m_width) && ( tempCoordinateFor26.y < (int32_t)m_height));
1351         }
1352     }
1353 
1354     return CM_SUCCESS;
1355 }
1356 
1357 //*-----------------------------------------------------------------------------
1358 //| Purpose:    Generate Wavefront26ZI Sequence
1359 //|             Dispatch order:
1360 //|                Vertical threads vertically in macro block
1361 //|                Horizontal threads horizontally in macro block
1362 //|                Overall 26 pattern
1363 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVVHH26()1364 int32_t CmThreadSpaceRT::Wavefront26ZISeqVVHH26()
1365 {
1366     if ( m_currentDependencyPattern == CM_WAVEFRONT26ZI &&
1367         ( m_current26ZIDispatchPattern == VVERTICAL_HHORIZONTAL_26))
1368     {
1369         return CM_SUCCESS;
1370     }
1371 
1372     m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1373     m_current26ZIDispatchPattern = VVERTICAL_HHORIZONTAL_26;
1374 
1375     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1376     m_indexInList = 0;
1377 
1378     for( uint32_t y = 0; y < m_height; y = y + m_26ZIBlockHeight )
1379     {
1380         for( uint32_t x = 0; x < m_width; x = x + m_26ZIBlockWidth )
1381         {
1382             CM_COORDINATE tempCoordinateFor26;
1383             tempCoordinateFor26.x = x;
1384             tempCoordinateFor26.y = y;
1385 
1386             do
1387             {
1388                 if( m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] == WHITE )
1389                 {
1390                     m_boardOrderList[m_indexInList ++] = tempCoordinateFor26.y * m_width + tempCoordinateFor26.x;
1391                     m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] = BLACK;
1392 
1393                     // do vertical edges
1394                     for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount = widthCount + 2 )
1395                     {
1396                         CM_COORDINATE tempCoordinate;
1397                         uint32_t localHeightCounter = 0;
1398 
1399                         tempCoordinate.x = tempCoordinateFor26.x + widthCount;
1400                         tempCoordinate.y = tempCoordinateFor26.y;
1401                         while( (tempCoordinate.x >= 0) && (tempCoordinate.y >=0) &&
1402                             (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1403                             (localHeightCounter < m_26ZIBlockHeight))
1404                         {
1405                             if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1406                             {
1407                                 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1408                                 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1409                             }
1410                             tempCoordinate.y = tempCoordinate.y + 1;
1411                             localHeightCounter++;
1412                         }
1413                     } // vertical edges
1414 
1415                     // horizontal edges
1416                     for( uint32_t heightCount = 0; heightCount < m_26ZIBlockHeight; ++heightCount )
1417                     {
1418                         CM_COORDINATE tempCoordinate;
1419                         uint32_t localWidthCounter = 0;
1420 
1421                         tempCoordinate.x = tempCoordinateFor26.x + 1;
1422                         tempCoordinate.y = tempCoordinateFor26.y + heightCount;
1423                         while ( (tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1424                             (tempCoordinate.x< (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1425                             (localWidthCounter < (m_26ZIBlockWidth / 2) ) )
1426                         {
1427                             if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1428                             {
1429                                 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1430                                 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1431                             }
1432 
1433                             tempCoordinate.x = tempCoordinate.x + 2;
1434                             localWidthCounter++;
1435                         }
1436                     }
1437                     // horizontal edges
1438                 }
1439 
1440                 tempCoordinateFor26.x = tempCoordinateFor26.x - (2 * m_26ZIBlockWidth);
1441                 tempCoordinateFor26.y = tempCoordinateFor26.y + (1 * m_26ZIBlockHeight);
1442 
1443             } while( ( tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0)
1444                 && (tempCoordinateFor26.x < (int32_t)m_width) && ( tempCoordinateFor26.y < (int32_t)m_height));
1445         }
1446     }
1447 
1448     return CM_SUCCESS;
1449 }
1450 
1451 //*-----------------------------------------------------------------------------
1452 //| Purpose:    Generate Wavefront26ZI Sequence
1453 //|             Dispatch order:
1454 //|                Vertical threads vertically in macro block and then along 26 wave
1455 //|                Horizontal threads horizontally in macro block and then along 26 wave
1456 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVV26HH26()1457 int32_t CmThreadSpaceRT::Wavefront26ZISeqVV26HH26()
1458 {
1459     if( (m_currentDependencyPattern == CM_WAVEFRONT26ZI) &&
1460         (m_current26ZIDispatchPattern == VVERTICAL26_HHORIZONTAL26) )
1461     {
1462         return CM_SUCCESS;
1463     }
1464 
1465     m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1466     m_current26ZIDispatchPattern = VVERTICAL26_HHORIZONTAL26;
1467 
1468     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1469     m_indexInList = 0;
1470 
1471     uint32_t waveFrontNum = 0;
1472     uint32_t waveFrontStartX = 0;
1473     uint32_t waveFrontStartY = 0;
1474 
1475     uint32_t adjustHeight = 0;
1476 
1477     CM_COORDINATE tempCoordinateFor26;
1478     tempCoordinateFor26.x = 0;
1479     tempCoordinateFor26.y = 0;
1480 
1481     while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1482         (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) )
1483     {
1484         // use horizontal coordinates to save starting (x,y) for overall 26
1485         CM_COORDINATE tempCoordinateForHorz;
1486         tempCoordinateForHorz.x = tempCoordinateFor26.x;
1487         tempCoordinateForHorz.y = tempCoordinateFor26.y;
1488 
1489        do
1490         {
1491             CM_COORDINATE tempCoordinateForVer;
1492 
1493             for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount += 2 )
1494             {
1495                 uint32_t localHeightCounter = 0;
1496                 tempCoordinateForVer.x = tempCoordinateFor26.x + widthCount;
1497                 tempCoordinateForVer.y = tempCoordinateFor26.y;
1498 
1499                 while( (tempCoordinateForVer.x < (int32_t)m_width) && (tempCoordinateForVer.y < (int32_t)m_height) &&
1500                         (tempCoordinateForVer.x >= 0) && (tempCoordinateForVer.y >= 0) && (localHeightCounter < m_26ZIBlockHeight) )
1501                 {
1502                     if(m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] == WHITE )
1503                     {
1504                         m_boardOrderList[m_indexInList ++] = tempCoordinateForVer.y * m_width + tempCoordinateForVer.x;
1505                         m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] = BLACK;
1506                     }
1507                     tempCoordinateForVer.y += 1;
1508                     localHeightCounter++;
1509                 }
1510             }
1511 
1512             tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1513             tempCoordinateFor26.y = tempCoordinateFor26.y - (1 * m_26ZIBlockHeight);
1514 
1515         } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1516             (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1517 
1518         tempCoordinateFor26.x = tempCoordinateForHorz.x;
1519         tempCoordinateFor26.y = tempCoordinateForHorz.y;
1520 
1521         do
1522         {
1523             // do horizontal edges
1524             for ( uint32_t heightCount = 0; heightCount < m_26ZIBlockHeight; ++heightCount )
1525             {
1526                 uint32_t localWidthCounter = 0;
1527                 tempCoordinateForHorz.x = tempCoordinateFor26.x + 1;
1528                 tempCoordinateForHorz.y = tempCoordinateFor26.y + heightCount;
1529                 while( (tempCoordinateForHorz.x >= 0) && (tempCoordinateForHorz.y >= 0) &&
1530                     (tempCoordinateForHorz.x < (int32_t)m_width) && (tempCoordinateForHorz.y < (int32_t)m_height) &&
1531                     (localWidthCounter < (m_26ZIBlockWidth / 2)) )
1532                 {
1533                     if( m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] == WHITE )
1534                     {
1535                         m_boardOrderList[m_indexInList ++] = tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x;
1536                         m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] = BLACK;
1537                     }
1538 
1539                     tempCoordinateForHorz.x += 2;
1540                     localWidthCounter++;
1541                 }
1542             }
1543 
1544             tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1545             tempCoordinateFor26.y = tempCoordinateFor26.y - (1 * m_26ZIBlockHeight);
1546 
1547         } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1548             (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1549 
1550         if (m_width <= m_26ZIBlockWidth)
1551         {
1552             tempCoordinateFor26.x = 0;
1553             tempCoordinateFor26.y = tempCoordinateForHorz.y + m_26ZIBlockHeight;
1554         }
1555         else
1556         {
1557             // update wavefront number
1558             waveFrontNum++;
1559             adjustHeight = (uint32_t)ceil((double)m_height / m_26ZIBlockHeight);
1560 
1561             if (waveFrontNum < (2 * adjustHeight))
1562             {
1563                 waveFrontStartX = waveFrontNum & 1;
1564                 waveFrontStartY = (uint32_t)floor((double)waveFrontNum / 2);
1565             }
1566             else
1567             {
1568                 waveFrontStartX = (waveFrontNum - 2 * adjustHeight) + 2;
1569                 waveFrontStartY = (adjustHeight)-1;
1570             }
1571 
1572             tempCoordinateFor26.x = waveFrontStartX * m_26ZIBlockWidth;
1573             tempCoordinateFor26.y = waveFrontStartY * m_26ZIBlockHeight;
1574         }
1575      }
1576 
1577     return CM_SUCCESS;
1578 }
1579 
1580 //*-----------------------------------------------------------------------------
1581 //| Purpose:    Generate Wavefront26ZI Sequence
1582 //|             Dispatch order:
1583 //|                Vertical threads vertically along 26 wave then in macro block
1584 //|                Horizontal threads horizontally along 26 wave then in macro block
1585 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVV1x26HH1x26()1586 int32_t CmThreadSpaceRT::Wavefront26ZISeqVV1x26HH1x26()
1587 {
1588     if ( (m_currentDependencyPattern == CM_WAVEFRONT26ZI) &&
1589         (m_current26ZIDispatchPattern == VVERTICAL1X26_HHORIZONTAL1X26))
1590     {
1591         return CM_SUCCESS;
1592     }
1593 
1594     m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1595     m_current26ZIDispatchPattern = VVERTICAL1X26_HHORIZONTAL1X26;
1596 
1597     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1598     m_indexInList = 0;
1599 
1600     uint32_t waveFrontNum = 0;
1601     uint32_t waveFrontStartX = 0;
1602     uint32_t waveFrontStartY = 0;
1603 
1604     uint32_t adjustHeight = 0;
1605 
1606     CM_COORDINATE tempCoordinateFor26;
1607     tempCoordinateFor26.x = 0;
1608     tempCoordinateFor26.y = 0;
1609 
1610     CM_COORDINATE saveTempCoordinateFor26;
1611     saveTempCoordinateFor26.x = 0;
1612     saveTempCoordinateFor26.y = 0;
1613 
1614     CM_COORDINATE tempCoordinateForVer;
1615     CM_COORDINATE tempCoordinateForHorz;
1616 
1617     while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1618         (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) )
1619     {
1620         saveTempCoordinateFor26.x = tempCoordinateFor26.x;
1621         saveTempCoordinateFor26.y = tempCoordinateFor26.y;
1622 
1623         // do vertical edges
1624         for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount += 2 )
1625         {
1626             // restore original starting point
1627             tempCoordinateFor26.x = saveTempCoordinateFor26.x;
1628             tempCoordinateFor26.y = saveTempCoordinateFor26.y;
1629 
1630             do
1631             {
1632                 uint32_t localHeightCounter = 0;
1633                 tempCoordinateForVer.x = tempCoordinateFor26.x + widthCount;
1634                 tempCoordinateForVer.y = tempCoordinateFor26.y;
1635                 while( (tempCoordinateForVer.x < (int32_t)m_width) && (tempCoordinateForVer.y < (int32_t)m_height) &&
1636                         (tempCoordinateForVer.x >= 0) && (tempCoordinateForVer.y >= 0) && (localHeightCounter < m_26ZIBlockHeight) )
1637                 {
1638                     if(m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] == WHITE )
1639                     {
1640                         m_boardOrderList[m_indexInList ++] = tempCoordinateForVer.y * m_width + tempCoordinateForVer.x;
1641                         m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] = BLACK;
1642                     }
1643                     tempCoordinateForVer.y += 1;
1644                     localHeightCounter++;
1645                 }
1646 
1647                 tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1648                 tempCoordinateFor26.y = tempCoordinateFor26.y - ( 1 * m_26ZIBlockHeight);
1649 
1650             } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1651             (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1652         }
1653 
1654         // do horizontal edges
1655         // restore original starting position
1656         tempCoordinateFor26.x = saveTempCoordinateFor26.x;
1657         tempCoordinateFor26.y = saveTempCoordinateFor26.y;
1658 
1659         for(uint32_t heightCount = 0; heightCount < m_26ZIBlockHeight; ++heightCount )
1660         {
1661             // restore original starting point
1662             tempCoordinateFor26.x = saveTempCoordinateFor26.x;
1663             tempCoordinateFor26.y = saveTempCoordinateFor26.y;
1664 
1665             do
1666             {
1667                 uint32_t localWidthCounter = 0;
1668                 tempCoordinateForHorz.x = tempCoordinateFor26.x + 1;
1669                 tempCoordinateForHorz.y = tempCoordinateFor26.y + heightCount;
1670                 while( (tempCoordinateForHorz.x >= 0) && (tempCoordinateForHorz.y >= 0) &&
1671                     (tempCoordinateForHorz.x < (int32_t)m_width) && (tempCoordinateForHorz.y < (int32_t)m_height) &&
1672                     (localWidthCounter < (m_26ZIBlockWidth / 2)) )
1673                 {
1674                     if( m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] == WHITE )
1675                     {
1676                         m_boardOrderList[m_indexInList ++] = tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x;
1677                         m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] = BLACK;
1678                     }
1679 
1680                     tempCoordinateForHorz.x += 2;
1681                     localWidthCounter++;
1682                 }
1683 
1684                 tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1685                 tempCoordinateFor26.y = tempCoordinateFor26.y - ( 1 * m_26ZIBlockHeight);
1686 
1687             } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1688             (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1689 
1690         }
1691 
1692         if (m_width <= m_26ZIBlockWidth)
1693         {
1694             tempCoordinateFor26.x = 0;
1695             tempCoordinateFor26.y = saveTempCoordinateFor26.y + m_26ZIBlockHeight;
1696         }
1697         else
1698         {
1699             // update wavefront number
1700             waveFrontNum++;
1701             adjustHeight = (uint32_t)ceil((double)m_height / m_26ZIBlockHeight);
1702 
1703             if (waveFrontNum < (2 * adjustHeight))
1704             {
1705                 waveFrontStartX = waveFrontNum & 1;
1706                 waveFrontStartY = (uint32_t)floor((double)waveFrontNum / 2);
1707             }
1708             else
1709             {
1710                 waveFrontStartX = (waveFrontNum - 2 * adjustHeight) + 2;
1711                 waveFrontStartY = (adjustHeight)-1;
1712             }
1713 
1714             tempCoordinateFor26.x = waveFrontStartX * m_26ZIBlockWidth;
1715             tempCoordinateFor26.y = waveFrontStartY * m_26ZIBlockHeight;
1716         }
1717     }
1718 
1719     return CM_SUCCESS;
1720 }
1721 
VerticalSequence()1722 int32_t CmThreadSpaceRT::VerticalSequence()
1723 {
1724     if ( m_currentDependencyPattern == CM_VERTICAL_WAVE)
1725     {
1726         return CM_SUCCESS;
1727     }
1728     m_currentDependencyPattern = CM_VERTICAL_WAVE;
1729 
1730     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1731     m_indexInList = 0;
1732 
1733     for (uint32_t x = 0; x < m_width; x ++)
1734     {
1735         for (uint32_t y = 0; y < m_height; y ++)
1736         {
1737             CM_COORDINATE tempCoordinate;
1738             int32_t linearOffset = y * m_width + x;
1739             if (m_boardFlag[linearOffset] == WHITE)
1740             {
1741                 m_boardOrderList[m_indexInList ++] = linearOffset;
1742                 m_boardFlag[linearOffset] = BLACK;
1743                 tempCoordinate.x = x;
1744                 tempCoordinate.y = y + 1;
1745                 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1746                     (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1747                 {
1748                     if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1749                     {
1750                         m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1751                         m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1752                     }
1753                     tempCoordinate.y = tempCoordinate.y + 1;
1754                 }
1755             }
1756         }
1757     }
1758 
1759     return CM_SUCCESS;
1760 }
1761 
HorizentalSequence()1762 int32_t CmThreadSpaceRT::HorizentalSequence()
1763 {
1764     if ( m_currentDependencyPattern == CM_HORIZONTAL_WAVE)
1765     {
1766         return CM_SUCCESS;
1767     }
1768     m_currentDependencyPattern = CM_HORIZONTAL_WAVE;
1769 
1770     CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1771     m_indexInList = 0;
1772 
1773     for (uint32_t y = 0; y < m_height; y ++)
1774     {
1775         for (uint32_t x = 0; x < m_width; x ++)
1776         {
1777             CM_COORDINATE tempCoordinate;
1778             int32_t linearOffset = y * m_width + x;
1779             if (m_boardFlag[linearOffset] == WHITE)
1780             {
1781                 m_boardOrderList[m_indexInList ++] = linearOffset;
1782                 m_boardFlag[linearOffset] = BLACK;
1783                 tempCoordinate.x = x + 1;
1784                 tempCoordinate.y = y;
1785                 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1786                     (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1787                 {
1788                     if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1789                     {
1790                         m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1791                         m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1792                     }
1793                     tempCoordinate.x = tempCoordinate.x + 1;
1794                 }
1795             }
1796         }
1797     }
1798 
1799     return CM_SUCCESS;
1800 }
1801 
1802 //*-----------------------------------------------------------------------------
1803 //| Purpose:    Generate Wave Sequence for depenedncy vectors
1804 //*-----------------------------------------------------------------------------
WavefrontDependencyVectors()1805 int32_t CmThreadSpaceRT::WavefrontDependencyVectors()
1806 {
1807     if (m_boardFlag == nullptr)
1808     {
1809         m_boardFlag = MOS_NewArray(uint32_t, (m_height * m_width));
1810         if (m_boardFlag)
1811         {
1812             CmSafeMemSet(m_boardFlag, WHITE, (sizeof(uint32_t)* m_height * m_width));
1813         }
1814         else
1815         {
1816             CM_ASSERTMESSAGE("Error: Out of system memory.");
1817             return CM_OUT_OF_HOST_MEMORY;
1818         }
1819     }
1820     if (m_boardOrderList == nullptr)
1821     {
1822         m_boardOrderList = MOS_NewArray(uint32_t, (m_height * m_width));
1823         if (m_boardOrderList)
1824         {
1825             CmSafeMemSet(m_boardOrderList, 0, sizeof(uint32_t)* m_height * m_width);
1826         }
1827         else
1828         {
1829             CM_ASSERTMESSAGE("Error: Out of system memory.");
1830             MosSafeDeleteArray(m_boardFlag);
1831             return CM_OUT_OF_HOST_MEMORY;
1832         }
1833     }
1834     uint32_t iX, iY, nOffset;
1835     iX = iY = nOffset = 0;
1836 
1837     uint32_t *waveFrontPosition = MOS_NewArray(uint32_t, m_width);
1838     uint32_t *waveFrontOffset = MOS_NewArray(uint32_t, m_width);
1839     if ((waveFrontPosition == nullptr) || (waveFrontOffset == nullptr))
1840     {
1841         MosSafeDeleteArray(waveFrontPosition);
1842         MosSafeDeleteArray(waveFrontOffset);
1843         return CM_FAILURE;
1844     }
1845     CmSafeMemSet(waveFrontPosition, 0, m_width * sizeof(int));
1846 
1847     // set initial value
1848     m_boardFlag[0] = BLACK;
1849     m_boardOrderList[0] = 0;
1850     waveFrontPosition[0] = 1;
1851     m_indexInList = 0;
1852 
1853     while (m_indexInList < m_width * m_height - 1)
1854     {
1855         CmSafeMemSet(waveFrontOffset, 0, m_width * sizeof(int));
1856         for (uint32_t iX = 0; iX < m_width; ++iX)
1857         {
1858             uint32_t iY = waveFrontPosition[iX];
1859             nOffset = iY * m_width + iX;
1860             if (m_boardFlag[nOffset] == WHITE)
1861             {
1862                 // check if all of the dependencies are in the dispatch queue
1863                 bool allInQueue = true;
1864                 for (uint32_t i = 0; i < m_dependencyVectors.count; ++i)
1865                 {
1866                     uint32_t tempOffset = nOffset + m_dependencyVectors.deltaX[i] + m_dependencyVectors.deltaY[i] * m_width;
1867                     if (tempOffset <= m_width * m_height - 1)
1868                     {
1869                         if (m_boardFlag[nOffset + m_dependencyVectors.deltaX[i] + m_dependencyVectors.deltaY[i] * m_width] == WHITE)
1870                         {
1871                             allInQueue = false;
1872                             break;
1873                         }
1874                     }
1875                 }
1876                 if (allInQueue)
1877                 {
1878                     waveFrontOffset[iX] = nOffset;
1879                     if (waveFrontPosition[iX] < m_height - 1)
1880                     {
1881                         waveFrontPosition[iX]++;
1882                     }
1883                 }
1884             }
1885         }
1886 
1887         for (uint32_t iX = 0; iX < m_width; ++iX)
1888         {
1889             if ((m_boardFlag[waveFrontOffset[iX]] == WHITE) && (waveFrontOffset[iX] != 0))
1890             {
1891                 m_indexInList++;
1892                 m_boardOrderList[m_indexInList] = waveFrontOffset[iX];
1893                 m_boardFlag[waveFrontOffset[iX]] = BLACK;
1894             }
1895         }
1896     }
1897 
1898     MosSafeDeleteArray(waveFrontPosition);
1899     MosSafeDeleteArray(waveFrontOffset);
1900     return CM_SUCCESS;
1901 }
1902 
1903 //*-----------------------------------------------------------------------------
1904 //| Purpose:    Get Board Order list
1905 //*-----------------------------------------------------------------------------
GetBoardOrder(uint32_t * & boardOrder)1906 int32_t CmThreadSpaceRT::GetBoardOrder(uint32_t *&boardOrder)
1907 {
1908     boardOrder = m_boardOrderList;
1909     return CM_SUCCESS;
1910 }
1911 
1912 #ifdef _DEBUG
PrintBoardOrder()1913 int32_t CmThreadSpaceRT::PrintBoardOrder()
1914 {
1915     CM_NORMALMESSAGE("According to dependency, the score board order is:");
1916     for (uint32_t i = 0; i < m_height * m_width; i ++)
1917     {
1918         CM_NORMALMESSAGE("%d->", m_boardOrderList[i]);
1919     }
1920     CM_NORMALMESSAGE("NIL.");
1921     return 0;
1922 }
1923 #endif
1924 
IsThreadAssociated() const1925 bool CmThreadSpaceRT::IsThreadAssociated() const
1926 {
1927     return m_threadAssociated;
1928 }
1929 
IsDependencySet()1930 bool CmThreadSpaceRT::IsDependencySet()
1931 {
1932     return ((m_dependencyPatternType != CM_NONE_DEPENDENCY) ? true : false);
1933 }
1934 
GetNeedSetKernelPointer() const1935 bool CmThreadSpaceRT::GetNeedSetKernelPointer() const
1936 {
1937     return m_needSetKernelPointer;
1938 }
1939 
SetKernelPointer(CmKernelRT * kernel) const1940 int32_t CmThreadSpaceRT::SetKernelPointer(CmKernelRT* kernel) const
1941 {
1942     *m_kernel = kernel;
1943     return CM_SUCCESS;
1944 }
1945 
KernelPointerIsNULL() const1946 bool CmThreadSpaceRT::KernelPointerIsNULL() const
1947 {
1948     if (*m_kernel == nullptr)
1949     {
1950         return true;
1951     }
1952     else
1953     {
1954         return false;
1955     }
1956 }
1957 
GetKernelPointer() const1958 CmKernelRT* CmThreadSpaceRT::GetKernelPointer() const
1959 {
1960     return *m_kernel;
1961 }
1962 
GetIndexInTsArray()1963 uint32_t CmThreadSpaceRT::GetIndexInTsArray()
1964 {
1965     return m_indexInThreadSpaceArray;
1966 }
1967 
GetDirtyStatus() const1968 CM_THREAD_SPACE_DIRTY_STATUS CmThreadSpaceRT::GetDirtyStatus() const
1969 {
1970     return *m_dirtyStatus;
1971 }
1972 
SetDirtyStatus(CM_THREAD_SPACE_DIRTY_STATUS dirtyStatus) const1973 uint32_t CmThreadSpaceRT::SetDirtyStatus(CM_THREAD_SPACE_DIRTY_STATUS dirtyStatus) const
1974 {
1975     *m_dirtyStatus = dirtyStatus;
1976     return CM_SUCCESS;
1977 }
1978 
SetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT groupSelect)1979 CM_RT_API int32_t CmThreadSpaceRT::SetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT groupSelect)
1980 {
1981     if (groupSelect != m_groupSelect)
1982     {
1983         m_groupSelect = groupSelect;
1984         *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
1985     }
1986 
1987     return CM_SUCCESS;
1988 }
1989 
GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT & groupSelect)1990 int32_t CmThreadSpaceRT::GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT &groupSelect)
1991 {
1992     groupSelect = m_groupSelect;
1993     return CM_SUCCESS;
1994 }
1995 
UpdateDependency()1996 int32_t CmThreadSpaceRT::UpdateDependency()
1997 {
1998     //Init SW scoreboard
1999     if (!m_swScoreBoardEnabled)
2000     {
2001         return CM_SUCCESS;
2002     }
2003     if (m_swBoard == nullptr)
2004     {
2005         m_swBoard = MOS_NewArray(uint32_t, (m_height * m_width));
2006         if (m_swBoard)
2007         {
2008             CmSafeMemSet(m_swBoard, 0, sizeof(uint32_t)* m_height * m_width);
2009         }
2010         else
2011         {
2012             CM_ASSERTMESSAGE("Error: Out of system memory.");
2013             MosSafeDeleteArray(m_swBoard);
2014             return CM_OUT_OF_HOST_MEMORY;
2015         }
2016     }
2017     if (m_swBoardSurf == nullptr)
2018     {
2019         //for 2D atomic
2020         CM_CHK_CMSTATUS_RETURN(m_device->CreateSurface2D(m_width,
2021                 m_height,
2022                 Format_R32S,
2023                 m_swBoardSurf));
2024     }
2025     CM_CHK_CMSTATUS_RETURN(InitSwScoreBoard());
2026     CM_CHK_CMSTATUS_RETURN(m_swBoardSurf->WriteSurface((uint8_t *)m_swBoard, nullptr));
2027     return CM_SUCCESS;
2028 }
2029 
SetDependencyArgToKernel(CmKernelRT * pKernel) const2030 int32_t CmThreadSpaceRT::SetDependencyArgToKernel(CmKernelRT *pKernel) const
2031 {
2032     if (!m_swScoreBoardEnabled)
2033     {
2034         return CM_SUCCESS;
2035     }
2036     int32_t hr = CM_SUCCESS;
2037 
2038     for (uint32_t k = 0; k < pKernel->m_argCount; k++)
2039     {
2040         if (pKernel->m_args[k].unitKind == ARG_KIND_SURFACE_2D_SCOREBOARD)
2041         {
2042             SurfaceIndex* ScoreboardIndex = nullptr;
2043             CM_CHK_CMSTATUS_RETURN(m_swBoardSurf->GetIndex(ScoreboardIndex));
2044             CM_CHK_CMSTATUS_RETURN(pKernel->SetKernelArg(k, sizeof(SurfaceIndex), ScoreboardIndex));
2045         }
2046         else if (pKernel->m_args[k].unitKind == ARG_KIND_GENERAL_DEPVEC)
2047         {
2048             char vectors[CM_MAX_DEPENDENCY_COUNT * 2];
2049             for (int ii = 0; ii < CM_MAX_DEPENDENCY_COUNT; ii++)
2050             {
2051                 vectors[ii] = (char)m_dependency.deltaX[ii];
2052                 vectors[ii + CM_MAX_DEPENDENCY_COUNT] = (char)m_dependency.deltaY[ii];
2053             }
2054             CM_CHK_CMSTATUS_RETURN(pKernel->SetKernelArg(k, (sizeof(char)*CM_MAX_DEPENDENCY_COUNT * 2), vectors));
2055         }
2056         else if (pKernel->m_args[k].unitKind == ARG_KIND_GENERAL_DEPCNT)
2057         {
2058             CM_CHK_CMSTATUS_RETURN(pKernel->SetKernelArg(k, sizeof(uint32_t), &(m_dependency.count)));
2059         }
2060     }
2061 
2062     return CM_SUCCESS;
2063 }
2064 
InitSwScoreBoard()2065 int32_t CmThreadSpaceRT::InitSwScoreBoard()
2066 {
2067     int SB_BufLen = m_height * m_width;
2068     int bufIdx = 0;
2069     int temp_x = 0, temp_y = 0;
2070     for (int i = 0; i < SB_BufLen; i++)
2071     {
2072         int x = i % m_width;
2073         int y = i / m_width;
2074         uint32_t entry_value = 0;   //only support for 8 dependencies, but in uint32_t type
2075         for (uint32_t j = 0; j < m_dependency.count; j++)
2076         {
2077             if (((x + m_dependency.deltaX[j]) >= 0) &&
2078                 ((x + m_dependency.deltaX[j]) < (int)m_width)
2079                 && ((y + m_dependency.deltaY[j]) >= 0)
2080                 && ((y + m_dependency.deltaY[j]) < (int)m_height))
2081             {
2082                 entry_value |= (1 << j);
2083             }
2084         }
2085         switch (m_dependencyPatternType)
2086         {
2087             case CM_WAVEFRONT26Z:
2088             case CM_WAVEFRONT26ZIG:
2089                 if ((x % 2) == 1 && (y % 2) == 1) {
2090                     entry_value &= 0xE; // force 0 bit and 4th bit to be zero
2091                 }
2092                 else if ((x % 2) != 0 || (y % 2) != 0) {
2093                     entry_value &= 0x1E; // force 0 bit to be zero
2094                 }
2095                 break;
2096             case CM_WAVEFRONT26X:
2097                 if ((y % 4) == 3) {
2098                     entry_value &= 0x3C; // force 0, 1 and 6th bit of dependency value to be zero. 7th is by default 0
2099                 }
2100                 else if ((y % 4) != 0) {
2101                     entry_value &= 0x7E; // force 0th bit of dependency value to be zero.
2102                 }
2103                 break;
2104             case CM_WAVEFRONT26ZI:
2105                 temp_x = x % m_26ZIBlockWidth;
2106                 temp_y = y % m_26ZIBlockHeight;
2107                 if (temp_x == 0) {
2108                     if (temp_y == m_26ZIBlockHeight - 1)
2109                         entry_value &= 0x1E;
2110                     else if (temp_y == 0)
2111                         entry_value &= 0x3F;
2112                     else
2113                         entry_value &= 0x1F;
2114                 }
2115                 else if (temp_x == m_26ZIBlockWidth - 1) {
2116                     if (m_26ZIBlockWidth % 2 == 0) {
2117                         if (temp_y == m_26ZIBlockHeight - 1)
2118                             entry_value &= 0x1E;
2119                         else if (temp_y == 0)
2120                             entry_value &= 0x3F;
2121                         else
2122                             entry_value &= 0x1F;
2123                     }
2124                     else {
2125                         if (temp_y == 0)
2126                             entry_value &= 0x1A;
2127                         else
2128                             entry_value &= 0x12;
2129                     }
2130 
2131                 }
2132                 else if ((temp_x % 2) != 0) {
2133                     if (temp_y == m_26ZIBlockHeight - 1)
2134                         entry_value &= 0x7E;
2135                 }
2136                 else{ // ((temp_x % 2) == 0)
2137                     if (temp_y == 0)
2138                         entry_value &= 0x3A;
2139                     else
2140                         entry_value &= 0x12;
2141                 }
2142                 break;
2143             case CM_NONE_DEPENDENCY:
2144             case CM_WAVEFRONT:
2145             case CM_WAVEFRONT26:
2146             case CM_VERTICAL_WAVE:
2147             case CM_HORIZONTAL_WAVE:
2148             default:
2149                 break;
2150         }
2151 
2152         *(m_swBoard + i) = entry_value;
2153     }
2154     return CM_SUCCESS;
2155 }
2156 
2157 #if CM_LOG_ON
Log()2158 std::string CmThreadSpaceRT::Log()
2159 {
2160     std::ostringstream oss;
2161 
2162     oss << "Thread Space Parameters"
2163         << " Width :"<< m_width
2164         << " Height :" << m_height
2165         << " DependencyPatten :" << (int)m_dependencyPatternType
2166         << " IsAssociated :" <<m_threadAssociated
2167         << std::endl;
2168 
2169     return oss.str();
2170 }
2171 
GetHalState()2172 CM_HAL_STATE* CmThreadSpaceRT::GetHalState() { return m_device->GetHalState(); }
2173 
2174 #endif  // #if CM_LOG_ON
2175 
GetThreadGroupSpace() const2176 CmThreadGroupSpace *CmThreadSpaceRT::GetThreadGroupSpace() const
2177 {
2178     return m_threadGroupSpace;
2179 }
2180 }  // namespace
2181