1 /*
2 * Copyright (c) 2007-2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_thread_space_rt.cpp
24 //! \brief Contains Class CmThreadSpaceRT implementations.
25 //!
26
27 #include "cm_thread_space_rt.h"
28
29 #include "cm_kernel_rt.h"
30 #include "cm_task_rt.h"
31 #include "cm_mem.h"
32 #include "cm_device_rt.h"
33 #include "cm_surface_2d.h"
34 #include "cm_extension_creator.h"
35
// Three-state marker with values 0, 1 and 2.
// NOTE(review): the enumerators are not referenced in the visible part of this
// file; presumably they are visit states for a board traversal — confirm.
enum CM_TS_FLAG
{
    WHITE = 0,  // first state, explicitly zero
    GRAY,       // == 1
    BLACK       // == 2
};
42
// Predefined dependency tables. SelectThreadDependencyPattern() forwards each
// table's .count, .deltaX and .deltaY to SetThreadDependencyPattern().
// NOTE(review): the initializers assume the member order of CM_DEPENDENCY is
// count, deltaX[], deltaY[] — confirm against the struct declaration.

// Used for CM_WAVEFRONT: 3 dependencies.
static CM_DEPENDENCY waveFrontPattern =
{
    3,
    {-1, -1, 0},
    {0, -1, -1}
};

// Used for CM_WAVEFRONT26: 4 dependencies.
static CM_DEPENDENCY waveFront26Pattern =
{
    4,
    {-1, -1, 0, 1},
    {0, -1, -1, -1}
};

// Used for CM_WAVEFRONT26Z: 5 dependencies.
static CM_DEPENDENCY waveFront26ZPattern =
{
    5,
    {-1, -1, -1, 0, 1},
    { 1, 0, -1, -1, -1}
};

// Used for CM_WAVEFRONT26ZI: 7 dependencies.
static CM_DEPENDENCY waveFront26ZIPattern =
{
    7,
    {-1, -2, -1, -1, 0, 1, 1},
    {1, 0, 0, -1, -1, -1, 0}
};

// Used for CM_HORIZONTAL_WAVE: 1 dependency.
static CM_DEPENDENCY horizontalPattern =
{
    1,
    {0},
    {-1}
};

// Used for CM_VERTICAL_WAVE: 1 dependency.
static CM_DEPENDENCY verticalPattern =
{
    1,
    {-1},
    {0}
};

// Used for CM_WAVEFRONT26X: 7 dependencies.
static CM_DEPENDENCY waveFront26XPattern =
{
    7,
    { -1, -1, -1, 0, 0, 0, 1 },
    { 3, 1, -1, -1, -2, -3, -3 }
};

// Used for CM_WAVEFRONT26ZIG: 5 dependencies (same values as waveFront26ZPattern).
static CM_DEPENDENCY waveFront26ZIGPattern =
{
    5,
    { -1, -1, -1, 0, 1 },
    { 1, 0, -1, -1, -1 }
};
98
99 namespace CMRT_UMD
100 {
101 //*-----------------------------------------------------------------------------
102 //| Purpose: Reset task and clear all the kernel
103 //| Returns: Result of the operation.
104 //*-----------------------------------------------------------------------------
Create(CmDeviceRT * device,uint32_t indexTsArray,uint32_t width,uint32_t height,CmThreadSpaceRT * & threadSpace)105 int32_t CmThreadSpaceRT::Create( CmDeviceRT* device, uint32_t indexTsArray, uint32_t width, uint32_t height, CmThreadSpaceRT* & threadSpace )
106 {
107 if( (0 == width) || (0 == height) )
108 {
109 CM_ASSERTMESSAGE("Error: Invalid thread space width or height.");
110 return CM_INVALID_THREAD_SPACE;
111 }
112
113 int32_t result = CM_SUCCESS;
114 threadSpace = new (std::nothrow) CmThreadSpaceRT( device, indexTsArray, width, height );
115 if( threadSpace )
116 {
117 device->m_memObjectCount.threadSpaceCount++;
118
119 result = threadSpace->Initialize( );
120 if( result != CM_SUCCESS )
121 {
122 CmThreadSpaceRT::Destroy( threadSpace);
123 }
124 }
125 else
126 {
127 CM_ASSERTMESSAGE("Error: Failed to create CmThreadSpace due to out of system memory.");
128 result = CM_OUT_OF_HOST_MEMORY;
129 }
130 return result;
131 }
132
133 //*-----------------------------------------------------------------------------
134 //| Purpose: Destroy CM thread space
135 //| Returns: Result of the operation.
136 //*-----------------------------------------------------------------------------
Destroy(CmThreadSpaceRT * & threadSpace)137 int32_t CmThreadSpaceRT::Destroy( CmThreadSpaceRT* &threadSpace )
138 {
139 if( threadSpace )
140 {
141 threadSpace->m_device->m_memObjectCount.threadSpaceCount--;
142 delete threadSpace;
143 threadSpace = nullptr;
144 }
145 return CM_SUCCESS;
146 }
147
148 //*-----------------------------------------------------------------------------
149 //| Purpose: Constructor of CmThreadSpace
150 //| Returns: Result of the operation.
151 //*-----------------------------------------------------------------------------
CmThreadSpaceRT(CmDeviceRT * device,uint32_t indexTsArray,uint32_t width,uint32_t height)152 CmThreadSpaceRT::CmThreadSpaceRT( CmDeviceRT* device , uint32_t indexTsArray, uint32_t width, uint32_t height ):
153 m_device( device ),
154 m_width( width ),
155 m_height( height ),
156 m_colorCountMinusOne( 0 ),
157 m_26ZIBlockWidth( CM_26ZI_BLOCK_WIDTH ),
158 m_26ZIBlockHeight( CM_26ZI_BLOCK_HEIGHT ),
159 m_threadSpaceUnit(nullptr),
160 m_threadAssociated(false),
161 m_needSetKernelPointer(false),
162 m_kernel(nullptr),
163 m_dependencyPatternType(CM_NONE_DEPENDENCY),
164 m_currentDependencyPattern(CM_NONE_DEPENDENCY),
165 m_26ZIDispatchPattern(VVERTICAL_HVERTICAL_26),
166 m_current26ZIDispatchPattern(VVERTICAL_HVERTICAL_26),
167 m_boardFlag(nullptr),
168 m_boardOrderList(nullptr),
169 m_indexInList(0),
170 m_indexInThreadSpaceArray(indexTsArray),
171 m_walkingPattern(CM_WALK_DEFAULT),
172 m_mediaWalkerParamsSet(false),
173 m_dependencyVectorsSet(false),
174 m_threadSpaceOrderSet(false),
175 m_swBoardSurf(nullptr),
176 m_swBoard(nullptr),
177 m_swScoreBoardEnabled(false),
178 m_threadGroupSpace(nullptr),
179 m_dirtyStatus(nullptr),
180 m_groupSelect(CM_MW_GROUP_NONE)
181 {
182 CmSafeMemSet( &m_dependency, 0, sizeof(CM_HAL_DEPENDENCY) );
183 CmSafeMemSet( &m_wavefront26ZDispatchInfo, 0, sizeof(CM_HAL_WAVEFRONT26Z_DISPATCH_INFO) );
184 CmSafeMemSet( &m_walkingParameters, 0, sizeof(m_walkingParameters) );
185 CmSafeMemSet( &m_dependencyVectors, 0, sizeof(m_dependencyVectors) );
186 }
187
188 //*-----------------------------------------------------------------------------
189 //| Purpose: Destructor of CmThreadSpaceRT
190 //| Returns: Result of the operation.
191 //*-----------------------------------------------------------------------------
~CmThreadSpaceRT(void)192 CmThreadSpaceRT::~CmThreadSpaceRT( void )
193 {
194 MosSafeDeleteArray(m_threadSpaceUnit);
195 MosSafeDeleteArray(m_boardFlag);
196 MosSafeDeleteArray(m_boardOrderList);
197 CmSafeDelete( m_dirtyStatus );
198 CmSafeDelete(m_kernel);
199
200 if (m_wavefront26ZDispatchInfo.numThreadsInWave)
201 {
202 MOS_FreeMemory(m_wavefront26ZDispatchInfo.numThreadsInWave);
203 }
204
205 if (m_swScoreBoardEnabled)
206 {
207 MosSafeDeleteArray(m_swBoard);
208 if (m_swBoardSurf != nullptr)
209 {
210 m_device->DestroySurface(m_swBoardSurf);
211 }
212 }
213
214 if (m_threadGroupSpace != nullptr)
215 {
216 m_device->DestroyThreadGroupSpace(m_threadGroupSpace);
217 }
218 }
219
220 //*-----------------------------------------------------------------------------
221 //| Purpose: Initialize CmThreadSpaceRT
222 //| Returns: Result of the operation.
223 //*-----------------------------------------------------------------------------
Initialize(void)224 int32_t CmThreadSpaceRT::Initialize( void )
225 {
226 m_dirtyStatus = new (std::nothrow) CM_THREAD_SPACE_DIRTY_STATUS;
227 if(m_dirtyStatus == nullptr)
228 {
229 CM_ASSERTMESSAGE("Error: Failed to initialize CmThreadSpace due to out of system memory.");
230 return CM_OUT_OF_HOST_MEMORY;
231 }
232 *m_dirtyStatus = CM_THREAD_SPACE_CLEAN;
233
234 m_kernel = new (std::nothrow) CmKernelRT*;
235 if (m_kernel == nullptr)
236 {
237 CM_ASSERTMESSAGE("Error: Failed to initialize CmThreadSpace due to out of system memory.");
238 return CM_OUT_OF_HOST_MEMORY;
239 }
240 *m_kernel = nullptr;
241
242 PCM_HAL_STATE cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
243 m_swScoreBoardEnabled = !(cmHalState->cmHalInterface->IsScoreboardParamNeeded());
244
245 if (cmHalState->cmHalInterface->CheckMediaModeAvailability() == false)
246 {
247 CM_CHK_CMSTATUS_RETURN(m_device->CreateThreadGroupSpaceEx(1, 1, 1, m_width, m_height, 1, m_threadGroupSpace));
248 }
249
250 return CM_SUCCESS;
251 }
252
253 //*-----------------------------------------------------------------------------
254 //! Associate a thread to one uint in the 2-dimensional dependency board with default mask
255 //*-----------------------------------------------------------------------------
AssociateThread(uint32_t x,uint32_t y,CmKernel * kernel,uint32_t threadId)256 CM_RT_API int32_t CmThreadSpaceRT::AssociateThread( uint32_t x, uint32_t y, CmKernel* kernel , uint32_t threadId )
257 {
258 return AssociateThreadWithMask(x, y, kernel, threadId, CM_DEFAULT_THREAD_DEPENDENCY_MASK);
259 }
260
261 //*-----------------------------------------------------------------------------
262 //! Associate a thread to one uint in the 2-dimensional dependency board.
263 //! If call this function twice with same x/y pair and different thread, the 2nd one will fail
264 //! Enqueue will make sure each x/y pair in the CmThreadSpaceRT object is associated with
265 //! a unique thread in the task to enqueue.Otherwise enqueue will fail.
266 //! Input :
267 //! 1) X/Y coordinats of the uint in dependency board
268 //! 2) pointer to CmKernel
269 //! 3) thread index. It is the same as the read index in
270 //! CmKernel::SetThreadArg(uint32_t threadId, uint32_t index, size_t size, const void * pValue )
271 //! OUTPUT :
272 //! CM_SUCCESS if the association is successful
273 //! CM_INVALID_ARG_VALUE if the input parameters are invalid
274 //! CM_OUT_OF_HOST_MEMORY if the necessary memory allocation is failed.
275 //*-----------------------------------------------------------------------------
AssociateThreadWithMask(uint32_t x,uint32_t y,CmKernel * kernel,uint32_t threadId,uint8_t dependencyMask)276 CM_RT_API int32_t CmThreadSpaceRT::AssociateThreadWithMask( uint32_t x, uint32_t y, CmKernel* kernel , uint32_t threadId, uint8_t dependencyMask )
277 {
278 INSERT_API_CALL_LOG(GetHalState());
279
280 if((x >= m_width) || (y >= m_height) || (kernel == nullptr))
281 {
282 CM_ASSERTMESSAGE("Error: Invalid input arguments.");
283 return CM_INVALID_ARG_VALUE;
284 }
285
286 //Check if the m_threadSpaceUnit is allocated, we only need allocate it once at the first time.
287 if( m_threadSpaceUnit == nullptr )
288 {
289 m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
290 if (m_threadSpaceUnit)
291 {
292 CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT) * m_height * m_width);
293 }
294 else
295 {
296 CM_ASSERTMESSAGE("Error: Out of system memory.");
297 return CM_OUT_OF_HOST_MEMORY;
298 }
299 }
300
301 uint32_t linearOffset = y*m_width + x;
302 if( (m_threadSpaceUnit[linearOffset].kernel == kernel) &&
303 (m_threadSpaceUnit[linearOffset].threadId == threadId) &&
304 (m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x == x) &&
305 (m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y == y) )
306 {
307 if( m_threadSpaceUnit[linearOffset].dependencyMask == dependencyMask )
308 {
309 m_threadSpaceUnit[linearOffset].reset = CM_REUSE_DEPENDENCY_MASK;
310 }
311 else
312 {
313 m_threadSpaceUnit[linearOffset].dependencyMask = dependencyMask;
314 m_threadSpaceUnit[linearOffset].reset = CM_RESET_DEPENDENCY_MASK;
315 }
316 *m_dirtyStatus = CM_THREAD_SPACE_DEPENDENCY_MASK_DIRTY;
317 }
318 else
319 {
320 m_threadSpaceUnit[linearOffset].kernel = kernel;
321 m_threadSpaceUnit[linearOffset].threadId = threadId;
322 m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x = x;
323 m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y = y;
324 m_threadSpaceUnit[linearOffset].dependencyMask = dependencyMask;
325 m_threadSpaceUnit[linearOffset].reset = CM_NO_BATCH_BUFFER_REUSE;
326 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
327 }
328
329 if (!m_threadAssociated)
330 {
331 m_threadAssociated = true;
332 }
333
334 CmKernelRT *kernelRT = static_cast<CmKernelRT *>(kernel);
335 kernelRT->SetAssociatedToTSFlag(true);
336
337 return CM_SUCCESS;
338 }
339
340 //*-----------------------------------------------------------------------------
341 //! Set the dependency pattern. There can be at most 8 dependent unit in the pattern.
342 //! Each dependent unit is indicated as the delta in X coordinat and the delta in Y coordinat
343 //! The call will fail if there is a pair of deltaX/Y with value ( 0, 0 )
344 //! By default, there is no dependent unit, i.e. count is 0.
345 //! Input :
346 //! 1) Total number of dependent units. It is <= 8.
347 //! 2) Array of deltaX. Array size is the first argument.
348 //! Each deltaX is in the range of [-8, 7]
349 //! 3) Array of deltaY. Array size is the first argument.
350 //! Each deltaY is in the range of [-8, 7]
351 //! OUTPUT :
352 //! CM_SUCCESS if the pattern is set
353 //*-----------------------------------------------------------------------------
SetThreadDependencyPattern(uint32_t count,int32_t * deltaX,int32_t * deltaY)354 CM_RT_API int32_t CmThreadSpaceRT::SetThreadDependencyPattern( uint32_t count, int32_t *deltaX, int32_t *deltaY )
355 {
356 INSERT_API_CALL_LOG(GetHalState());
357
358 if( count > CM_MAX_DEPENDENCY_COUNT )
359 {
360 CM_ASSERTMESSAGE("Error: Exceed dependency count limitation, which is 8.");
361 return CM_FAILURE;
362 }
363
364 m_dependency.count = count;
365
366 CmSafeMemCopy( m_dependency.deltaX, deltaX, sizeof( int32_t ) * count );
367 CmSafeMemCopy( m_dependency.deltaY, deltaY, sizeof( int32_t ) * count );
368
369 return CM_SUCCESS;
370 }
371
372 //*-----------------------------------------------------------------------------
373 //! Select from X predefined dependency patterns.
374 //! Input :
375 //! 1) pattern index
376 //! OUTPUT :
377 //! CM_SUCCESS if the pattern is selected
378 //! CM_OUT_OF_HOST_MEMORY if the necessary memory allocation is failed.
379 //! CM_FAILURE if the input dependency pattern is not supported.
380 //*-----------------------------------------------------------------------------
SelectThreadDependencyPattern(CM_DEPENDENCY_PATTERN pattern)381 CM_RT_API int32_t CmThreadSpaceRT::SelectThreadDependencyPattern (CM_DEPENDENCY_PATTERN pattern )
382 {
383 INSERT_API_CALL_LOG(GetHalState());
384
385 int32_t hr = CM_SUCCESS;
386
387 //Check if the m_boardFlag and m_boardOrderList are NULL. We only need allocate it once at the first time
388 if ( m_boardFlag == nullptr )
389 {
390 m_boardFlag = MOS_NewArray(uint32_t, (m_height * m_width));
391 if ( m_boardFlag )
392 {
393 CmSafeMemSet(m_boardFlag, 0, sizeof(uint32_t) * m_height * m_width);
394 }
395 else
396 {
397 CM_ASSERTMESSAGE("Error: Out of system memory.");
398 return CM_OUT_OF_HOST_MEMORY;
399 }
400 }
401 if ( m_boardOrderList == nullptr )
402 {
403 m_boardOrderList = MOS_NewArray(uint32_t, (m_height * m_width));
404 if (m_boardOrderList )
405 {
406 CmSafeMemSet(m_boardOrderList, 0, sizeof(uint32_t) * m_height * m_width);
407 }
408 else
409 {
410 CM_ASSERTMESSAGE("Error: Out of system memory.");
411 MosSafeDeleteArray(m_boardFlag);
412 return CM_OUT_OF_HOST_MEMORY;
413 }
414 }
415
416 if( (pattern != CM_NONE_DEPENDENCY) && (m_walkingPattern != CM_WALK_DEFAULT ) )
417 {
418 CM_ASSERTMESSAGE("Error: Only valid when no walking pattern has been selected.");
419 return CM_INVALID_DEPENDENCY_WITH_WALKING_PATTERN;
420 }
421
422 switch (pattern)
423 {
424 case CM_VERTICAL_WAVE:
425 m_dependencyPatternType = CM_VERTICAL_WAVE;
426 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(verticalPattern.count, verticalPattern.deltaX, verticalPattern.deltaY));
427 break;
428
429 case CM_HORIZONTAL_WAVE:
430 m_dependencyPatternType = CM_HORIZONTAL_WAVE;
431 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(horizontalPattern.count, horizontalPattern.deltaX, horizontalPattern.deltaY));
432 break;
433
434 case CM_WAVEFRONT:
435 m_dependencyPatternType = CM_WAVEFRONT;
436 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFrontPattern.count, waveFrontPattern.deltaX, waveFrontPattern.deltaY));
437 break;
438
439 case CM_WAVEFRONT26:
440 m_dependencyPatternType = CM_WAVEFRONT26;
441 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26Pattern.count, waveFront26Pattern.deltaX, waveFront26Pattern.deltaY));
442 break;
443
444 case CM_WAVEFRONT26Z:
445 m_dependencyPatternType = CM_WAVEFRONT26Z;
446 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26ZPattern.count, waveFront26ZPattern.deltaX, waveFront26ZPattern.deltaY));
447 m_wavefront26ZDispatchInfo.numThreadsInWave = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t) * m_width * m_height);
448 if (m_threadSpaceUnit == nullptr && !CheckThreadSpaceOrderSet())
449 {
450 m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
451 if (m_threadSpaceUnit)
452 {
453 CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT)* m_height * m_width);
454 }
455 else
456 {
457 return CM_OUT_OF_HOST_MEMORY;
458 }
459 uint32_t threadId = 0;
460 uint32_t linearOffset = 0;
461 for (uint32_t y = 0; y < m_height; ++y)
462 {
463 for (uint32_t x = 0; x < m_width; ++x)
464 {
465 linearOffset = y*m_width + x;
466 m_threadSpaceUnit[linearOffset].threadId = threadId++;
467 m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x = x;
468 m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y = y;
469 m_threadSpaceUnit[linearOffset].dependencyMask = (1 << waveFront26ZPattern.count) - 1;
470 m_threadSpaceUnit[linearOffset].reset = CM_NO_BATCH_BUFFER_REUSE;
471 }
472 }
473
474 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
475 m_threadAssociated = true;
476 m_needSetKernelPointer = true;
477 }
478 break;
479
480 case CM_WAVEFRONT26ZI:
481 m_dependencyPatternType = CM_WAVEFRONT26ZI;
482 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26ZIPattern.count, waveFront26ZIPattern.deltaX, waveFront26ZIPattern.deltaY));
483 if (m_threadSpaceUnit == nullptr&& !CheckThreadSpaceOrderSet())
484 {
485 m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
486 if (m_threadSpaceUnit)
487 {
488 CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT)* m_height * m_width);
489 }
490 else
491 {
492 return CM_OUT_OF_HOST_MEMORY;
493 }
494 uint32_t threadId = 0;
495 uint32_t linearOffset = 0;
496 for (uint32_t y = 0; y < m_height; ++y)
497 {
498 for (uint32_t x = 0; x < m_width; ++x)
499 {
500 linearOffset = y*m_width + x;
501 m_threadSpaceUnit[linearOffset].threadId = threadId++;
502 m_threadSpaceUnit[linearOffset].scoreboardCoordinates.x = x;
503 m_threadSpaceUnit[linearOffset].scoreboardCoordinates.y = y;
504 m_threadSpaceUnit[linearOffset].dependencyMask = (1 << waveFront26ZIPattern.count) - 1;
505 m_threadSpaceUnit[linearOffset].reset = CM_NO_BATCH_BUFFER_REUSE;
506 }
507 }
508
509 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
510 m_threadAssociated = true;
511 m_needSetKernelPointer = true;
512 }
513 break;
514
515 case CM_WAVEFRONT26X:
516 m_dependencyPatternType = CM_WAVEFRONT26X;
517 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26XPattern.count, waveFront26XPattern.deltaX, waveFront26XPattern.deltaY));
518 break;
519
520 case CM_WAVEFRONT26ZIG:
521 m_dependencyPatternType = CM_WAVEFRONT26ZIG;
522 CM_CHK_CMSTATUS_GOTOFINISH(SetThreadDependencyPattern(waveFront26ZIGPattern.count, waveFront26ZIGPattern.deltaX, waveFront26ZIGPattern.deltaY));
523 break;
524
525 case CM_NONE_DEPENDENCY:
526 m_dependencyPatternType = CM_NONE_DEPENDENCY;
527 hr = CM_SUCCESS;
528 break;
529
530 default:
531 hr = CM_FAILURE;
532 break;
533 }
534
535 UpdateDependency();
536
537 if( m_dependencyPatternType != m_currentDependencyPattern )
538 {
539 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
540 }
541
542 finish:
543 return hr;
544 }
545
SelectMediaWalkingPattern(CM_WALKING_PATTERN pattern)546 CM_RT_API int32_t CmThreadSpaceRT::SelectMediaWalkingPattern( CM_WALKING_PATTERN pattern )
547 {
548 INSERT_API_CALL_LOG(GetHalState());
549
550 int result = CM_SUCCESS;
551
552 if( m_dependencyPatternType != CM_NONE_DEPENDENCY )
553 {
554 CM_ASSERTMESSAGE("Error: Only valid when no thread dependency has been selected.");
555 return CM_INVALID_DEPENDENCY_WITH_WALKING_PATTERN;
556 }
557
558 switch( pattern )
559 {
560 case CM_WALK_DEFAULT:
561 case CM_WALK_HORIZONTAL:
562 case CM_WALK_VERTICAL:
563 case CM_WALK_WAVEFRONT:
564 case CM_WALK_WAVEFRONT26:
565 case CM_WALK_WAVEFRONT26ZIG:
566 case CM_WALK_WAVEFRONT26X:
567 case CM_WALK_WAVEFRONT26XALT:
568 case CM_WALK_WAVEFRONT45D:
569 case CM_WALK_WAVEFRONT45XD_2:
570 case CM_WALK_WAVEFRONT26D:
571 case CM_WALK_WAVEFRONT26XD:
572 m_walkingPattern = pattern;
573 break;
574 default:
575 CM_ASSERTMESSAGE("Error: Invalid media walking pattern.");
576 result = CM_INVALID_MEDIA_WALKING_PATTERN;
577 break;
578 }
579
580 return result;
581 }
582
583 //*-----------------------------------------------------------------------------
584 //| Purpose: Sets the media walker parameters for the CmThreadSpaceRT
585 //| Used for engineering build, no error checking
586 //| Returns: CM_SUCCESS
587 //*-----------------------------------------------------------------------------
SelectMediaWalkingParameters(CM_WALKING_PARAMETERS parameters)588 CM_RT_API int32_t CmThreadSpaceRT::SelectMediaWalkingParameters(CM_WALKING_PARAMETERS parameters)
589 {
590 INSERT_API_CALL_LOG(GetHalState());
591
592 // [0..11] of parameters maps to DWORD5 through DWORD16
593 // No error checking here
594
595 if( CmSafeMemCompare(&m_walkingParameters, ¶meters, sizeof(m_walkingParameters)) != 0 )
596 {
597 CmSafeMemCopy(&m_walkingParameters, ¶meters, sizeof(m_walkingParameters));
598 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
599 }
600
601 m_mediaWalkerParamsSet = true;
602
603 return CM_SUCCESS;
604 }
605
606 //*-----------------------------------------------------------------------------
607 //| Purpose: Sets the thread space order for the CmThreadSpaceRT
608 //| Used for engineering build
609 //| Returns:
610 //| CM_SUCCESS if thread space order is successfully set
611 //| CM_OUT_OF_HOST_MEMORY if the necessary memory allocation is failed.
612 //| CM_INVALID_ARG_VALUE if the input arg is not correct.
613 //*-----------------------------------------------------------------------------
SetThreadSpaceOrder(uint32_t threadCount,const CM_THREAD_PARAM * threadSpaceOrder)614 CM_RT_API int32_t CmThreadSpaceRT::SetThreadSpaceOrder(uint32_t threadCount, const CM_THREAD_PARAM* threadSpaceOrder)
615 {
616 INSERT_API_CALL_LOG(GetHalState());
617
618 if (threadCount != m_width*m_height || threadSpaceOrder == nullptr)
619 {
620 CM_ASSERTMESSAGE("Error: Thread count does not match the thread space size.");
621 return CM_INVALID_ARG_VALUE;
622 }
623 //Check if the m_threadSpaceUnit is allocated, we only need allocate it once at the first time.
624 if (m_threadSpaceUnit == nullptr)
625 {
626 m_threadSpaceUnit = MOS_NewArray(CM_THREAD_SPACE_UNIT, (m_height * m_width));
627 if (m_threadSpaceUnit)
628 {
629 CmSafeMemSet(m_threadSpaceUnit, 0, sizeof(CM_THREAD_SPACE_UNIT)* m_height * m_width);
630 }
631 else
632 {
633 CM_ASSERTMESSAGE("Error: Out of system memory.");
634 return CM_OUT_OF_HOST_MEMORY;
635 }
636 }
637
638 uint32_t threadId = 0;
639
640 for (uint32_t i = 0; i < m_width*m_height; i++)
641 {
642 m_threadSpaceUnit[i].threadId = threadId++;
643 m_threadSpaceUnit[i].scoreboardCoordinates = threadSpaceOrder[i].scoreboardCoordinates;
644 m_threadSpaceUnit[i].scoreboardColor = threadSpaceOrder[i].scoreboardColor;
645 m_threadSpaceUnit[i].sliceDestinationSelect = threadSpaceOrder[i].sliceDestinationSelect;
646 m_threadSpaceUnit[i].subSliceDestinationSelect = threadSpaceOrder[i].subSliceDestinationSelect;
647 m_threadSpaceUnit[i].dependencyMask = CM_DEFAULT_THREAD_DEPENDENCY_MASK;
648 m_threadSpaceUnit[i].reset = CM_NO_BATCH_BUFFER_REUSE;
649 }
650 m_threadAssociated = true;
651 m_needSetKernelPointer = true;
652 m_threadSpaceOrderSet = true;
653 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
654 return CM_SUCCESS;
655 }
656 //*-----------------------------------------------------------------------------
657 //| Purpose: Sets the dependency vectors for the CmThreadSpaceRT
658 //| Used for engineering build, no error checking
659 //| Returns: CM_SUCCESS
660 //*-----------------------------------------------------------------------------
SelectThreadDependencyVectors(CM_DEPENDENCY dependencyVectors)661 CM_RT_API int32_t CmThreadSpaceRT::SelectThreadDependencyVectors(CM_DEPENDENCY dependencyVectors)
662 {
663 INSERT_API_CALL_LOG(GetHalState());
664
665 if( CmSafeMemCompare(&m_dependencyVectors, &dependencyVectors, sizeof(m_dependencyVectors)) != 0 )
666 {
667 CmSafeMemCopy(&m_dependencyVectors, &dependencyVectors, sizeof(m_dependencyVectors));
668 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
669 }
670
671 m_dependencyVectorsSet = true;
672
673 return CM_SUCCESS;
674 }
675
676 //*-----------------------------------------------------------------------------
677 //| Purpose: Sets the color count minus one of the CmThreadSpaceRT
678 //| Returns: CM_INVALID_ARG_VALUE if colorCount is 0 or greater than 16
679 //| CM_SUCCESS otherwise
680 //*-----------------------------------------------------------------------------
SetThreadSpaceColorCount(uint32_t colorCount)681 CM_RT_API int32_t CmThreadSpaceRT::SetThreadSpaceColorCount(uint32_t colorCount)
682 {
683 INSERT_API_CALL_LOG(GetHalState());
684
685 int32_t result = CM_SUCCESS;
686
687 PCM_HAL_STATE cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
688
689 result = cmHalState->cmHalInterface->ColorCountSanityCheck(colorCount);
690 if(result != CM_SUCCESS)
691 {
692 CM_ASSERTMESSAGE("Error: Color count sanity check failure.");
693 return result;
694 }
695
696 m_colorCountMinusOne = colorCount - 1;
697
698 return CM_SUCCESS;
699 }
700
701 //*-----------------------------------------------------------------------------
702 //| Purpose: Sets the dispatch pattern for 26ZI
703 //| Returns: CM_SUCCESS if valid dispath pattern, CM_FAILURE otherwise
704 //*-----------------------------------------------------------------------------
Set26ZIDispatchPattern(CM_26ZI_DISPATCH_PATTERN pattern)705 CM_RT_API int32_t CmThreadSpaceRT::Set26ZIDispatchPattern( CM_26ZI_DISPATCH_PATTERN pattern )
706 {
707 INSERT_API_CALL_LOG(GetHalState());
708
709 int result = CM_SUCCESS;
710
711 switch( pattern )
712 {
713 case VVERTICAL_HVERTICAL_26:
714 m_26ZIDispatchPattern = VVERTICAL_HVERTICAL_26;
715 break;
716 case VVERTICAL_HHORIZONTAL_26:
717 m_26ZIDispatchPattern = VVERTICAL_HHORIZONTAL_26;
718 break;
719 case VVERTICAL26_HHORIZONTAL26:
720 m_26ZIDispatchPattern = VVERTICAL26_HHORIZONTAL26;
721 break;
722 case VVERTICAL1X26_HHORIZONTAL1X26:
723 m_26ZIDispatchPattern = VVERTICAL1X26_HHORIZONTAL1X26;
724 break;
725 default:
726 result = CM_FAILURE;
727 break;
728 }
729
730 if( m_26ZIDispatchPattern != m_current26ZIDispatchPattern)
731 {
732 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
733 }
734
735 return result;
736 }
737
738 //*-----------------------------------------------------------------------------
739 //| Purpose: Sets the macro block size to be used to calculate 26ZI dispatch
740 //| Returns: CM_SUCCESS
741 //*-----------------------------------------------------------------------------
Set26ZIMacroBlockSize(uint32_t width,uint32_t height)742 CM_RT_API int32_t CmThreadSpaceRT::Set26ZIMacroBlockSize( uint32_t width, uint32_t height )
743 {
744 INSERT_API_CALL_LOG(GetHalState());
745 int32_t hr = CM_SUCCESS;
746 m_26ZIBlockWidth = width;
747 m_26ZIBlockHeight = height;
748 hr = UpdateDependency();
749 return hr;
750 }
751
752 //*-----------------------------------------------------------------------------
753 //| Purpose: Get the color count of the CmThreadSpaceRT
754 //| Returns: CM_SUCCESS.
755 //*-----------------------------------------------------------------------------
GetColorCountMinusOne(uint32_t & colorCount)756 int32_t CmThreadSpaceRT::GetColorCountMinusOne(uint32_t & colorCount)
757 {
758 colorCount = m_colorCountMinusOne;
759
760 return CM_SUCCESS;
761 }
762
763 //*-----------------------------------------------------------------------------
764 //| Purpose: Get the width and height of CmThreadSpaceRT
765 //| Returns: CM_SUCCESS.
766 //*-----------------------------------------------------------------------------
GetThreadSpaceSize(uint32_t & width,uint32_t & height)767 int32_t CmThreadSpaceRT::GetThreadSpaceSize(uint32_t & width, uint32_t & height)
768 {
769 width = m_width;
770 height = m_height;
771
772 return CM_SUCCESS;
773 }
774
775 //*-----------------------------------------------------------------------------
776 //| Purpose: Get thread space's unit
777 //*-----------------------------------------------------------------------------
GetThreadSpaceUnit(CM_THREAD_SPACE_UNIT * & threadSpaceUnit)778 int32_t CmThreadSpaceRT::GetThreadSpaceUnit(CM_THREAD_SPACE_UNIT* &threadSpaceUnit)
779 {
780 threadSpaceUnit = m_threadSpaceUnit;
781 return CM_SUCCESS;
782 }
783
784 //*-----------------------------------------------------------------------------
785 //| Purpose: Get the dependency
786 //*-----------------------------------------------------------------------------
GetDependency(CM_HAL_DEPENDENCY * & dependency)787 int32_t CmThreadSpaceRT::GetDependency(CM_HAL_DEPENDENCY* &dependency)
788 {
789 dependency = &m_dependency;
790 return CM_SUCCESS;
791 }
792
793 //*-----------------------------------------------------------------------------
794 //| Purpose: Get its dependency type
795 //*-----------------------------------------------------------------------------
GetDependencyPatternType(CM_DEPENDENCY_PATTERN & dependencyPatternType)796 int32_t CmThreadSpaceRT::GetDependencyPatternType(CM_DEPENDENCY_PATTERN &dependencyPatternType)
797 {
798 dependencyPatternType = m_dependencyPatternType;
799
800 return CM_SUCCESS;
801 }
802
Get26ZIDispatchPattern(CM_26ZI_DISPATCH_PATTERN & pattern)803 int32_t CmThreadSpaceRT::Get26ZIDispatchPattern( CM_26ZI_DISPATCH_PATTERN &pattern)
804 {
805 pattern = m_26ZIDispatchPattern;
806
807 return CM_SUCCESS;
808 }
809
810 //*-----------------------------------------------------------------------------
811 //| Purpose: Get walking pattern
812 //*-----------------------------------------------------------------------------
GetWalkingPattern(CM_WALKING_PATTERN & walkingPattern)813 int32_t CmThreadSpaceRT::GetWalkingPattern(CM_WALKING_PATTERN &walkingPattern)
814 {
815 walkingPattern = m_walkingPattern;
816 return CM_SUCCESS;
817 }
818
819 //*-----------------------------------------------------------------------------
820 //| Purpose: Get media walking parameters
821 //*-----------------------------------------------------------------------------
GetWalkingParameters(CM_WALKING_PARAMETERS & walkingParameters)822 int32_t CmThreadSpaceRT::GetWalkingParameters(CM_WALKING_PARAMETERS &walkingParameters)
823 {
824 CmSafeMemCopy(&walkingParameters, &m_walkingParameters, sizeof(m_walkingParameters));
825 return CM_SUCCESS;
826 }
827
828 //*-----------------------------------------------------------------------------
829 //| Purpose: Return true if media walker parameters are set, false otherwise
830 //*-----------------------------------------------------------------------------
CheckWalkingParametersSet()831 bool CmThreadSpaceRT::CheckWalkingParametersSet( )
832 {
833 return m_mediaWalkerParamsSet;
834 }
835
836 //*-----------------------------------------------------------------------------
837 //| Purpose: Get dependency vectors
838 //*-----------------------------------------------------------------------------
GetDependencyVectors(CM_HAL_DEPENDENCY & dependencyVectors)839 int32_t CmThreadSpaceRT::GetDependencyVectors(CM_HAL_DEPENDENCY &dependencyVectors)
840 {
841 CmSafeMemCopy(&dependencyVectors, &m_dependencyVectors, sizeof(m_dependencyVectors));
842 return CM_SUCCESS;
843 }
844
845 //*-----------------------------------------------------------------------------
846 //| Purpose: Return true if dependency vectors are set, false otherwise
847 //*-----------------------------------------------------------------------------
CheckDependencyVectorsSet()848 bool CmThreadSpaceRT::CheckDependencyVectorsSet( )
849 {
850 return m_dependencyVectorsSet;
851 }
852
853 //*-----------------------------------------------------------------------------
854 //| Purpose: Return true if thread space order is set, false otherwise
855 //*-----------------------------------------------------------------------------
CheckThreadSpaceOrderSet()856 bool CmThreadSpaceRT::CheckThreadSpaceOrderSet()
857 {
858 return m_threadSpaceOrderSet;
859 }
860
861 //*-----------------------------------------------------------------------------
862 //| Purpose: Get Wavefront26ZDispatchInfo
863 //*-----------------------------------------------------------------------------
GetWavefront26ZDispatchInfo(CM_HAL_WAVEFRONT26Z_DISPATCH_INFO & dispatchInfo)864 int32_t CmThreadSpaceRT::GetWavefront26ZDispatchInfo(CM_HAL_WAVEFRONT26Z_DISPATCH_INFO &dispatchInfo)
865 {
866 dispatchInfo = m_wavefront26ZDispatchInfo;
867 return CM_SUCCESS;
868 }
869
870 //*-----------------------------------------------------------------------------
871 //| Purpose: Check the integrity of thread space' association
872 //*-----------------------------------------------------------------------------
IntegrityCheck(CmTaskRT * task)873 bool CmThreadSpaceRT::IntegrityCheck(CmTaskRT* task)
874 {
875 CmKernelRT *kernelRT = nullptr;
876 uint32_t i;
877 uint32_t kernelCount = 0;
878 uint32_t threadNumber = 0;
879 uint32_t kernelIndex = 0;
880 uint32_t unassociated = 0;
881 int32_t hr = CM_SUCCESS;
882
883 uint8_t **threadSpaceMapping = nullptr;
884 uint8_t *kernelInScoreboard = nullptr;
885
886 kernelCount = task->GetKernelCount();
887 //Check if it is mult-kernel task, since no threadspace is allowed for multi-kernel tasks
888 if (kernelCount > 1)
889 {
890 CM_ASSERTMESSAGE("Error: threadSpace->IntegrityCheck Failed: ThreadSpace is not allowed in multi-kernel task.");
891 return false;
892 }
893
894 kernelRT = task->GetKernelPointer(0);
895 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
896
897 //To check if the thread space size is matched with thread count
898 kernelRT->GetThreadCount(threadNumber);
899
900 //Till now, all disallowed settings are abort, now we need check if the thread space association is correct.
901 if (this->IsThreadAssociated())
902 {
903 //For future extending to multiple kernels cases, we're using a general mechanism to check the integrity
904
905 threadSpaceMapping = MOS_NewArray(uint8_t*, kernelCount);
906 kernelInScoreboard = MOS_NewArray(uint8_t, kernelCount);
907
908 CM_CHK_NULL_GOTOFINISH_CMERROR(threadSpaceMapping);
909 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelInScoreboard);
910
911 CmSafeMemSet(threadSpaceMapping, 0, kernelCount*sizeof(uint8_t *));
912 CmSafeMemSet(kernelInScoreboard, 0, kernelCount*sizeof(uint8_t));
913
914 for (i = 0; i < kernelCount; i++)
915 {
916 kernelRT = task->GetKernelPointer(i);
917 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
918 kernelRT->GetThreadCount(threadNumber);
919 if (threadNumber == 0)
920 {
921 threadNumber = m_width * m_height;
922 }
923 threadSpaceMapping[i] = MOS_NewArray(uint8_t, threadNumber);
924 CM_CHK_NULL_GOTOFINISH_CMERROR(threadSpaceMapping[i]);
925 CmSafeMemSet(threadSpaceMapping[i], 0, threadNumber * sizeof(uint8_t));
926 kernelInScoreboard[i] = 0;
927 }
928
929 for (i = 0; i < m_width * m_height; i ++ )
930 {
931 kernelRT = static_cast<CmKernelRT *> (m_threadSpaceUnit[i].kernel);
932 if (kernelRT == nullptr)
933 {
934 if (m_needSetKernelPointer)
935 {
936 kernelRT = *m_kernel;
937 }
938 }
939 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
940
941 kernelIndex = kernelRT->GetIndexInTask();
942 threadSpaceMapping[kernelIndex][m_threadSpaceUnit[i].threadId] = 1;
943 kernelInScoreboard[kernelIndex] = 1;
944 }
945
946 for (i = 0; i < kernelCount; i ++)
947 {
948 if(kernelInScoreboard[i])
949 {
950 kernelRT = task->GetKernelPointer(i);
951 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelRT);
952
953 kernelRT->GetThreadCount(threadNumber);
954 if (threadNumber == 0)
955 {
956 threadNumber = m_width * m_height;
957 }
958 kernelRT->SetAssociatedToTSFlag(true);
959 for (uint32_t j = 0; j < threadNumber; j++)
960 {
961 if (threadSpaceMapping[i][j] == 0)
962 {
963 unassociated ++;
964 break;
965 }
966 }
967 }
968 MosSafeDeleteArray(threadSpaceMapping[i]);
969 }
970
971 if (unassociated != 0)
972 {
973 CM_ASSERTMESSAGE("Error: The thread space association is not correct.");
974 hr = CM_FAILURE;
975 }
976 }
977
978 finish:
979
980 MosSafeDeleteArray(threadSpaceMapping);
981 MosSafeDeleteArray(kernelInScoreboard);
982
983 return (hr == CM_SUCCESS)? true: false;
984 }
985
986 //*-----------------------------------------------------------------------------
987 //| Purpose: Generate Wave45 Sequence
988 //*-----------------------------------------------------------------------------
Wavefront45Sequence()989 int32_t CmThreadSpaceRT::Wavefront45Sequence()
990 {
991 if ( m_currentDependencyPattern == CM_WAVEFRONT )
992 {
993 return CM_SUCCESS;
994 }
995 m_currentDependencyPattern = CM_WAVEFRONT;
996
997 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
998 m_indexInList = 0;
999
1000 for (uint32_t y = 0; y < m_height; y ++)
1001 {
1002 for (uint32_t x = 0; x < m_width; x ++)
1003 {
1004 CM_COORDINATE tempCoordinate;
1005 int32_t linearOffset = y * m_width + x;
1006 if (m_boardFlag[linearOffset] == WHITE)
1007 {
1008 m_boardOrderList[m_indexInList ++] = linearOffset;
1009 m_boardFlag[linearOffset] = BLACK;
1010 tempCoordinate.x = x - 1;
1011 tempCoordinate.y = y + 1;
1012 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1013 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1014 {
1015 if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1016 {
1017 m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1018 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1019 }
1020 tempCoordinate.x = tempCoordinate.x - 1;
1021 tempCoordinate.y = tempCoordinate.y + 1;
1022 }
1023 }
1024 }
1025 }
1026
1027 return CM_SUCCESS;
1028 }
1029
1030 //*-----------------------------------------------------------------------------
1031 //| Purpose: Generate Wave26 Sequence
1032 //*-----------------------------------------------------------------------------
Wavefront26Sequence()1033 int32_t CmThreadSpaceRT::Wavefront26Sequence()
1034 {
1035 if ( m_currentDependencyPattern == CM_WAVEFRONT26 )
1036 {
1037 return CM_SUCCESS;
1038 }
1039 m_currentDependencyPattern = CM_WAVEFRONT26;
1040
1041 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1042 m_indexInList = 0;
1043
1044 for (uint32_t y = 0; y < m_height; y ++)
1045 {
1046 for (uint32_t x = 0; x < m_width; x ++)
1047 {
1048 CM_COORDINATE tempCoordinate;
1049 int32_t linearOffset = y * m_width + x;
1050 if (m_boardFlag[linearOffset] == WHITE)
1051 {
1052 m_boardOrderList[m_indexInList ++] = linearOffset;
1053 m_boardFlag[linearOffset] = BLACK;
1054 tempCoordinate.x = x - 2;
1055 tempCoordinate.y = y + 1;
1056 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1057 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1058 {
1059 if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1060 {
1061 m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1062 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1063 }
1064 tempCoordinate.x = tempCoordinate.x - 2;
1065 tempCoordinate.y = tempCoordinate.y + 1;
1066 }
1067 }
1068 }
1069 }
1070
1071 return CM_SUCCESS;
1072 }
1073
1074 //*-----------------------------------------------------------------------------
1075 //| Purpose: Generate Wave26Z Sequence
1076 //*-----------------------------------------------------------------------------
Wavefront26ZSequence()1077 int32_t CmThreadSpaceRT::Wavefront26ZSequence()
1078 {
1079 if ( m_currentDependencyPattern == CM_WAVEFRONT26Z )
1080 {
1081 return CM_SUCCESS;
1082 }
1083 m_currentDependencyPattern = CM_WAVEFRONT26Z;
1084
1085 uint32_t threadsInWave = 0;
1086 uint32_t numWaves = 0;
1087
1088 if ( ( m_height % 2 != 0 ) || ( m_width % 2 != 0 ) )
1089 {
1090 return CM_INVALID_ARG_SIZE;
1091 }
1092 CmSafeMemSet( m_boardFlag, WHITE, m_width * m_height * sizeof( uint32_t ) );
1093 m_indexInList = 0;
1094
1095 uint32_t iX, iY, nOffset;
1096 iX = iY = nOffset = 0;
1097
1098 uint32_t *waveFrontPosition = MOS_NewArray(uint32_t, m_width);
1099 uint32_t *waveFrontOffset = MOS_NewArray(uint32_t, m_width);
1100 if ( ( waveFrontPosition == nullptr ) || ( waveFrontOffset == nullptr ) )
1101 {
1102 MosSafeDeleteArray( waveFrontPosition );
1103 MosSafeDeleteArray( waveFrontOffset );
1104 return CM_FAILURE;
1105 }
1106 CmSafeMemSet( waveFrontPosition, 0, m_width * sizeof( int ) );
1107
1108 // set initial value
1109 m_boardFlag[ 0 ] = BLACK;
1110 m_boardOrderList[ 0 ] = 0;
1111 waveFrontPosition[ 0 ] = 1;
1112 m_indexInList = 0;
1113
1114 CM_COORDINATE mask[ 8 ];
1115 uint32_t nMaskNumber = 0;
1116
1117 m_wavefront26ZDispatchInfo.numThreadsInWave[numWaves] = 1;
1118 numWaves++;
1119
1120 while ( m_indexInList < m_width * m_height - 1 )
1121 {
1122
1123 CmSafeMemSet( waveFrontOffset, 0, m_width * sizeof( int ) );
1124 for ( uint32_t iX = 0; iX < m_width; ++iX )
1125 {
1126 uint32_t iY = waveFrontPosition[ iX ];
1127 nOffset = iY * m_width + iX;
1128 CmSafeMemSet( mask, 0, sizeof( mask ) );
1129
1130 if ( m_boardFlag[ nOffset ] == WHITE )
1131 {
1132 if ( ( iX % 2 == 0 ) && ( iY % 2 == 0 ) )
1133 {
1134 if ( iX == 0 )
1135 {
1136 mask[ 0 ].x = 0;
1137 mask[ 0 ].y = -1;
1138 mask[ 1 ].x = 1;
1139 mask[ 1 ].y = -1;
1140 nMaskNumber = 2;
1141 }
1142 else if ( iY == 0 )
1143 {
1144 mask[ 0 ].x = -1;
1145 mask[ 0 ].y = 1;
1146 mask[ 1 ].x = -1;
1147 mask[ 1 ].y = 0;
1148 nMaskNumber = 2;
1149 }
1150 else
1151 {
1152 mask[ 0 ].x = -1;
1153 mask[ 0 ].y = 1;
1154 mask[ 1 ].x = -1;
1155 mask[ 1 ].y = 0;
1156 mask[ 2 ].x = 0;
1157 mask[ 2 ].y = -1;
1158 mask[ 3 ].x = 1;
1159 mask[ 3 ].y = -1;
1160 nMaskNumber = 4;
1161 }
1162 }
1163 else if ( ( iX % 2 == 0 ) && ( iY % 2 == 1 ) )
1164 {
1165 if ( iX == 0 )
1166 {
1167 mask[ 0 ].x = 0;
1168 mask[ 0 ].y = -1;
1169 mask[ 1 ].x = 1;
1170 mask[ 1 ].y = -1;
1171 nMaskNumber = 2;
1172 }
1173 else
1174 {
1175 mask[ 0 ].x = -1;
1176 mask[ 0 ].y = 0;
1177 mask[ 1 ].x = 0;
1178 mask[ 1 ].y = -1;
1179 mask[ 2 ].x = 1;
1180 mask[ 2 ].y = -1;
1181 nMaskNumber = 3;
1182 }
1183 }
1184 else if ( ( iX % 2 == 1 ) && ( iY % 2 == 0 ) )
1185 {
1186 if ( iY == 0 )
1187 {
1188 mask[ 0 ].x = -1;
1189 mask[ 0 ].y = 0;
1190 nMaskNumber = 1;
1191 }
1192 else if ( iX == m_width - 1 )
1193 {
1194 mask[ 0 ].x = -1;
1195 mask[ 0 ].y = 0;
1196 mask[ 1 ].x = 0;
1197 mask[ 1 ].y = -1;
1198 nMaskNumber = 2;
1199 }
1200 else
1201 {
1202 mask[ 0 ].x = -1;
1203 mask[ 0 ].y = 0;
1204 mask[ 1 ].x = 0;
1205 mask[ 1 ].y = -1;
1206 mask[ 2 ].x = 1;
1207 mask[ 2 ].y = -1;
1208 nMaskNumber = 3;
1209 }
1210 }
1211 else
1212 {
1213 mask[ 0 ].x = -1;
1214 mask[ 0 ].y = 0;
1215 mask[ 1 ].x = 0;
1216 mask[ 1 ].y = -1;
1217 nMaskNumber = 2;
1218 }
1219
1220 // check if all of the dependencies are in the dispatch queue
1221 bool allInQueue = true;
1222 for ( uint32_t i = 0; i < nMaskNumber; ++i )
1223 {
1224 if ( m_boardFlag[ nOffset + mask[ i ].x + mask[ i ].y * m_width ] == WHITE )
1225 {
1226 allInQueue = false;
1227 break;
1228 }
1229 }
1230 if ( allInQueue )
1231 {
1232 waveFrontOffset[ iX ] = nOffset;
1233 if( waveFrontPosition[ iX ] < m_height - 1 )
1234 {
1235 waveFrontPosition[ iX ]++;
1236 }
1237 }
1238 }
1239 }
1240
1241 for ( uint32_t iX = 0; iX < m_width; ++iX )
1242 {
1243 if ( ( m_boardFlag[ waveFrontOffset[ iX ] ] == WHITE ) && ( waveFrontOffset[ iX ] != 0 ) )
1244 {
1245 m_indexInList++;
1246 m_boardOrderList[ m_indexInList ] = waveFrontOffset[ iX ];
1247 m_boardFlag[ waveFrontOffset[ iX ] ] = BLACK;
1248 threadsInWave++;
1249 }
1250 }
1251
1252 m_wavefront26ZDispatchInfo.numThreadsInWave[numWaves] = threadsInWave;
1253 threadsInWave = 0;
1254 numWaves++;
1255 }
1256
1257 MosSafeDeleteArray( waveFrontPosition );
1258 MosSafeDeleteArray( waveFrontOffset );
1259
1260 m_wavefront26ZDispatchInfo.numWaves = numWaves;
1261
1262 return CM_SUCCESS;
1263 }
1264
1265 //*-----------------------------------------------------------------------------
1266 //| Purpose: Generate Wavefront26ZI Sequence
1267 //| Dispatch order:
1268 //| Vertical threads vertically in macro block
1269 //| Horizontal threads vertically in macro block
1270 //| Overall 26 pattern
1271 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVVHV26()1272 int32_t CmThreadSpaceRT::Wavefront26ZISeqVVHV26()
1273 {
1274 if ( m_currentDependencyPattern == CM_WAVEFRONT26ZI &&
1275 ( m_current26ZIDispatchPattern == VVERTICAL_HVERTICAL_26 ) )
1276 {
1277 return CM_SUCCESS;
1278 }
1279
1280 m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1281 m_current26ZIDispatchPattern = VVERTICAL_HVERTICAL_26;
1282
1283 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1284 m_indexInList = 0;
1285
1286 for( uint32_t y = 0; y < m_height; y = y + m_26ZIBlockHeight )
1287 {
1288 for( uint32_t x = 0; x < m_width; x = x + m_26ZIBlockWidth )
1289 {
1290 CM_COORDINATE tempCoordinateFor26;
1291 tempCoordinateFor26.x = x;
1292 tempCoordinateFor26.y = y;
1293
1294 do
1295 {
1296 if( m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] == WHITE )
1297 {
1298 m_boardOrderList[m_indexInList ++] = tempCoordinateFor26.y * m_width + tempCoordinateFor26.x;
1299 m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] = BLACK;
1300
1301 // do vertical edges
1302 for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount = widthCount + 2 )
1303 {
1304 CM_COORDINATE tempCoordinate;
1305 uint32_t localHeightCounter = 0;
1306
1307 tempCoordinate.x = tempCoordinateFor26.x + widthCount;
1308 tempCoordinate.y = tempCoordinateFor26.y;
1309 while( (tempCoordinate.x >= 0) && (tempCoordinate.y >=0) &&
1310 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1311 (localHeightCounter < m_26ZIBlockHeight))
1312 {
1313 if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1314 {
1315 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1316 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1317 }
1318 tempCoordinate.y = tempCoordinate.y + 1;
1319 localHeightCounter++;
1320 }
1321 } // vertical edges
1322
1323 // do horizontal edges
1324 for( uint32_t widthCount = 1; widthCount < m_26ZIBlockWidth; widthCount = widthCount + 2 )
1325 {
1326 CM_COORDINATE tempCoordinate;
1327 uint32_t localHeightCounter = 0;
1328
1329 tempCoordinate.x = tempCoordinateFor26.x + widthCount;
1330 tempCoordinate.y = tempCoordinateFor26.y;
1331 while( (tempCoordinate.x >= 0) && (tempCoordinate.y >=0) &&
1332 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1333 (localHeightCounter < m_26ZIBlockHeight))
1334 {
1335 if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1336 {
1337 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1338 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1339 }
1340 tempCoordinate.y = tempCoordinate.y + 1;
1341 localHeightCounter++;
1342 }
1343 } // horizontal edges
1344 }
1345
1346 tempCoordinateFor26.x = tempCoordinateFor26.x - (2 * m_26ZIBlockWidth);
1347 tempCoordinateFor26.y = tempCoordinateFor26.y + (1 * m_26ZIBlockHeight);
1348
1349 } while( ( tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0)
1350 && (tempCoordinateFor26.x < (int32_t)m_width) && ( tempCoordinateFor26.y < (int32_t)m_height));
1351 }
1352 }
1353
1354 return CM_SUCCESS;
1355 }
1356
1357 //*-----------------------------------------------------------------------------
1358 //| Purpose: Generate Wavefront26ZI Sequence
1359 //| Dispatch order:
1360 //| Vertical threads vertically in macro block
1361 //| Horizontal threads horizontally in macro block
1362 //| Overall 26 pattern
1363 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVVHH26()1364 int32_t CmThreadSpaceRT::Wavefront26ZISeqVVHH26()
1365 {
1366 if ( m_currentDependencyPattern == CM_WAVEFRONT26ZI &&
1367 ( m_current26ZIDispatchPattern == VVERTICAL_HHORIZONTAL_26))
1368 {
1369 return CM_SUCCESS;
1370 }
1371
1372 m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1373 m_current26ZIDispatchPattern = VVERTICAL_HHORIZONTAL_26;
1374
1375 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1376 m_indexInList = 0;
1377
1378 for( uint32_t y = 0; y < m_height; y = y + m_26ZIBlockHeight )
1379 {
1380 for( uint32_t x = 0; x < m_width; x = x + m_26ZIBlockWidth )
1381 {
1382 CM_COORDINATE tempCoordinateFor26;
1383 tempCoordinateFor26.x = x;
1384 tempCoordinateFor26.y = y;
1385
1386 do
1387 {
1388 if( m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] == WHITE )
1389 {
1390 m_boardOrderList[m_indexInList ++] = tempCoordinateFor26.y * m_width + tempCoordinateFor26.x;
1391 m_boardFlag[tempCoordinateFor26.y * m_width + tempCoordinateFor26.x] = BLACK;
1392
1393 // do vertical edges
1394 for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount = widthCount + 2 )
1395 {
1396 CM_COORDINATE tempCoordinate;
1397 uint32_t localHeightCounter = 0;
1398
1399 tempCoordinate.x = tempCoordinateFor26.x + widthCount;
1400 tempCoordinate.y = tempCoordinateFor26.y;
1401 while( (tempCoordinate.x >= 0) && (tempCoordinate.y >=0) &&
1402 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1403 (localHeightCounter < m_26ZIBlockHeight))
1404 {
1405 if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1406 {
1407 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1408 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1409 }
1410 tempCoordinate.y = tempCoordinate.y + 1;
1411 localHeightCounter++;
1412 }
1413 } // vertical edges
1414
1415 // horizontal edges
1416 for( uint32_t heightCount = 0; heightCount < m_26ZIBlockHeight; ++heightCount )
1417 {
1418 CM_COORDINATE tempCoordinate;
1419 uint32_t localWidthCounter = 0;
1420
1421 tempCoordinate.x = tempCoordinateFor26.x + 1;
1422 tempCoordinate.y = tempCoordinateFor26.y + heightCount;
1423 while ( (tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1424 (tempCoordinate.x< (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height) &&
1425 (localWidthCounter < (m_26ZIBlockWidth / 2) ) )
1426 {
1427 if( m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1428 {
1429 m_boardOrderList[m_indexInList ++ ] = tempCoordinate.y * m_width + tempCoordinate.x;
1430 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1431 }
1432
1433 tempCoordinate.x = tempCoordinate.x + 2;
1434 localWidthCounter++;
1435 }
1436 }
1437 // horizontal edges
1438 }
1439
1440 tempCoordinateFor26.x = tempCoordinateFor26.x - (2 * m_26ZIBlockWidth);
1441 tempCoordinateFor26.y = tempCoordinateFor26.y + (1 * m_26ZIBlockHeight);
1442
1443 } while( ( tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0)
1444 && (tempCoordinateFor26.x < (int32_t)m_width) && ( tempCoordinateFor26.y < (int32_t)m_height));
1445 }
1446 }
1447
1448 return CM_SUCCESS;
1449 }
1450
1451 //*-----------------------------------------------------------------------------
1452 //| Purpose: Generate Wavefront26ZI Sequence
1453 //| Dispatch order:
1454 //| Vertical threads vertically in macro block and then along 26 wave
1455 //| Horizontal threads horizontally in macro block and then along 26 wave
1456 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVV26HH26()1457 int32_t CmThreadSpaceRT::Wavefront26ZISeqVV26HH26()
1458 {
1459 if( (m_currentDependencyPattern == CM_WAVEFRONT26ZI) &&
1460 (m_current26ZIDispatchPattern == VVERTICAL26_HHORIZONTAL26) )
1461 {
1462 return CM_SUCCESS;
1463 }
1464
1465 m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1466 m_current26ZIDispatchPattern = VVERTICAL26_HHORIZONTAL26;
1467
1468 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1469 m_indexInList = 0;
1470
1471 uint32_t waveFrontNum = 0;
1472 uint32_t waveFrontStartX = 0;
1473 uint32_t waveFrontStartY = 0;
1474
1475 uint32_t adjustHeight = 0;
1476
1477 CM_COORDINATE tempCoordinateFor26;
1478 tempCoordinateFor26.x = 0;
1479 tempCoordinateFor26.y = 0;
1480
1481 while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1482 (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) )
1483 {
1484 // use horizontal coordinates to save starting (x,y) for overall 26
1485 CM_COORDINATE tempCoordinateForHorz;
1486 tempCoordinateForHorz.x = tempCoordinateFor26.x;
1487 tempCoordinateForHorz.y = tempCoordinateFor26.y;
1488
1489 do
1490 {
1491 CM_COORDINATE tempCoordinateForVer;
1492
1493 for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount += 2 )
1494 {
1495 uint32_t localHeightCounter = 0;
1496 tempCoordinateForVer.x = tempCoordinateFor26.x + widthCount;
1497 tempCoordinateForVer.y = tempCoordinateFor26.y;
1498
1499 while( (tempCoordinateForVer.x < (int32_t)m_width) && (tempCoordinateForVer.y < (int32_t)m_height) &&
1500 (tempCoordinateForVer.x >= 0) && (tempCoordinateForVer.y >= 0) && (localHeightCounter < m_26ZIBlockHeight) )
1501 {
1502 if(m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] == WHITE )
1503 {
1504 m_boardOrderList[m_indexInList ++] = tempCoordinateForVer.y * m_width + tempCoordinateForVer.x;
1505 m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] = BLACK;
1506 }
1507 tempCoordinateForVer.y += 1;
1508 localHeightCounter++;
1509 }
1510 }
1511
1512 tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1513 tempCoordinateFor26.y = tempCoordinateFor26.y - (1 * m_26ZIBlockHeight);
1514
1515 } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1516 (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1517
1518 tempCoordinateFor26.x = tempCoordinateForHorz.x;
1519 tempCoordinateFor26.y = tempCoordinateForHorz.y;
1520
1521 do
1522 {
1523 // do horizontal edges
1524 for ( uint32_t heightCount = 0; heightCount < m_26ZIBlockHeight; ++heightCount )
1525 {
1526 uint32_t localWidthCounter = 0;
1527 tempCoordinateForHorz.x = tempCoordinateFor26.x + 1;
1528 tempCoordinateForHorz.y = tempCoordinateFor26.y + heightCount;
1529 while( (tempCoordinateForHorz.x >= 0) && (tempCoordinateForHorz.y >= 0) &&
1530 (tempCoordinateForHorz.x < (int32_t)m_width) && (tempCoordinateForHorz.y < (int32_t)m_height) &&
1531 (localWidthCounter < (m_26ZIBlockWidth / 2)) )
1532 {
1533 if( m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] == WHITE )
1534 {
1535 m_boardOrderList[m_indexInList ++] = tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x;
1536 m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] = BLACK;
1537 }
1538
1539 tempCoordinateForHorz.x += 2;
1540 localWidthCounter++;
1541 }
1542 }
1543
1544 tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1545 tempCoordinateFor26.y = tempCoordinateFor26.y - (1 * m_26ZIBlockHeight);
1546
1547 } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1548 (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1549
1550 if (m_width <= m_26ZIBlockWidth)
1551 {
1552 tempCoordinateFor26.x = 0;
1553 tempCoordinateFor26.y = tempCoordinateForHorz.y + m_26ZIBlockHeight;
1554 }
1555 else
1556 {
1557 // update wavefront number
1558 waveFrontNum++;
1559 adjustHeight = (uint32_t)ceil((double)m_height / m_26ZIBlockHeight);
1560
1561 if (waveFrontNum < (2 * adjustHeight))
1562 {
1563 waveFrontStartX = waveFrontNum & 1;
1564 waveFrontStartY = (uint32_t)floor((double)waveFrontNum / 2);
1565 }
1566 else
1567 {
1568 waveFrontStartX = (waveFrontNum - 2 * adjustHeight) + 2;
1569 waveFrontStartY = (adjustHeight)-1;
1570 }
1571
1572 tempCoordinateFor26.x = waveFrontStartX * m_26ZIBlockWidth;
1573 tempCoordinateFor26.y = waveFrontStartY * m_26ZIBlockHeight;
1574 }
1575 }
1576
1577 return CM_SUCCESS;
1578 }
1579
1580 //*-----------------------------------------------------------------------------
1581 //| Purpose: Generate Wavefront26ZI Sequence
1582 //| Dispatch order:
1583 //| Vertical threads vertically along 26 wave then in macro block
1584 //| Horizontal threads horizontally along 26 wave then in macro block
1585 //*-----------------------------------------------------------------------------
Wavefront26ZISeqVV1x26HH1x26()1586 int32_t CmThreadSpaceRT::Wavefront26ZISeqVV1x26HH1x26()
1587 {
1588 if ( (m_currentDependencyPattern == CM_WAVEFRONT26ZI) &&
1589 (m_current26ZIDispatchPattern == VVERTICAL1X26_HHORIZONTAL1X26))
1590 {
1591 return CM_SUCCESS;
1592 }
1593
1594 m_currentDependencyPattern = CM_WAVEFRONT26ZI;
1595 m_current26ZIDispatchPattern = VVERTICAL1X26_HHORIZONTAL1X26;
1596
1597 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1598 m_indexInList = 0;
1599
1600 uint32_t waveFrontNum = 0;
1601 uint32_t waveFrontStartX = 0;
1602 uint32_t waveFrontStartY = 0;
1603
1604 uint32_t adjustHeight = 0;
1605
1606 CM_COORDINATE tempCoordinateFor26;
1607 tempCoordinateFor26.x = 0;
1608 tempCoordinateFor26.y = 0;
1609
1610 CM_COORDINATE saveTempCoordinateFor26;
1611 saveTempCoordinateFor26.x = 0;
1612 saveTempCoordinateFor26.y = 0;
1613
1614 CM_COORDINATE tempCoordinateForVer;
1615 CM_COORDINATE tempCoordinateForHorz;
1616
1617 while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1618 (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) )
1619 {
1620 saveTempCoordinateFor26.x = tempCoordinateFor26.x;
1621 saveTempCoordinateFor26.y = tempCoordinateFor26.y;
1622
1623 // do vertical edges
1624 for( uint32_t widthCount = 0; widthCount < m_26ZIBlockWidth; widthCount += 2 )
1625 {
1626 // restore original starting point
1627 tempCoordinateFor26.x = saveTempCoordinateFor26.x;
1628 tempCoordinateFor26.y = saveTempCoordinateFor26.y;
1629
1630 do
1631 {
1632 uint32_t localHeightCounter = 0;
1633 tempCoordinateForVer.x = tempCoordinateFor26.x + widthCount;
1634 tempCoordinateForVer.y = tempCoordinateFor26.y;
1635 while( (tempCoordinateForVer.x < (int32_t)m_width) && (tempCoordinateForVer.y < (int32_t)m_height) &&
1636 (tempCoordinateForVer.x >= 0) && (tempCoordinateForVer.y >= 0) && (localHeightCounter < m_26ZIBlockHeight) )
1637 {
1638 if(m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] == WHITE )
1639 {
1640 m_boardOrderList[m_indexInList ++] = tempCoordinateForVer.y * m_width + tempCoordinateForVer.x;
1641 m_boardFlag[tempCoordinateForVer.y * m_width + tempCoordinateForVer.x] = BLACK;
1642 }
1643 tempCoordinateForVer.y += 1;
1644 localHeightCounter++;
1645 }
1646
1647 tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1648 tempCoordinateFor26.y = tempCoordinateFor26.y - ( 1 * m_26ZIBlockHeight);
1649
1650 } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1651 (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1652 }
1653
1654 // do horizontal edges
1655 // restore original starting position
1656 tempCoordinateFor26.x = saveTempCoordinateFor26.x;
1657 tempCoordinateFor26.y = saveTempCoordinateFor26.y;
1658
1659 for(uint32_t heightCount = 0; heightCount < m_26ZIBlockHeight; ++heightCount )
1660 {
1661 // restore original starting point
1662 tempCoordinateFor26.x = saveTempCoordinateFor26.x;
1663 tempCoordinateFor26.y = saveTempCoordinateFor26.y;
1664
1665 do
1666 {
1667 uint32_t localWidthCounter = 0;
1668 tempCoordinateForHorz.x = tempCoordinateFor26.x + 1;
1669 tempCoordinateForHorz.y = tempCoordinateFor26.y + heightCount;
1670 while( (tempCoordinateForHorz.x >= 0) && (tempCoordinateForHorz.y >= 0) &&
1671 (tempCoordinateForHorz.x < (int32_t)m_width) && (tempCoordinateForHorz.y < (int32_t)m_height) &&
1672 (localWidthCounter < (m_26ZIBlockWidth / 2)) )
1673 {
1674 if( m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] == WHITE )
1675 {
1676 m_boardOrderList[m_indexInList ++] = tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x;
1677 m_boardFlag[tempCoordinateForHorz.y * m_width + tempCoordinateForHorz.x] = BLACK;
1678 }
1679
1680 tempCoordinateForHorz.x += 2;
1681 localWidthCounter++;
1682 }
1683
1684 tempCoordinateFor26.x = tempCoordinateFor26.x + (2 * m_26ZIBlockWidth);
1685 tempCoordinateFor26.y = tempCoordinateFor26.y - ( 1 * m_26ZIBlockHeight);
1686
1687 } while( (tempCoordinateFor26.x >= 0) && (tempCoordinateFor26.y >= 0) &&
1688 (tempCoordinateFor26.x < (int32_t)m_width) && (tempCoordinateFor26.y < (int32_t)m_height) );
1689
1690 }
1691
1692 if (m_width <= m_26ZIBlockWidth)
1693 {
1694 tempCoordinateFor26.x = 0;
1695 tempCoordinateFor26.y = saveTempCoordinateFor26.y + m_26ZIBlockHeight;
1696 }
1697 else
1698 {
1699 // update wavefront number
1700 waveFrontNum++;
1701 adjustHeight = (uint32_t)ceil((double)m_height / m_26ZIBlockHeight);
1702
1703 if (waveFrontNum < (2 * adjustHeight))
1704 {
1705 waveFrontStartX = waveFrontNum & 1;
1706 waveFrontStartY = (uint32_t)floor((double)waveFrontNum / 2);
1707 }
1708 else
1709 {
1710 waveFrontStartX = (waveFrontNum - 2 * adjustHeight) + 2;
1711 waveFrontStartY = (adjustHeight)-1;
1712 }
1713
1714 tempCoordinateFor26.x = waveFrontStartX * m_26ZIBlockWidth;
1715 tempCoordinateFor26.y = waveFrontStartY * m_26ZIBlockHeight;
1716 }
1717 }
1718
1719 return CM_SUCCESS;
1720 }
1721
VerticalSequence()1722 int32_t CmThreadSpaceRT::VerticalSequence()
1723 {
1724 if ( m_currentDependencyPattern == CM_VERTICAL_WAVE)
1725 {
1726 return CM_SUCCESS;
1727 }
1728 m_currentDependencyPattern = CM_VERTICAL_WAVE;
1729
1730 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1731 m_indexInList = 0;
1732
1733 for (uint32_t x = 0; x < m_width; x ++)
1734 {
1735 for (uint32_t y = 0; y < m_height; y ++)
1736 {
1737 CM_COORDINATE tempCoordinate;
1738 int32_t linearOffset = y * m_width + x;
1739 if (m_boardFlag[linearOffset] == WHITE)
1740 {
1741 m_boardOrderList[m_indexInList ++] = linearOffset;
1742 m_boardFlag[linearOffset] = BLACK;
1743 tempCoordinate.x = x;
1744 tempCoordinate.y = y + 1;
1745 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1746 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1747 {
1748 if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1749 {
1750 m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1751 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1752 }
1753 tempCoordinate.y = tempCoordinate.y + 1;
1754 }
1755 }
1756 }
1757 }
1758
1759 return CM_SUCCESS;
1760 }
1761
HorizentalSequence()1762 int32_t CmThreadSpaceRT::HorizentalSequence()
1763 {
1764 if ( m_currentDependencyPattern == CM_HORIZONTAL_WAVE)
1765 {
1766 return CM_SUCCESS;
1767 }
1768 m_currentDependencyPattern = CM_HORIZONTAL_WAVE;
1769
1770 CmSafeMemSet(m_boardFlag, WHITE, m_width*m_height*sizeof(uint32_t));
1771 m_indexInList = 0;
1772
1773 for (uint32_t y = 0; y < m_height; y ++)
1774 {
1775 for (uint32_t x = 0; x < m_width; x ++)
1776 {
1777 CM_COORDINATE tempCoordinate;
1778 int32_t linearOffset = y * m_width + x;
1779 if (m_boardFlag[linearOffset] == WHITE)
1780 {
1781 m_boardOrderList[m_indexInList ++] = linearOffset;
1782 m_boardFlag[linearOffset] = BLACK;
1783 tempCoordinate.x = x + 1;
1784 tempCoordinate.y = y;
1785 while ((tempCoordinate.x >= 0) && (tempCoordinate.y >= 0) &&
1786 (tempCoordinate.x < (int32_t)m_width) && (tempCoordinate.y < (int32_t)m_height))
1787 {
1788 if (m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] == WHITE)
1789 {
1790 m_boardOrderList[m_indexInList ++] = tempCoordinate.y * m_width + tempCoordinate.x;
1791 m_boardFlag[tempCoordinate.y * m_width + tempCoordinate.x] = BLACK;
1792 }
1793 tempCoordinate.x = tempCoordinate.x + 1;
1794 }
1795 }
1796 }
1797 }
1798
1799 return CM_SUCCESS;
1800 }
1801
1802 //*-----------------------------------------------------------------------------
//| Purpose: Generate Wave Sequence for dependency vectors
1804 //*-----------------------------------------------------------------------------
WavefrontDependencyVectors()1805 int32_t CmThreadSpaceRT::WavefrontDependencyVectors()
1806 {
1807 if (m_boardFlag == nullptr)
1808 {
1809 m_boardFlag = MOS_NewArray(uint32_t, (m_height * m_width));
1810 if (m_boardFlag)
1811 {
1812 CmSafeMemSet(m_boardFlag, WHITE, (sizeof(uint32_t)* m_height * m_width));
1813 }
1814 else
1815 {
1816 CM_ASSERTMESSAGE("Error: Out of system memory.");
1817 return CM_OUT_OF_HOST_MEMORY;
1818 }
1819 }
1820 if (m_boardOrderList == nullptr)
1821 {
1822 m_boardOrderList = MOS_NewArray(uint32_t, (m_height * m_width));
1823 if (m_boardOrderList)
1824 {
1825 CmSafeMemSet(m_boardOrderList, 0, sizeof(uint32_t)* m_height * m_width);
1826 }
1827 else
1828 {
1829 CM_ASSERTMESSAGE("Error: Out of system memory.");
1830 MosSafeDeleteArray(m_boardFlag);
1831 return CM_OUT_OF_HOST_MEMORY;
1832 }
1833 }
1834 uint32_t iX, iY, nOffset;
1835 iX = iY = nOffset = 0;
1836
1837 uint32_t *waveFrontPosition = MOS_NewArray(uint32_t, m_width);
1838 uint32_t *waveFrontOffset = MOS_NewArray(uint32_t, m_width);
1839 if ((waveFrontPosition == nullptr) || (waveFrontOffset == nullptr))
1840 {
1841 MosSafeDeleteArray(waveFrontPosition);
1842 MosSafeDeleteArray(waveFrontOffset);
1843 return CM_FAILURE;
1844 }
1845 CmSafeMemSet(waveFrontPosition, 0, m_width * sizeof(int));
1846
1847 // set initial value
1848 m_boardFlag[0] = BLACK;
1849 m_boardOrderList[0] = 0;
1850 waveFrontPosition[0] = 1;
1851 m_indexInList = 0;
1852
1853 while (m_indexInList < m_width * m_height - 1)
1854 {
1855 CmSafeMemSet(waveFrontOffset, 0, m_width * sizeof(int));
1856 for (uint32_t iX = 0; iX < m_width; ++iX)
1857 {
1858 uint32_t iY = waveFrontPosition[iX];
1859 nOffset = iY * m_width + iX;
1860 if (m_boardFlag[nOffset] == WHITE)
1861 {
1862 // check if all of the dependencies are in the dispatch queue
1863 bool allInQueue = true;
1864 for (uint32_t i = 0; i < m_dependencyVectors.count; ++i)
1865 {
1866 uint32_t tempOffset = nOffset + m_dependencyVectors.deltaX[i] + m_dependencyVectors.deltaY[i] * m_width;
1867 if (tempOffset <= m_width * m_height - 1)
1868 {
1869 if (m_boardFlag[nOffset + m_dependencyVectors.deltaX[i] + m_dependencyVectors.deltaY[i] * m_width] == WHITE)
1870 {
1871 allInQueue = false;
1872 break;
1873 }
1874 }
1875 }
1876 if (allInQueue)
1877 {
1878 waveFrontOffset[iX] = nOffset;
1879 if (waveFrontPosition[iX] < m_height - 1)
1880 {
1881 waveFrontPosition[iX]++;
1882 }
1883 }
1884 }
1885 }
1886
1887 for (uint32_t iX = 0; iX < m_width; ++iX)
1888 {
1889 if ((m_boardFlag[waveFrontOffset[iX]] == WHITE) && (waveFrontOffset[iX] != 0))
1890 {
1891 m_indexInList++;
1892 m_boardOrderList[m_indexInList] = waveFrontOffset[iX];
1893 m_boardFlag[waveFrontOffset[iX]] = BLACK;
1894 }
1895 }
1896 }
1897
1898 MosSafeDeleteArray(waveFrontPosition);
1899 MosSafeDeleteArray(waveFrontOffset);
1900 return CM_SUCCESS;
1901 }
1902
1903 //*-----------------------------------------------------------------------------
1904 //| Purpose: Get Board Order list
1905 //*-----------------------------------------------------------------------------
GetBoardOrder(uint32_t * & boardOrder)1906 int32_t CmThreadSpaceRT::GetBoardOrder(uint32_t *&boardOrder)
1907 {
1908 boardOrder = m_boardOrderList;
1909 return CM_SUCCESS;
1910 }
1911
1912 #ifdef _DEBUG
PrintBoardOrder()1913 int32_t CmThreadSpaceRT::PrintBoardOrder()
1914 {
1915 CM_NORMALMESSAGE("According to dependency, the score board order is:");
1916 for (uint32_t i = 0; i < m_height * m_width; i ++)
1917 {
1918 CM_NORMALMESSAGE("%d->", m_boardOrderList[i]);
1919 }
1920 CM_NORMALMESSAGE("NIL.");
1921 return 0;
1922 }
1923 #endif
1924
IsThreadAssociated() const1925 bool CmThreadSpaceRT::IsThreadAssociated() const
1926 {
1927 return m_threadAssociated;
1928 }
1929
IsDependencySet()1930 bool CmThreadSpaceRT::IsDependencySet()
1931 {
1932 return ((m_dependencyPatternType != CM_NONE_DEPENDENCY) ? true : false);
1933 }
1934
GetNeedSetKernelPointer() const1935 bool CmThreadSpaceRT::GetNeedSetKernelPointer() const
1936 {
1937 return m_needSetKernelPointer;
1938 }
1939
SetKernelPointer(CmKernelRT * kernel) const1940 int32_t CmThreadSpaceRT::SetKernelPointer(CmKernelRT* kernel) const
1941 {
1942 *m_kernel = kernel;
1943 return CM_SUCCESS;
1944 }
1945
KernelPointerIsNULL() const1946 bool CmThreadSpaceRT::KernelPointerIsNULL() const
1947 {
1948 if (*m_kernel == nullptr)
1949 {
1950 return true;
1951 }
1952 else
1953 {
1954 return false;
1955 }
1956 }
1957
GetKernelPointer() const1958 CmKernelRT* CmThreadSpaceRT::GetKernelPointer() const
1959 {
1960 return *m_kernel;
1961 }
1962
GetIndexInTsArray()1963 uint32_t CmThreadSpaceRT::GetIndexInTsArray()
1964 {
1965 return m_indexInThreadSpaceArray;
1966 }
1967
GetDirtyStatus() const1968 CM_THREAD_SPACE_DIRTY_STATUS CmThreadSpaceRT::GetDirtyStatus() const
1969 {
1970 return *m_dirtyStatus;
1971 }
1972
SetDirtyStatus(CM_THREAD_SPACE_DIRTY_STATUS dirtyStatus) const1973 uint32_t CmThreadSpaceRT::SetDirtyStatus(CM_THREAD_SPACE_DIRTY_STATUS dirtyStatus) const
1974 {
1975 *m_dirtyStatus = dirtyStatus;
1976 return CM_SUCCESS;
1977 }
1978
SetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT groupSelect)1979 CM_RT_API int32_t CmThreadSpaceRT::SetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT groupSelect)
1980 {
1981 if (groupSelect != m_groupSelect)
1982 {
1983 m_groupSelect = groupSelect;
1984 *m_dirtyStatus = CM_THREAD_SPACE_DATA_DIRTY;
1985 }
1986
1987 return CM_SUCCESS;
1988 }
1989
GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT & groupSelect)1990 int32_t CmThreadSpaceRT::GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT &groupSelect)
1991 {
1992 groupSelect = m_groupSelect;
1993 return CM_SUCCESS;
1994 }
1995
UpdateDependency()1996 int32_t CmThreadSpaceRT::UpdateDependency()
1997 {
1998 //Init SW scoreboard
1999 if (!m_swScoreBoardEnabled)
2000 {
2001 return CM_SUCCESS;
2002 }
2003 if (m_swBoard == nullptr)
2004 {
2005 m_swBoard = MOS_NewArray(uint32_t, (m_height * m_width));
2006 if (m_swBoard)
2007 {
2008 CmSafeMemSet(m_swBoard, 0, sizeof(uint32_t)* m_height * m_width);
2009 }
2010 else
2011 {
2012 CM_ASSERTMESSAGE("Error: Out of system memory.");
2013 MosSafeDeleteArray(m_swBoard);
2014 return CM_OUT_OF_HOST_MEMORY;
2015 }
2016 }
2017 if (m_swBoardSurf == nullptr)
2018 {
2019 //for 2D atomic
2020 CM_CHK_CMSTATUS_RETURN(m_device->CreateSurface2D(m_width,
2021 m_height,
2022 Format_R32S,
2023 m_swBoardSurf));
2024 }
2025 CM_CHK_CMSTATUS_RETURN(InitSwScoreBoard());
2026 CM_CHK_CMSTATUS_RETURN(m_swBoardSurf->WriteSurface((uint8_t *)m_swBoard, nullptr));
2027 return CM_SUCCESS;
2028 }
2029
SetDependencyArgToKernel(CmKernelRT * pKernel) const2030 int32_t CmThreadSpaceRT::SetDependencyArgToKernel(CmKernelRT *pKernel) const
2031 {
2032 if (!m_swScoreBoardEnabled)
2033 {
2034 return CM_SUCCESS;
2035 }
2036 int32_t hr = CM_SUCCESS;
2037
2038 for (uint32_t k = 0; k < pKernel->m_argCount; k++)
2039 {
2040 if (pKernel->m_args[k].unitKind == ARG_KIND_SURFACE_2D_SCOREBOARD)
2041 {
2042 SurfaceIndex* ScoreboardIndex = nullptr;
2043 CM_CHK_CMSTATUS_RETURN(m_swBoardSurf->GetIndex(ScoreboardIndex));
2044 CM_CHK_CMSTATUS_RETURN(pKernel->SetKernelArg(k, sizeof(SurfaceIndex), ScoreboardIndex));
2045 }
2046 else if (pKernel->m_args[k].unitKind == ARG_KIND_GENERAL_DEPVEC)
2047 {
2048 char vectors[CM_MAX_DEPENDENCY_COUNT * 2];
2049 for (int ii = 0; ii < CM_MAX_DEPENDENCY_COUNT; ii++)
2050 {
2051 vectors[ii] = (char)m_dependency.deltaX[ii];
2052 vectors[ii + CM_MAX_DEPENDENCY_COUNT] = (char)m_dependency.deltaY[ii];
2053 }
2054 CM_CHK_CMSTATUS_RETURN(pKernel->SetKernelArg(k, (sizeof(char)*CM_MAX_DEPENDENCY_COUNT * 2), vectors));
2055 }
2056 else if (pKernel->m_args[k].unitKind == ARG_KIND_GENERAL_DEPCNT)
2057 {
2058 CM_CHK_CMSTATUS_RETURN(pKernel->SetKernelArg(k, sizeof(uint32_t), &(m_dependency.count)));
2059 }
2060 }
2061
2062 return CM_SUCCESS;
2063 }
2064
InitSwScoreBoard()2065 int32_t CmThreadSpaceRT::InitSwScoreBoard()
2066 {
2067 int SB_BufLen = m_height * m_width;
2068 int bufIdx = 0;
2069 int temp_x = 0, temp_y = 0;
2070 for (int i = 0; i < SB_BufLen; i++)
2071 {
2072 int x = i % m_width;
2073 int y = i / m_width;
2074 uint32_t entry_value = 0; //only support for 8 dependencies, but in uint32_t type
2075 for (uint32_t j = 0; j < m_dependency.count; j++)
2076 {
2077 if (((x + m_dependency.deltaX[j]) >= 0) &&
2078 ((x + m_dependency.deltaX[j]) < (int)m_width)
2079 && ((y + m_dependency.deltaY[j]) >= 0)
2080 && ((y + m_dependency.deltaY[j]) < (int)m_height))
2081 {
2082 entry_value |= (1 << j);
2083 }
2084 }
2085 switch (m_dependencyPatternType)
2086 {
2087 case CM_WAVEFRONT26Z:
2088 case CM_WAVEFRONT26ZIG:
2089 if ((x % 2) == 1 && (y % 2) == 1) {
2090 entry_value &= 0xE; // force 0 bit and 4th bit to be zero
2091 }
2092 else if ((x % 2) != 0 || (y % 2) != 0) {
2093 entry_value &= 0x1E; // force 0 bit to be zero
2094 }
2095 break;
2096 case CM_WAVEFRONT26X:
2097 if ((y % 4) == 3) {
2098 entry_value &= 0x3C; // force 0, 1 and 6th bit of dependency value to be zero. 7th is by default 0
2099 }
2100 else if ((y % 4) != 0) {
2101 entry_value &= 0x7E; // force 0th bit of dependency value to be zero.
2102 }
2103 break;
2104 case CM_WAVEFRONT26ZI:
2105 temp_x = x % m_26ZIBlockWidth;
2106 temp_y = y % m_26ZIBlockHeight;
2107 if (temp_x == 0) {
2108 if (temp_y == m_26ZIBlockHeight - 1)
2109 entry_value &= 0x1E;
2110 else if (temp_y == 0)
2111 entry_value &= 0x3F;
2112 else
2113 entry_value &= 0x1F;
2114 }
2115 else if (temp_x == m_26ZIBlockWidth - 1) {
2116 if (m_26ZIBlockWidth % 2 == 0) {
2117 if (temp_y == m_26ZIBlockHeight - 1)
2118 entry_value &= 0x1E;
2119 else if (temp_y == 0)
2120 entry_value &= 0x3F;
2121 else
2122 entry_value &= 0x1F;
2123 }
2124 else {
2125 if (temp_y == 0)
2126 entry_value &= 0x1A;
2127 else
2128 entry_value &= 0x12;
2129 }
2130
2131 }
2132 else if ((temp_x % 2) != 0) {
2133 if (temp_y == m_26ZIBlockHeight - 1)
2134 entry_value &= 0x7E;
2135 }
2136 else{ // ((temp_x % 2) == 0)
2137 if (temp_y == 0)
2138 entry_value &= 0x3A;
2139 else
2140 entry_value &= 0x12;
2141 }
2142 break;
2143 case CM_NONE_DEPENDENCY:
2144 case CM_WAVEFRONT:
2145 case CM_WAVEFRONT26:
2146 case CM_VERTICAL_WAVE:
2147 case CM_HORIZONTAL_WAVE:
2148 default:
2149 break;
2150 }
2151
2152 *(m_swBoard + i) = entry_value;
2153 }
2154 return CM_SUCCESS;
2155 }
2156
2157 #if CM_LOG_ON
Log()2158 std::string CmThreadSpaceRT::Log()
2159 {
2160 std::ostringstream oss;
2161
2162 oss << "Thread Space Parameters"
2163 << " Width :"<< m_width
2164 << " Height :" << m_height
2165 << " DependencyPatten :" << (int)m_dependencyPatternType
2166 << " IsAssociated :" <<m_threadAssociated
2167 << std::endl;
2168
2169 return oss.str();
2170 }
2171
GetHalState()2172 CM_HAL_STATE* CmThreadSpaceRT::GetHalState() { return m_device->GetHalState(); }
2173
2174 #endif // #if CM_LOG_ON
2175
GetThreadGroupSpace() const2176 CmThreadGroupSpace *CmThreadSpaceRT::GetThreadGroupSpace() const
2177 {
2178 return m_threadGroupSpace;
2179 }
2180 } // namespace
2181