1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_mem.h
24 //! \brief Contains CM memory function definitions
25 //!
26 #pragma once
27
28 #include <mmintrin.h>
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31 #include "cm_debug.h"
32 #include "mos_utilities.h"
33
// Instruction-set extension levels, ordered from least to most capable.
// The enumerators keep their implicit consecutive values starting at zero,
// so NUM_CPU_INSTRUCTION_LEVELS always equals the number of real levels.
enum CPU_INSTRUCTION_LEVEL
{
    CPU_INSTRUCTION_LEVEL_UNKNOWN = 0,  // no known extension level detected
    CPU_INSTRUCTION_LEVEL_MMX,          // MMX
    CPU_INSTRUCTION_LEVEL_SSE,          // SSE
    CPU_INSTRUCTION_LEVEL_SSE2,         // SSE2
    CPU_INSTRUCTION_LEVEL_SSE3,         // SSE3
    CPU_INSTRUCTION_LEVEL_SSE4,         // SSE4
    CPU_INSTRUCTION_LEVEL_SSE4_1,       // SSE4.1
    NUM_CPU_INSTRUCTION_LEVELS          // count of the levels above; not a level itself
};
45
// Fixed-size block types; each trailing comment gives the size implied by
// the element type and element count of the typedef.
typedef __m128 DQWORD; // one __m128 value: 128 bits, 16 bytes
typedef uint32_t PREFETCH[8]; // 8 x uint32_t: 32 bytes
typedef uint32_t CACHELINE[8]; // 8 x uint32_t: 32 bytes
typedef uint16_t DHWORD[32]; // 32 x uint16_t: 512 bits, 64 bytes
50
// Delete-and-reset helpers.
//   CmSafeDeleteArray / CmSafeDelete       - delete[] / delete the pointer when
//                                            it is non-null, then set it to nullptr.
//   MosSafeDeleteArray / MosSafeDelete     - release through MOS_DeleteArray /
//                                            MOS_Delete, then set it to nullptr.
// The argument is expanded several times, so it must be a side-effect-free
// lvalue. Every use of the argument is parenthesized so that expressions with
// low-precedence operators (e.g. a conditional expression) are compared and
// assigned as a whole.
// Each macro expands to a brace block, not to a single statement.
#define CmSafeDeleteArray(_ptr) {if((_ptr) != nullptr) {delete[] (_ptr); (_ptr)=nullptr;}}
#define CmSafeDelete(_ptr) {if((_ptr) != nullptr) {delete (_ptr);(_ptr)=nullptr;}}
#define MosSafeDeleteArray(_ptr) {MOS_DeleteArray(_ptr); (_ptr)=nullptr;}
#define MosSafeDelete(_ptr) {MOS_Delete(_ptr); (_ptr)=nullptr;}
// Forward declarations of the memory helpers; the inline set/copy/compare
// routines are defined further down in this header.
inline void CmSafeMemSet( void* dst, const int data, const size_t bytes );
inline void CmDwordMemSet( void* dst, const uint32_t data, const size_t bytes );
inline void CmSafeMemCopy( void* pdst, const void *psrc, const size_t bytes );
inline int CmSafeMemCompare( const void*, const void*, const size_t );

// Alignment and rounding helpers, also defined further down in this header.
inline bool IsPowerOfTwo( const size_t number );
inline void* Align( void* const ptr, const size_t alignment );
inline size_t GetAlignmentOffset( void* const ptr, const size_t alignSize );
inline bool IsAligned( void * ptr, const size_t alignSize );
inline size_t Round( const size_t value, const size_t size );

// Non-inline copy routines; their definitions are not part of this header
// section.
void CmFastMemCopy( void* dst, const void* src, const size_t bytes );
void CmFastMemCopyWC( void* dst, const void* src, const size_t bytes );

// Definition is not visible in this part of the file.
inline void Prefetch( const void* ptr );
70
71 /*****************************************************************************\
72 MACROS:
73 EMIT_R_MR
74 Example: movntdqa xmm1, xmmword ptr [eax]
75
76 EMIT_R_MR_OFFSET
77 Example: movntdqa xmm1, xmmword ptr [eax + 0x10]
78
79 Description:
    Used to encode SSE4.1 instructions with parameters
81 \*****************************************************************************/
// EMIT_R_MR: expands OPCODE (a sequence of _emit statements) and then emits
// one operand byte computed as 0x00 + X*8 + Y.
// NOTE(review): this looks like an x86 ModRM byte with X as the register
// field and Y as the base-register field - confirm against the callers.
// X and Y are not parenthesized in the expansion, so pass plain constants.
#define EMIT_R_MR(OPCODE, X, Y ) \
OPCODE \
__asm _emit (0x00 + X*8 + Y)

// EMIT_R_MR_OFFSET: same as EMIT_R_MR but the operand byte is 0x80 + X*8 + Y
// and it is followed by the four bytes of OFFSET, least significant first.
// OFFSET is not parenthesized in the expansion, so pass a plain constant.
#define EMIT_R_MR_OFFSET(OPCODE, X, Y, OFFSET) \
OPCODE \
__asm _emit (0x80 + X*8 + Y) \
__asm _emit (OFFSET&0xFF) \
__asm _emit ((OFFSET>>8)&0xFF) \
__asm _emit ((OFFSET>>16)&0xFF) \
__asm _emit ((OFFSET>>24)&0xFF)
93
94 /*****************************************************************************\
95 MACROS:
96 MOVNTDQA_OP
97 MOVNTDQA_R_MR
    MOVNTDQA_R_MR_OFFSET
99
100 Description:
101 Used to emit SSE4_1 movntdqa (streaming load) instructions
    DST - XMM Register in which the loaded data is to be stored
    SRC - General Purpose Register containing source address
104 OFFSET - Offset to be added to the source address
105 \*****************************************************************************/
// Emits the four leading bytes 0x66 0x0F 0x38 0x2A that precede the operand
// byte(s) of the instruction.
#define MOVNTDQA_OP \
_asm _emit 0x66 \
_asm _emit 0x0F \
_asm _emit 0x38 \
_asm _emit 0x2A

// Form without an offset: DST is forwarded as X and SRC as Y of EMIT_R_MR.
#define MOVNTDQA_R_MR(DST, SRC) \
EMIT_R_MR(MOVNTDQA_OP, DST, SRC)

// Form with an offset: DST is forwarded as X, SRC as Y, and OFFSET is emitted
// as four bytes by EMIT_R_MR_OFFSET.
#define MOVNTDQA_R_MR_OFFSET(DST, SRC, OFFSET) \
EMIT_R_MR_OFFSET(MOVNTDQA_OP, DST, SRC, OFFSET)
117
// BIT(n): mask with only bit n set. Defined here only when no earlier header
// has already provided BIT. The shifted constant is a plain int, so n must
// stay below the sign bit of int for the result to be a positive mask.
#ifndef BIT
#define BIT( n ) ( 1 << (n) )
#endif
121
122 #include "cm_mem_os.h"
123
/*****************************************************************************\
Inline Function:
    CmSafeMemSet

Description:
    Fills 'bytes' bytes starting at 'dst' with the value 'data' by forwarding
    to ::memset. In _DEBUG builds the call is wrapped in __try/__except and
    the handler reports a CM assert message.

Input:
    dst   - start of the region to fill
    data  - fill value, handed to ::memset unchanged
    bytes - number of bytes to fill
\*****************************************************************************/
inline void CmSafeMemSet( void* dst, const int data, const size_t bytes )
{
#if defined(_DEBUG)
    // Debug builds catch exceptions here so they are easily debugged.
    __try
    {
        ::memset( dst, data, bytes );
    }
    __except( EXCEPTION_EXECUTE_HANDLER )
    {
        CM_ASSERTMESSAGE("Error: Memory Set failure.");
    }
#else
    ::memset( dst, data, bytes );
#endif
}
147
/*****************************************************************************\
Inline Function:
    CmDwordMemSet

Description:
    Fills memory with a repeated 32-bit value. Only whole dwords are written:
    bytes / 4 values are stored and any trailing 1-3 bytes are left untouched.

Input:
    dst   - start of the region to fill; it is written through a uint32_t
            pointer
    data  - 32-bit value stored into every dword
    bytes - size of the region in bytes
\*****************************************************************************/
inline void CmDwordMemSet( void* dst, const uint32_t data, const size_t bytes )
{
    uint32_t *cursor = reinterpret_cast<uint32_t*>( dst );

    // Keep the count in size_t: narrowing it to 32 bits would silently
    // shorten fills whose dword count does not fit in a uint32_t.
    const size_t dwordCount = bytes >> 2; // bytes / sizeof(uint32_t)

    for( size_t i = 0; i < dwordCount; ++i )
    {
        cursor[i] = data;
    }
}
163
164 /*****************************************************************************\
165 Inline Function:
166 CmSafeMemCopy
167
168 Description:
169 Memory Copy
170 \*****************************************************************************/
CmSafeMemCopy(void * dst,const void * src,const size_t bytes)171 inline void CmSafeMemCopy( void* dst, const void *src, const size_t bytes )
172 {
173 uint8_t *cacheDst = (uint8_t*)dst;
174 uint8_t *cacheSrc = (uint8_t*)src;
175
176 #if defined(_DEBUG)
177 __try
178 #endif
179 {
180 MOS_SecureMemcpy( cacheDst, bytes, cacheSrc, bytes );
181 }
182 #if defined(_DEBUG)
183 // catch exceptions here so they are easily debugged
184 __except( EXCEPTION_EXECUTE_HANDLER )
185 {
186 CM_ASSERTMESSAGE("Error: Memory Copy failure.");
187 }
188 #endif
189 }
190
/*****************************************************************************\
Inline Function:
    CmSafeMemCompare

Description:
    Compares the first 'bytes' bytes of two buffers with ::memcmp. In _DEBUG
    builds the call is wrapped in __try/__except; when the handler runs it
    reports a CM assert message and the function returns -1.

Input:
    dst   - first buffer
    src   - second buffer
    bytes - number of bytes to compare

Output:
    int - the value returned by ::memcmp, or -1 from the _DEBUG handler
\*****************************************************************************/
inline int CmSafeMemCompare( const void* dst, const void* src, const size_t bytes )
{
#if defined(_DEBUG)
    // Debug builds catch exceptions here so they are easily debugged.
    __try
    {
        return ::memcmp( dst, src, bytes );
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        CM_ASSERTMESSAGE("Error: Memory Compare failure.");
        return -1;
    }
#else
    return ::memcmp( dst, src, bytes );
#endif
}
215
216 /*****************************************************************************\
217 Inline Function:
218 GetCpuInstructionLevel
219
220 Description:
221 Returns the highest level of IA32 intruction extensions supported by the CPU
222 ( i.e. SSE, SSE2, SSE4, etc )
223
224 Output:
225 CPU_INSTRUCTION_LEVEL - highest level of IA32 instruction extension(s) supported
226 by CPU
227 \*****************************************************************************/
GetCpuInstructionLevel(void)228 inline CPU_INSTRUCTION_LEVEL GetCpuInstructionLevel( void )
229 {
230 int cpuInfo[4];
231 memset( cpuInfo, 0, 4*sizeof(int) );
232
233 GetCPUID(cpuInfo, 1);
234
235 CPU_INSTRUCTION_LEVEL cpuInstructionLevel = CPU_INSTRUCTION_LEVEL_UNKNOWN;
236 if( (cpuInfo[2] & BIT(19)) && TestSSE4_1() )
237 {
238 cpuInstructionLevel = CPU_INSTRUCTION_LEVEL_SSE4_1;
239 }
240 else if( cpuInfo[2] & BIT(1) )
241 {
242 cpuInstructionLevel = CPU_INSTRUCTION_LEVEL_SSE3;
243 }
244 else if( cpuInfo[3] & BIT(26) )
245 {
246 cpuInstructionLevel = CPU_INSTRUCTION_LEVEL_SSE2;
247 }
248 else if( cpuInfo[3] & BIT(25) )
249 {
250 cpuInstructionLevel = CPU_INSTRUCTION_LEVEL_SSE;
251 }
252 else if( cpuInfo[3] & BIT(23) )
253 {
254 cpuInstructionLevel = CPU_INSTRUCTION_LEVEL_MMX;
255 }
256
257 return cpuInstructionLevel;
258 }
259
260 /*****************************************************************************\
261 Inline Function:
262 Round
263
264 Description:
265 Rounds an unsigned integer to the next multiple of (power-2) size
266 \*****************************************************************************/
Round(const size_t value,const size_t size)267 inline size_t Round( const size_t value, const size_t size )
268 {
269
270 CM_ASSERT( IsPowerOfTwo(size) );
271 size_t mask = size - 1;
272 size_t roundedValue = ( value + mask ) & ~( mask );
273 return roundedValue;
274
275 }
276
/*****************************************************************************\
Inline Function:
    Max

Description:
    Returns the larger of the two values; when they are equal that common
    value is returned.

Input:
    var0 - first value
    var1 - second value
\*****************************************************************************/
__inline size_t Max( size_t var0, size_t var1 )
{
    if( var0 < var1 )
    {
        return var1;
    }
    return var0;
}
/*****************************************************************************\
Inline Function:
    IsAligned

Description:
    Determines if the given pointer is aligned to the given size, i.e. if its
    address is an exact multiple of 'alignSize'. 'alignSize' does not have to
    be a power of two, but it must not be zero because it is used as a
    divisor.

Input:
    ptr       - pointer whose address is tested
    alignSize - required alignment in bytes (non-zero)
\*****************************************************************************/
inline bool IsAligned( void * ptr, const size_t alignSize )
{
    const size_t address = (size_t)ptr;
    const size_t remainder = address % alignSize;
    return remainder == 0;
}
299
/*****************************************************************************\
Inline Function:
    IsPowerOfTwo

Description:
    Determines if the given value has at most one bit set. This is true for
    every power of two and also for zero, so callers that must reject zero
    have to check for it themselves.

Input:
    number - value to test
\*****************************************************************************/
inline bool IsPowerOfTwo( const size_t number )
{
    // Clearing the lowest set bit leaves nothing behind exactly when no more
    // than one bit was set to begin with.
    const size_t withoutLowestBit = number & ( number - 1 );
    return withoutLowestBit == 0;
}
311
312 /*****************************************************************************\
313 Inline Function:
314 GetAlignmentOffset
315
316 Description:
317 Returns the size in bytes needed to align the given size to the
318 given alignment size
319 \*****************************************************************************/
GetAlignmentOffset(void * const ptr,const size_t alignSize)320 inline size_t GetAlignmentOffset( void* const ptr, const size_t alignSize )
321 {
322 CM_ASSERT( alignSize );
323
324 uint32_t offset = 0;
325
326 if( IsPowerOfTwo(alignSize) )
327 { // can recast 'ptr' to uint32_t, since offset is uint32_t
328 offset = uint32_t( uintptr_t( Align(ptr, alignSize) ) - (uintptr_t)(ptr) );
329 }
330 else
331 {
332 const uint32_t modulo = (uint32_t)(uintptr_t(ptr) % alignSize);
333
334 if( modulo )
335 {
336 offset = (uint32_t)alignSize - modulo;
337 }
338 }
339
340 return offset;
341 }
342
343 /*****************************************************************************\
344 Inline Function:
345 Align
346
347 Description:
348 Type-safe (power-2) alignment of a pointer.
349 \*****************************************************************************/
Align(void * const ptr,const size_t alignment)350 inline void* Align( void* const ptr, const size_t alignment )
351 {
352 CM_ASSERT( IsPowerOfTwo(alignment) );
353
354 return (void*)( ( ((size_t)ptr) + alignment-1 ) & ~( alignment-1 ) );
355 }
356