xref: /aosp_15_r20/external/lzma/C/Sha512.c (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 /* Sha512.c -- SHA-512 Hash
2 : Igor Pavlov : Public domain
3 This code is based on public domain code from Wei Dai's Crypto++ library. */
4 
5 #include "Precomp.h"
6 
7 #include <string.h>
8 
9 #include "Sha512.h"
10 #include "RotateDefs.h"
11 #include "CpuArch.h"
12 
/*
  Compile-time detection: Z7_COMPILER_SHA512_SUPPORTED is defined when the
  toolchain is considered able to build the hardware SHA-512 code path
  (the one reached through Sha512_UpdateBlocks_HW).
*/
#ifdef MY_CPU_X86_OR_AMD64
  /* x86 / x64 : minimum compiler versions accepted for the hardware path */
  #if   (defined(Z7_LLVM_CLANG_VERSION)  && (Z7_LLVM_CLANG_VERSION  >= 170001)) \
     || (defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 170001)) \
     || (defined(Z7_GCC_VERSION)         && (Z7_GCC_VERSION         >= 140000)) \
     || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 2400) && (__INTEL_COMPILER <= 9900)) \
     || (defined(_MSC_VER) && (_MSC_VER >= 1940))
    #define Z7_COMPILER_SHA512_SUPPORTED
  #endif
#elif defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
  /* little-endian arm64 : either the feature macro is set by the compiler,
     or the compiler version is new enough */
  #if defined(__ARM_FEATURE_SHA512)
    #define Z7_COMPILER_SHA512_SUPPORTED
  #elif (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 130000)) \
     || (defined(__GNUC__) && (__GNUC__ >= 9)) \
     || (defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1940)) // fix it
    #define Z7_COMPILER_SHA512_SUPPORTED
  #endif
#endif
33 
34 
35 
36 
37 
38 
39 
40 
41 
42 
43 
44 
45 
46 
47 void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks);
48 
49 #ifdef Z7_COMPILER_SHA512_SUPPORTED
50   void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
51 
52   static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS = Sha512_UpdateBlocks;
53   static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS_HW;
54 
55   #define SHA512_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
56 #else
57   #define SHA512_UPDATE_BLOCKS(p) Sha512_UpdateBlocks
58 #endif
59 
60 
Sha512_SetFunction(CSha512 * p,unsigned algo)61 BoolInt Sha512_SetFunction(CSha512 *p, unsigned algo)
62 {
63   SHA512_FUNC_UPDATE_BLOCKS func = Sha512_UpdateBlocks;
64 
65   #ifdef Z7_COMPILER_SHA512_SUPPORTED
66     if (algo != SHA512_ALGO_SW)
67     {
68       if (algo == SHA512_ALGO_DEFAULT)
69         func = g_SHA512_FUNC_UPDATE_BLOCKS;
70       else
71       {
72         if (algo != SHA512_ALGO_HW)
73           return False;
74         func = g_SHA512_FUNC_UPDATE_BLOCKS_HW;
75         if (!func)
76           return False;
77       }
78     }
79   #else
80     if (algo > 1)
81       return False;
82   #endif
83 
84   p->v.vars.func_UpdateBlocks = func;
85   return True;
86 }
87 
88 
/*
  Unrolling configuration of the software round loops:
    STEP_PRE  : rounds emitted per iteration of the loop over rounds 0..15
    STEP_MAIN : rounds emitted per iteration of the loop over rounds 16..79
*/

#if 0 // change to 1 for size optimization
  #define STEP_PRE 1
  #define STEP_MAIN 1
#else
  // speed optimization
  #define STEP_PRE 2
  #define STEP_MAIN 4
  // #define Z7_SHA512_UNROLL
#endif

/* Z7_SHA512_BIG_W : the message schedule W[] keeps all 80 words.
   It is left undefined only for STEP_MAIN == 16, where a 16-word
   buffer is used instead. */
#undef Z7_SHA512_BIG_W
#if (STEP_MAIN != 16)
  #define Z7_SHA512_BIG_W
#endif
104 
105 
106 #define U64C(x) UINT64_CONST(x)
107 
108 static MY_ALIGN(64) const UInt64 SHA512_INIT_ARRAYS[4][8] = {
109 { U64C(0x8c3d37c819544da2), U64C(0x73e1996689dcd4d6), U64C(0x1dfab7ae32ff9c82), U64C(0x679dd514582f9fcf),
110   U64C(0x0f6d2b697bd44da8), U64C(0x77e36f7304c48942), U64C(0x3f9d85a86a1d36c8), U64C(0x1112e6ad91d692a1)
111 },
112 { U64C(0x22312194fc2bf72c), U64C(0x9f555fa3c84c64c2), U64C(0x2393b86b6f53b151), U64C(0x963877195940eabd),
113   U64C(0x96283ee2a88effe3), U64C(0xbe5e1e2553863992), U64C(0x2b0199fc2c85b8aa), U64C(0x0eb72ddc81c52ca2)
114 },
115 { U64C(0xcbbb9d5dc1059ed8), U64C(0x629a292a367cd507), U64C(0x9159015a3070dd17), U64C(0x152fecd8f70e5939),
116   U64C(0x67332667ffc00b31), U64C(0x8eb44a8768581511), U64C(0xdb0c2e0d64f98fa7), U64C(0x47b5481dbefa4fa4)
117 },
118 { U64C(0x6a09e667f3bcc908), U64C(0xbb67ae8584caa73b), U64C(0x3c6ef372fe94f82b), U64C(0xa54ff53a5f1d36f1),
119   U64C(0x510e527fade682d1), U64C(0x9b05688c2b3e6c1f), U64C(0x1f83d9abfb41bd6b), U64C(0x5be0cd19137e2179)
120 }};
121 
/*
  Sha512_InitState() prepares (p) for a new message:
  the byte counter is cleared and the state words are loaded from the
  row of SHA512_INIT_ARRAYS that belongs to (digestSize).
    digestSize : digest size in bytes; it is not range-checked here,
                 the row index is simply (digestSize >> 4) - 1.
  The selected block-update routine is not changed.
*/
void Sha512_InitState(CSha512 *p, unsigned digestSize)
{
  const size_t row = (size_t)(digestSize >> 4) - 1;
  memcpy(p->state, SHA512_INIT_ARRAYS[row], sizeof(p->state));
  p->v.vars.count = 0;
}
127 
/*
  Sha512_Init() fully initializes (p): it sets the block-update routine
  and then resets counter and state via Sha512_InitState().
    digestSize : digest size in bytes, passed through to Sha512_InitState()
*/
void Sha512_Init(CSha512 *p, unsigned digestSize)
{
#ifdef Z7_COMPILER_SHA512_SUPPORTED
  /* current default routine (software or hardware) */
  p->v.vars.func_UpdateBlocks = g_SHA512_FUNC_UPDATE_BLOCKS;
#else
  /* the pointer is not used for dispatch in this build:
     SHA512_UPDATE_BLOCKS() expands to Sha512_UpdateBlocks directly */
  p->v.vars.func_UpdateBlocks = NULL;
#endif
  Sha512_InitState(p, digestSize);
}
138 
/*
  Building blocks of the software SHA-512 round code.
  Every macro parameter that is used inside an operator expression is
  parenthesized, so the macros also work for expression arguments.
  The round macros expect these names in the calling scope:
    W[]   : message schedule buffer
    data  : pointer to the current input block
    j     : base round index of the current loop iteration
    a..h  : working variables,  tmp : scratch variable
    K     : round constant table
*/

/* big sigma (state words) and small sigma (schedule words) functions */
#define S0(x) (Z7_ROTR64((x),28) ^ Z7_ROTR64((x),34) ^ Z7_ROTR64((x),39))
#define S1(x) (Z7_ROTR64((x),14) ^ Z7_ROTR64((x),18) ^ Z7_ROTR64((x),41))
#define s0(x) (Z7_ROTR64((x), 1) ^ Z7_ROTR64((x), 8) ^ ((x) >> 7))
#define s1(x) (Z7_ROTR64((x),19) ^ Z7_ROTR64((x),61) ^ ((x) >> 6))

/* Ch  : bitwise select : bits of (y) where (x) is 1, bits of (z) where (x) is 0 */
#define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
/* Maj : bitwise majority of the three inputs */
#define Maj(x,y,z) (((x) & (y)) | ((z) & ((x) | (y))))


/* rounds 0..15 : load schedule word (j + i) from the input block (big-endian read) */
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe64(data + ((size_t)(j) + i) * 8))

/* the part of a new schedule word that is added to the word from 16 rounds ago */
#define blk2_main(j, i)  (s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15)))

#ifdef Z7_SHA512_BIG_W
    // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
    #define w(j, i)     W[(size_t)(j) + i]
    #define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
    /* 16-word schedule buffer indexed modulo 16 */
    #if STEP_MAIN == 16
        #define w(j, i)  W[(i) & 15]
    #else
        #define w(j, i)  W[((size_t)(j) + (i)) & 15]
    #endif
    #define blk2(j, i)  (w(j, i) += blk2_main(j, i))
#endif

/* rounds 16..79 : compute schedule word (j + i) */
#define W_MAIN(i)  blk2(j, i)


/* one round; the eight working variables are rotated by assignments.
   wx is W_PRE or W_MAIN */
#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp;

#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

#if (!defined(Z7_SHA512_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
/* two consecutive main rounds */
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1)

#endif



#if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8

/* unrolled variants: instead of moving values between variables,
   the variable names are permuted in the macro argument lists */

#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c);

#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3));

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c);

#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7);

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif
229 
230 
231 extern
232 MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80];
233 MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80] = {
234   U64C(0x428a2f98d728ae22), U64C(0x7137449123ef65cd), U64C(0xb5c0fbcfec4d3b2f), U64C(0xe9b5dba58189dbbc),
235   U64C(0x3956c25bf348b538), U64C(0x59f111f1b605d019), U64C(0x923f82a4af194f9b), U64C(0xab1c5ed5da6d8118),
236   U64C(0xd807aa98a3030242), U64C(0x12835b0145706fbe), U64C(0x243185be4ee4b28c), U64C(0x550c7dc3d5ffb4e2),
237   U64C(0x72be5d74f27b896f), U64C(0x80deb1fe3b1696b1), U64C(0x9bdc06a725c71235), U64C(0xc19bf174cf692694),
238   U64C(0xe49b69c19ef14ad2), U64C(0xefbe4786384f25e3), U64C(0x0fc19dc68b8cd5b5), U64C(0x240ca1cc77ac9c65),
239   U64C(0x2de92c6f592b0275), U64C(0x4a7484aa6ea6e483), U64C(0x5cb0a9dcbd41fbd4), U64C(0x76f988da831153b5),
240   U64C(0x983e5152ee66dfab), U64C(0xa831c66d2db43210), U64C(0xb00327c898fb213f), U64C(0xbf597fc7beef0ee4),
241   U64C(0xc6e00bf33da88fc2), U64C(0xd5a79147930aa725), U64C(0x06ca6351e003826f), U64C(0x142929670a0e6e70),
242   U64C(0x27b70a8546d22ffc), U64C(0x2e1b21385c26c926), U64C(0x4d2c6dfc5ac42aed), U64C(0x53380d139d95b3df),
243   U64C(0x650a73548baf63de), U64C(0x766a0abb3c77b2a8), U64C(0x81c2c92e47edaee6), U64C(0x92722c851482353b),
244   U64C(0xa2bfe8a14cf10364), U64C(0xa81a664bbc423001), U64C(0xc24b8b70d0f89791), U64C(0xc76c51a30654be30),
245   U64C(0xd192e819d6ef5218), U64C(0xd69906245565a910), U64C(0xf40e35855771202a), U64C(0x106aa07032bbd1b8),
246   U64C(0x19a4c116b8d2d0c8), U64C(0x1e376c085141ab53), U64C(0x2748774cdf8eeb99), U64C(0x34b0bcb5e19b48a8),
247   U64C(0x391c0cb3c5c95a63), U64C(0x4ed8aa4ae3418acb), U64C(0x5b9cca4f7763e373), U64C(0x682e6ff3d6b2b8a3),
248   U64C(0x748f82ee5defb2fc), U64C(0x78a5636f43172f60), U64C(0x84c87814a1f0ab72), U64C(0x8cc702081a6439ec),
249   U64C(0x90befffa23631e28), U64C(0xa4506cebde82bde9), U64C(0xbef9a3f7b2c67915), U64C(0xc67178f2e372532b),
250   U64C(0xca273eceea26619c), U64C(0xd186b8c721c0c207), U64C(0xeada7dd6cde0eb1e), U64C(0xf57d4f7fee6ed178),
251   U64C(0x06f067aa72176fba), U64C(0x0a637dc5a2c898a6), U64C(0x113f9804bef90dae), U64C(0x1b710b35131c471b),
252   U64C(0x28db77f523047d84), U64C(0x32caab7b40c72493), U64C(0x3c9ebe0a15c9bebc), U64C(0x431d67c49c100d4c),
253   U64C(0x4cc5d4becb3e42b6), U64C(0x597f299cfc657e2a), U64C(0x5fcb6fab3ad6faec), U64C(0x6c44198c4a475817)
254 };
255 
256 #define K SHA512_K_ARRAY
257 
258 Z7_NO_INLINE
Sha512_UpdateBlocks(UInt64 state[8],const Byte * data,size_t numBlocks)259 void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks)
260 {
261   UInt64 W
262 #ifdef Z7_SHA512_BIG_W
263       [80];
264 #else
265       [16];
266 #endif
267   unsigned j;
268   UInt64 a,b,c,d,e,f,g,h;
269 #if !defined(Z7_SHA512_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
270   UInt64 tmp;
271 #endif
272 
273   if (numBlocks == 0) return;
274 
275   a = state[0];
276   b = state[1];
277   c = state[2];
278   d = state[3];
279   e = state[4];
280   f = state[5];
281   g = state[6];
282   h = state[7];
283 
284   do
285   {
286 
287   for (j = 0; j < 16; j += STEP_PRE)
288   {
289     #if STEP_PRE > 4
290 
291       #if STEP_PRE < 8
292       R4_PRE(0);
293       #else
294       R8_PRE(0);
295       #if STEP_PRE == 16
296       R8_PRE(8);
297       #endif
298       #endif
299 
300     #else
301 
302       R1_PRE(0)
303       #if STEP_PRE >= 2
304       R1_PRE(1)
305       #if STEP_PRE >= 4
306       R1_PRE(2)
307       R1_PRE(3)
308       #endif
309       #endif
310 
311     #endif
312   }
313 
314   for (j = 16; j < 80; j += STEP_MAIN)
315   {
316     #if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8
317 
318       #if STEP_MAIN < 8
319       R4_MAIN(0)
320       #else
321       R8_MAIN(0)
322       #if STEP_MAIN == 16
323       R8_MAIN(8)
324       #endif
325       #endif
326 
327     #else
328 
329       R1_MAIN(0)
330       #if STEP_MAIN >= 2
331       R1_MAIN(1)
332       #if STEP_MAIN >= 4
333       R2_MAIN(2)
334       #if STEP_MAIN >= 8
335       R2_MAIN(4)
336       R2_MAIN(6)
337       #if STEP_MAIN >= 16
338       R2_MAIN(8)
339       R2_MAIN(10)
340       R2_MAIN(12)
341       R2_MAIN(14)
342       #endif
343       #endif
344       #endif
345       #endif
346     #endif
347   }
348 
349   a += state[0]; state[0] = a;
350   b += state[1]; state[1] = b;
351   c += state[2]; state[2] = c;
352   d += state[3]; state[3] = d;
353   e += state[4]; state[4] = e;
354   f += state[5]; state[5] = f;
355   g += state[6]; state[6] = g;
356   h += state[7]; state[7] = h;
357 
358   data += SHA512_BLOCK_SIZE;
359   }
360   while (--numBlocks);
361 }
362 
363 
364 #define Sha512_UpdateBlock(p) SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
365 
Sha512_Update(CSha512 * p,const Byte * data,size_t size)366 void Sha512_Update(CSha512 *p, const Byte *data, size_t size)
367 {
368   if (size == 0)
369     return;
370   {
371     const unsigned pos = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1);
372     const unsigned num = SHA512_BLOCK_SIZE - pos;
373     p->v.vars.count += size;
374     if (num > size)
375     {
376       memcpy(p->buffer + pos, data, size);
377       return;
378     }
379     if (pos != 0)
380     {
381       size -= num;
382       memcpy(p->buffer + pos, data, num);
383       data += num;
384       Sha512_UpdateBlock(p);
385     }
386   }
387   {
388     const size_t numBlocks = size >> 7;
389     // if (numBlocks)
390     SHA512_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
391     size &= SHA512_BLOCK_SIZE - 1;
392     if (size == 0)
393       return;
394     data += (numBlocks << 7);
395     memcpy(p->buffer, data, size);
396   }
397 }
398 
399 
/*
  Sha512_Final() finishes the hash and writes the digest.
    p          : hash object; after the call it is reset with
                 Sha512_InitState(p, digestSize) and can hash a new message
    digest     : receives (digestSize) bytes
    digestSize : digest size in bytes
  Padding: one 0x80 byte, zero bytes up to the last 16 bytes of a block,
  then the message length in bits as a 128-bit big-endian number.
  Both halves of that length are written: the byte counter is 64-bit,
  so the bit length (count * 8) can need up to 67 bits and its top bits
  are (count >> 61).
*/
void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize)
{
  const UInt64 numBytes = p->v.vars.count;
  unsigned pos = (unsigned)numBytes & (SHA512_BLOCK_SIZE - 1);

  p->buffer[pos++] = 0x80;
  if (pos > (SHA512_BLOCK_SIZE - 8 * 2))
  {
    /* no room for the 16-byte length field: pad and process this block,
       the length goes to an extra block */
    while (pos != SHA512_BLOCK_SIZE) { p->buffer[pos++] = 0; }
    Sha512_UpdateBlock(p);
    pos = 0;
  }
  memset(&p->buffer[pos], 0, (SHA512_BLOCK_SIZE - 8 * 2) - pos);
  {
    const UInt64 numBits_High = numBytes >> (64 - 3);
    const UInt64 numBits_Low = numBytes << 3;
    SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 2, numBits_High)
    SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 1, numBits_Low)
  }
  Sha512_UpdateBlock(p);
#if 1 && defined(MY_CPU_BE)
  /* big-endian CPU: state words already have the output byte order */
  memcpy(digest, p->state, digestSize);
#else
  {
    const unsigned numWords = digestSize >> 3;
    unsigned i;
    for (i = 0; i < numWords; i++)
    {
      const UInt64 v = p->state[i];
      SetBe64(digest, v)
      digest += 8;
    }
    if (digestSize & 4) // digestSize == SHA512_224_DIGEST_SIZE
    {
      /* the last 4 bytes are the upper half of the next state word */
      const UInt32 v = (UInt32)((p->state[numWords]) >> 32);
      SetBe32(digest, v)
    }
  }
#endif
  Sha512_InitState(p, digestSize);
}
439 
440 
441 
442 
#if defined(_WIN32) && defined(Z7_COMPILER_SHA512_SUPPORTED) \
    && defined(MY_CPU_ARM64)  // this check can be disabled to debug in x64

#if 1  // 0 for debug

#include "7zWindows.h"
// #include <stdio.h>
#if 0 && defined(MY_CPU_X86_OR_AMD64)
#include <intrin.h> // for debug : for __ud2()
#endif

/*
  CPU_IsSupported_SHA512() : detection for Windows on arm64.
  Returns True only if every step succeeds:
    1) CPU_IsSupported_CRYPTO() reports support;
    2) registry value "CP 4030" of CentralProcessor\0 is a REG_QWORD
       whose bits [15:12] are equal to 2;
    3) if the PROBE code is enabled: Sha512_UpdateBlocks_HW() processes
       one test block without raising an exception and produces the
       expected value.
  Otherwise it returns False.
*/
BoolInt CPU_IsSupported_SHA512(void)
{
#if defined(MY_CPU_ARM64)
  // IsProcessorFeaturePresent() still has no SHA512 flag,
  // so the crypto check is used as the first filter.
  if (!CPU_IsSupported_CRYPTO())
    return False;
#endif
  {
    // An application can't read the ID_AA64ISAR0_EL1 register,
    // but that register is mapped to the "CP 4030" registry value.
    HKEY cpuKey = NULL;
    DWORD valueType = 0;
    DWORD valueSize = sizeof(UInt64);
    UInt64 isar0 = 0;
    LONG status = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
        TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
        0, KEY_READ, &cpuKey);
    if (status != ERROR_SUCCESS)
      return False;

    status = RegQueryValueEx(cpuKey, TEXT("CP 4030"), NULL,
        &valueType, (LPBYTE)&isar0, &valueSize);
    RegCloseKey(cpuKey);

    if (status != ERROR_SUCCESS)
      return False;
    if (valueType != REG_QWORD || valueSize != sizeof(UInt64))
      return False;
    // SHA2 field of ID_AA64ISAR0_EL1 register:
    //   0 : No SHA2 instructions implemented
    //   1 : SHA256 implemented
    //   2 : SHA256 and SHA512 implemented
    if (((unsigned)(isar0 >> 12) & 0xf) != 2)
      return False;
  }


#if 1  // 0 for debug to disable SHA512 PROBE code

/*
----- SHA512 PROBE -----

Reading "CP 4030" from the registry is supposed to be enough.
The additional PROBE code is used because an exception can be caught
here, while exceptions are not caught when Sha512 functions are called
from the main code.

NOTE: the arm64 PROBE code doesn't work under Wine in linux-arm64:
the program just stops.
The x64 version of the PROBE code also doesn't work under the Intel SDE
emulator without SHA512 support (-skl switch). The program stops with
this message from SDE:
  TID 0 SDE-ERROR: Executed instruction not valid for specified chip (SKYLAKE): vsha512msg1
It would still be better to catch that exception instead of stopping the process.
Open questions:
  Does this PROBE code work in native Windows-arm64 (with/without sha512 hw instructions)?
  Are there any ways to fix the problems with the arm64-wine and x64-SDE cases?
*/

  {
#ifdef __clang_major__
  #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
    __try
    {
#if 0 // 1 : for debug (reduced version to detect sha512)
      const uint64x2_t a = vdupq_n_u64(1);
      const uint64x2_t b = vsha512hq_u64(a, a, a);
      if ((UInt32)vgetq_lane_u64(b, 0) == 0x11800002)
        return True;
#else
      // scratch area: state words followed by one input block,
      // all bytes preset to 0x5a
      MY_ALIGN(16)
      UInt64 probe[SHA512_NUM_DIGEST_WORDS + SHA512_NUM_BLOCK_WORDS];
      memset(probe, 0x5a, sizeof(probe));
#if 0 && defined(MY_CPU_X86_OR_AMD64)
      __ud2(); // for debug : that exception is not problem for SDE
#endif
#if 1
      Sha512_UpdateBlocks_HW(probe,
          (const Byte *)(const void *)(probe + SHA512_NUM_DIGEST_WORDS), 1);
      // low 32 bits of the first state word expected for this input
      if ((UInt32)probe[0] == 0xa33cfdf7)
        return True;
#endif
#endif
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
      // the probe raised an exception: report "not supported"
    }
  }
  return False;
#else
  // without SHA512 PROBE code
  return True;
#endif

}

#else

/* debug variant: hardware SHA512 is always reported as unsupported */
BoolInt CPU_IsSupported_SHA512(void)
{
  return False;
}

#endif
#endif // WIN32 arm64
565 
566 
/*
  Sha512Prepare() sets the two global routine pointers:
    g_SHA512_FUNC_UPDATE_BLOCKS    : hardware routine if the CPU checks pass,
                                     otherwise the software routine
    g_SHA512_FUNC_UPDATE_BLOCKS_HW : hardware routine if the CPU checks pass,
                                     otherwise NULL
  It does nothing in builds without Z7_COMPILER_SHA512_SUPPORTED.
*/
void Sha512Prepare(void)
{
#ifdef Z7_COMPILER_SHA512_SUPPORTED
  SHA512_FUNC_UPDATE_BLOCKS defaultFunc = Sha512_UpdateBlocks;
  SHA512_FUNC_UPDATE_BLOCKS hwFunc = NULL;

  /* x86/x64 additionally requires AVX2 */
  if (CPU_IsSupported_SHA512()
    #ifdef MY_CPU_X86_OR_AMD64
      && CPU_IsSupported_AVX2()
    #endif
      )
  {
    hwFunc = Sha512_UpdateBlocks_HW;
    defaultFunc = hwFunc;
  }

  g_SHA512_FUNC_UPDATE_BLOCKS    = defaultFunc;
  g_SHA512_FUNC_UPDATE_BLOCKS_HW = hwFunc;
#endif
}
588 
589 
/* Remove the helper macros of this file. */

/* configuration */
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA512_BIG_W
#undef Z7_SHA512_UNROLL
#undef Z7_COMPILER_SHA512_SUPPORTED

/* constant table alias and logical functions */
#undef K
#undef S0
#undef S1
#undef s0
#undef s1
#undef Ch
#undef Maj

/* message schedule access */
#undef W_PRE
#undef W_MAIN
#undef w
#undef blk2_main
#undef blk2

/* round macros */
#undef T1
#undef T4
#undef T8
#undef R1_PRE
#undef R1_MAIN
#undef R2_MAIN
#undef R4
#undef R4_PRE
#undef R4_MAIN
#undef R8
#undef R8_PRE
#undef R8_MAIN
619