/* xref: /aosp_15_r20/external/lzma/C/Sha256Opt.c (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6) */
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
: Igor Pavlov : Public domain */

4*f6dc9357SAndroid Build Coastguard Worker #include "Precomp.h"
5*f6dc9357SAndroid Build Coastguard Worker #include "Compiler.h"
6*f6dc9357SAndroid Build Coastguard Worker #include "CpuArch.h"
7*f6dc9357SAndroid Build Coastguard Worker 
8*f6dc9357SAndroid Build Coastguard Worker // #define Z7_USE_HW_SHA_STUB // for debug
9*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_X86_OR_AMD64
10*f6dc9357SAndroid Build Coastguard Worker   #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
11*f6dc9357SAndroid Build Coastguard Worker       #define USE_HW_SHA
12*f6dc9357SAndroid Build Coastguard Worker   #elif defined(Z7_LLVM_CLANG_VERSION)  && (Z7_LLVM_CLANG_VERSION  >= 30800) \
13*f6dc9357SAndroid Build Coastguard Worker      || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
14*f6dc9357SAndroid Build Coastguard Worker      || defined(Z7_GCC_VERSION)         && (Z7_GCC_VERSION         >= 40900)
15*f6dc9357SAndroid Build Coastguard Worker       #define USE_HW_SHA
16*f6dc9357SAndroid Build Coastguard Worker       #if !defined(__INTEL_COMPILER)
17*f6dc9357SAndroid Build Coastguard Worker       // icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
18*f6dc9357SAndroid Build Coastguard Worker       #if !defined(__SHA__) || !defined(__SSSE3__)
19*f6dc9357SAndroid Build Coastguard Worker         #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
20*f6dc9357SAndroid Build Coastguard Worker       #endif
21*f6dc9357SAndroid Build Coastguard Worker       #endif
22*f6dc9357SAndroid Build Coastguard Worker   #elif defined(_MSC_VER)
23*f6dc9357SAndroid Build Coastguard Worker     #if (_MSC_VER >= 1900)
24*f6dc9357SAndroid Build Coastguard Worker       #define USE_HW_SHA
25*f6dc9357SAndroid Build Coastguard Worker     #else
26*f6dc9357SAndroid Build Coastguard Worker       #define Z7_USE_HW_SHA_STUB
27*f6dc9357SAndroid Build Coastguard Worker     #endif
28*f6dc9357SAndroid Build Coastguard Worker   #endif
29*f6dc9357SAndroid Build Coastguard Worker // #endif // MY_CPU_X86_OR_AMD64
30*f6dc9357SAndroid Build Coastguard Worker #ifndef USE_HW_SHA
31*f6dc9357SAndroid Build Coastguard Worker   // #define Z7_USE_HW_SHA_STUB // for debug
32*f6dc9357SAndroid Build Coastguard Worker #endif
33*f6dc9357SAndroid Build Coastguard Worker 
34*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_HW_SHA
35*f6dc9357SAndroid Build Coastguard Worker 
36*f6dc9357SAndroid Build Coastguard Worker // #pragma message("Sha256 HW")
37*f6dc9357SAndroid Build Coastguard Worker 
38*f6dc9357SAndroid Build Coastguard Worker 
39*f6dc9357SAndroid Build Coastguard Worker 
40*f6dc9357SAndroid Build Coastguard Worker 
41*f6dc9357SAndroid Build Coastguard Worker // sse/sse2/ssse3:
42*f6dc9357SAndroid Build Coastguard Worker #include <tmmintrin.h>
43*f6dc9357SAndroid Build Coastguard Worker // sha*:
44*f6dc9357SAndroid Build Coastguard Worker #include <immintrin.h>
45*f6dc9357SAndroid Build Coastguard Worker 
46*f6dc9357SAndroid Build Coastguard Worker #if defined (__clang__) && defined(_MSC_VER)
47*f6dc9357SAndroid Build Coastguard Worker   #if !defined(__SHA__)
48*f6dc9357SAndroid Build Coastguard Worker     #include <shaintrin.h>
49*f6dc9357SAndroid Build Coastguard Worker   #endif
50*f6dc9357SAndroid Build Coastguard Worker #else
51*f6dc9357SAndroid Build Coastguard Worker 
52*f6dc9357SAndroid Build Coastguard Worker #endif
53*f6dc9357SAndroid Build Coastguard Worker 
54*f6dc9357SAndroid Build Coastguard Worker /*
55*f6dc9357SAndroid Build Coastguard Worker SHA256 uses:
56*f6dc9357SAndroid Build Coastguard Worker SSE2:
57*f6dc9357SAndroid Build Coastguard Worker   _mm_loadu_si128
58*f6dc9357SAndroid Build Coastguard Worker   _mm_storeu_si128
59*f6dc9357SAndroid Build Coastguard Worker   _mm_set_epi32
60*f6dc9357SAndroid Build Coastguard Worker   _mm_add_epi32
61*f6dc9357SAndroid Build Coastguard Worker   _mm_shuffle_epi32 / pshufd
62*f6dc9357SAndroid Build Coastguard Worker 
63*f6dc9357SAndroid Build Coastguard Worker 
64*f6dc9357SAndroid Build Coastguard Worker 
65*f6dc9357SAndroid Build Coastguard Worker SSSE3:
66*f6dc9357SAndroid Build Coastguard Worker   _mm_shuffle_epi8 / pshufb
67*f6dc9357SAndroid Build Coastguard Worker   _mm_alignr_epi8
68*f6dc9357SAndroid Build Coastguard Worker SHA:
69*f6dc9357SAndroid Build Coastguard Worker   _mm_sha256*
70*f6dc9357SAndroid Build Coastguard Worker */
71*f6dc9357SAndroid Build Coastguard Worker 
72*f6dc9357SAndroid Build Coastguard Worker // K array must be aligned for 16-bytes at least.
73*f6dc9357SAndroid Build Coastguard Worker // The compiler can look align attribute and selects
74*f6dc9357SAndroid Build Coastguard Worker //   movdqu - for code without align attribute
75*f6dc9357SAndroid Build Coastguard Worker //   movdqa - for code with    align attribute
76*f6dc9357SAndroid Build Coastguard Worker extern
77*f6dc9357SAndroid Build Coastguard Worker MY_ALIGN(64)
78*f6dc9357SAndroid Build Coastguard Worker const UInt32 SHA256_K_ARRAY[64];
79*f6dc9357SAndroid Build Coastguard Worker #define K SHA256_K_ARRAY
80*f6dc9357SAndroid Build Coastguard Worker 
81*f6dc9357SAndroid Build Coastguard Worker 
82*f6dc9357SAndroid Build Coastguard Worker #define ADD_EPI32(dest, src)      dest = _mm_add_epi32(dest, src);
83*f6dc9357SAndroid Build Coastguard Worker #define SHA256_MSG1(dest, src)    dest = _mm_sha256msg1_epu32(dest, src);
84*f6dc9357SAndroid Build Coastguard Worker #define SHA256_MSG2(dest, src)    dest = _mm_sha256msg2_epu32(dest, src);
85*f6dc9357SAndroid Build Coastguard Worker 
86*f6dc9357SAndroid Build Coastguard Worker #define LOAD_SHUFFLE(m, k) \
87*f6dc9357SAndroid Build Coastguard Worker     m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
88*f6dc9357SAndroid Build Coastguard Worker     m = _mm_shuffle_epi8(m, mask); \
89*f6dc9357SAndroid Build Coastguard Worker 
90*f6dc9357SAndroid Build Coastguard Worker #define NNN(m0, m1, m2, m3)
91*f6dc9357SAndroid Build Coastguard Worker 
92*f6dc9357SAndroid Build Coastguard Worker #define SM1(m1, m2, m3, m0) \
93*f6dc9357SAndroid Build Coastguard Worker     SHA256_MSG1(m0, m1); \
94*f6dc9357SAndroid Build Coastguard Worker 
95*f6dc9357SAndroid Build Coastguard Worker #define SM2(m2, m3, m0, m1) \
96*f6dc9357SAndroid Build Coastguard Worker     ADD_EPI32(m0, _mm_alignr_epi8(m3, m2, 4)) \
97*f6dc9357SAndroid Build Coastguard Worker     SHA256_MSG2(m0, m3); \
98*f6dc9357SAndroid Build Coastguard Worker 
99*f6dc9357SAndroid Build Coastguard Worker #define RND2(t0, t1) \
100*f6dc9357SAndroid Build Coastguard Worker     t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
101*f6dc9357SAndroid Build Coastguard Worker 
102*f6dc9357SAndroid Build Coastguard Worker 
103*f6dc9357SAndroid Build Coastguard Worker 
104*f6dc9357SAndroid Build Coastguard Worker #define R4(k, m0, m1, m2, m3, OP0, OP1) \
105*f6dc9357SAndroid Build Coastguard Worker     msg = _mm_add_epi32(m0, *(const __m128i *) (const void *) &K[(k) * 4]); \
106*f6dc9357SAndroid Build Coastguard Worker     RND2(state0, state1); \
107*f6dc9357SAndroid Build Coastguard Worker     msg = _mm_shuffle_epi32(msg, 0x0E); \
108*f6dc9357SAndroid Build Coastguard Worker     OP0(m0, m1, m2, m3) \
109*f6dc9357SAndroid Build Coastguard Worker     RND2(state1, state0); \
110*f6dc9357SAndroid Build Coastguard Worker     OP1(m0, m1, m2, m3) \
111*f6dc9357SAndroid Build Coastguard Worker 
112*f6dc9357SAndroid Build Coastguard Worker #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
113*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
114*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
115*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
116*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
117*f6dc9357SAndroid Build Coastguard Worker 
118*f6dc9357SAndroid Build Coastguard Worker #define PREPARE_STATE \
119*f6dc9357SAndroid Build Coastguard Worker     tmp    = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
120*f6dc9357SAndroid Build Coastguard Worker     state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
121*f6dc9357SAndroid Build Coastguard Worker     state1 = state0; \
122*f6dc9357SAndroid Build Coastguard Worker     state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
123*f6dc9357SAndroid Build Coastguard Worker     state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
124*f6dc9357SAndroid Build Coastguard Worker 
125*f6dc9357SAndroid Build Coastguard Worker 
126*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
127*f6dc9357SAndroid Build Coastguard Worker #ifdef ATTRIB_SHA
128*f6dc9357SAndroid Build Coastguard Worker ATTRIB_SHA
129*f6dc9357SAndroid Build Coastguard Worker #endif
Sha256_UpdateBlocks_HW(UInt32 state[8],const Byte * data,size_t numBlocks)130*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
131*f6dc9357SAndroid Build Coastguard Worker {
132*f6dc9357SAndroid Build Coastguard Worker   const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
133*f6dc9357SAndroid Build Coastguard Worker 
134*f6dc9357SAndroid Build Coastguard Worker 
135*f6dc9357SAndroid Build Coastguard Worker   __m128i tmp, state0, state1;
136*f6dc9357SAndroid Build Coastguard Worker 
137*f6dc9357SAndroid Build Coastguard Worker   if (numBlocks == 0)
138*f6dc9357SAndroid Build Coastguard Worker     return;
139*f6dc9357SAndroid Build Coastguard Worker 
140*f6dc9357SAndroid Build Coastguard Worker   state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]);
141*f6dc9357SAndroid Build Coastguard Worker   state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
142*f6dc9357SAndroid Build Coastguard Worker 
143*f6dc9357SAndroid Build Coastguard Worker   PREPARE_STATE
144*f6dc9357SAndroid Build Coastguard Worker 
145*f6dc9357SAndroid Build Coastguard Worker   do
146*f6dc9357SAndroid Build Coastguard Worker   {
147*f6dc9357SAndroid Build Coastguard Worker     __m128i state0_save, state1_save;
148*f6dc9357SAndroid Build Coastguard Worker     __m128i m0, m1, m2, m3;
149*f6dc9357SAndroid Build Coastguard Worker     __m128i msg;
150*f6dc9357SAndroid Build Coastguard Worker     // #define msg tmp
151*f6dc9357SAndroid Build Coastguard Worker 
152*f6dc9357SAndroid Build Coastguard Worker     state0_save = state0;
153*f6dc9357SAndroid Build Coastguard Worker     state1_save = state1;
154*f6dc9357SAndroid Build Coastguard Worker 
155*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m0, 0)
156*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m1, 1)
157*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m2, 2)
158*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m3, 3)
159*f6dc9357SAndroid Build Coastguard Worker 
160*f6dc9357SAndroid Build Coastguard Worker 
161*f6dc9357SAndroid Build Coastguard Worker 
162*f6dc9357SAndroid Build Coastguard Worker     R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
163*f6dc9357SAndroid Build Coastguard Worker     R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
164*f6dc9357SAndroid Build Coastguard Worker     R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
165*f6dc9357SAndroid Build Coastguard Worker     R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
166*f6dc9357SAndroid Build Coastguard Worker 
167*f6dc9357SAndroid Build Coastguard Worker     ADD_EPI32(state0, state0_save)
168*f6dc9357SAndroid Build Coastguard Worker     ADD_EPI32(state1, state1_save)
169*f6dc9357SAndroid Build Coastguard Worker 
170*f6dc9357SAndroid Build Coastguard Worker     data += 64;
171*f6dc9357SAndroid Build Coastguard Worker   }
172*f6dc9357SAndroid Build Coastguard Worker   while (--numBlocks);
173*f6dc9357SAndroid Build Coastguard Worker 
174*f6dc9357SAndroid Build Coastguard Worker   PREPARE_STATE
175*f6dc9357SAndroid Build Coastguard Worker 
176*f6dc9357SAndroid Build Coastguard Worker   _mm_storeu_si128((__m128i *) (void *) &state[0], state0);
177*f6dc9357SAndroid Build Coastguard Worker   _mm_storeu_si128((__m128i *) (void *) &state[4], state1);
178*f6dc9357SAndroid Build Coastguard Worker }
179*f6dc9357SAndroid Build Coastguard Worker 
180*f6dc9357SAndroid Build Coastguard Worker #endif // USE_HW_SHA
181*f6dc9357SAndroid Build Coastguard Worker 
182*f6dc9357SAndroid Build Coastguard Worker #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
183*f6dc9357SAndroid Build Coastguard Worker 
184*f6dc9357SAndroid Build Coastguard Worker   #if   defined(__ARM_FEATURE_SHA2) \
185*f6dc9357SAndroid Build Coastguard Worker      || defined(__ARM_FEATURE_CRYPTO)
186*f6dc9357SAndroid Build Coastguard Worker     #define USE_HW_SHA
187*f6dc9357SAndroid Build Coastguard Worker   #else
188*f6dc9357SAndroid Build Coastguard Worker     #if  defined(MY_CPU_ARM64) \
189*f6dc9357SAndroid Build Coastguard Worker       || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
190*f6dc9357SAndroid Build Coastguard Worker       || defined(Z7_MSC_VER_ORIGINAL)
191*f6dc9357SAndroid Build Coastguard Worker     #if  defined(__ARM_FP) && \
192*f6dc9357SAndroid Build Coastguard Worker           (   defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
193*f6dc9357SAndroid Build Coastguard Worker            || defined(__GNUC__) && (__GNUC__ >= 6) \
194*f6dc9357SAndroid Build Coastguard Worker           ) \
195*f6dc9357SAndroid Build Coastguard Worker       || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
196*f6dc9357SAndroid Build Coastguard Worker     #if  defined(MY_CPU_ARM64) \
197*f6dc9357SAndroid Build Coastguard Worker       || !defined(Z7_CLANG_VERSION) \
198*f6dc9357SAndroid Build Coastguard Worker       || defined(__ARM_NEON) && \
199*f6dc9357SAndroid Build Coastguard Worker           (Z7_CLANG_VERSION < 170000 || \
200*f6dc9357SAndroid Build Coastguard Worker            Z7_CLANG_VERSION > 170001)
201*f6dc9357SAndroid Build Coastguard Worker       #define USE_HW_SHA
202*f6dc9357SAndroid Build Coastguard Worker     #endif
203*f6dc9357SAndroid Build Coastguard Worker     #endif
204*f6dc9357SAndroid Build Coastguard Worker     #endif
205*f6dc9357SAndroid Build Coastguard Worker   #endif
206*f6dc9357SAndroid Build Coastguard Worker 
207*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_HW_SHA
208*f6dc9357SAndroid Build Coastguard Worker 
209*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== Sha256 HW === ")
210*f6dc9357SAndroid Build Coastguard Worker 
211*f6dc9357SAndroid Build Coastguard Worker 
212*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) || defined(__GNUC__)
213*f6dc9357SAndroid Build Coastguard Worker #if !defined(__ARM_FEATURE_SHA2) && \
214*f6dc9357SAndroid Build Coastguard Worker     !defined(__ARM_FEATURE_CRYPTO)
215*f6dc9357SAndroid Build Coastguard Worker   #ifdef MY_CPU_ARM64
216*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__)
217*f6dc9357SAndroid Build Coastguard Worker     #define ATTRIB_SHA __attribute__((__target__("crypto")))
218*f6dc9357SAndroid Build Coastguard Worker #else
219*f6dc9357SAndroid Build Coastguard Worker     #define ATTRIB_SHA __attribute__((__target__("+crypto")))
220*f6dc9357SAndroid Build Coastguard Worker #endif
221*f6dc9357SAndroid Build Coastguard Worker   #else
222*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) && (__clang_major__ >= 1)
223*f6dc9357SAndroid Build Coastguard Worker     #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
224*f6dc9357SAndroid Build Coastguard Worker #else
225*f6dc9357SAndroid Build Coastguard Worker     #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
226*f6dc9357SAndroid Build Coastguard Worker #endif
227*f6dc9357SAndroid Build Coastguard Worker   #endif
228*f6dc9357SAndroid Build Coastguard Worker #endif
229*f6dc9357SAndroid Build Coastguard Worker #else
230*f6dc9357SAndroid Build Coastguard Worker   // _MSC_VER
231*f6dc9357SAndroid Build Coastguard Worker   // for arm32
232*f6dc9357SAndroid Build Coastguard Worker   #define _ARM_USE_NEW_NEON_INTRINSICS
233*f6dc9357SAndroid Build Coastguard Worker #endif
234*f6dc9357SAndroid Build Coastguard Worker 
235*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
236*f6dc9357SAndroid Build Coastguard Worker #include <arm64_neon.h>
237*f6dc9357SAndroid Build Coastguard Worker #else
238*f6dc9357SAndroid Build Coastguard Worker 
239*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) && __clang_major__ < 16
240*f6dc9357SAndroid Build Coastguard Worker #if !defined(__ARM_FEATURE_SHA2) && \
241*f6dc9357SAndroid Build Coastguard Worker     !defined(__ARM_FEATURE_CRYPTO)
242*f6dc9357SAndroid Build Coastguard Worker //     #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
243*f6dc9357SAndroid Build Coastguard Worker     Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
244*f6dc9357SAndroid Build Coastguard Worker     #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
245*f6dc9357SAndroid Build Coastguard Worker // #if defined(__clang__) && __clang_major__ < 13
246*f6dc9357SAndroid Build Coastguard Worker     #define __ARM_FEATURE_CRYPTO 1
247*f6dc9357SAndroid Build Coastguard Worker // #else
248*f6dc9357SAndroid Build Coastguard Worker     #define __ARM_FEATURE_SHA2 1
249*f6dc9357SAndroid Build Coastguard Worker // #endif
250*f6dc9357SAndroid Build Coastguard Worker     Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
251*f6dc9357SAndroid Build Coastguard Worker #endif
252*f6dc9357SAndroid Build Coastguard Worker #endif // clang
253*f6dc9357SAndroid Build Coastguard Worker 
254*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__)
255*f6dc9357SAndroid Build Coastguard Worker 
256*f6dc9357SAndroid Build Coastguard Worker #if defined(__ARM_ARCH) && __ARM_ARCH < 8
257*f6dc9357SAndroid Build Coastguard Worker     Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
258*f6dc9357SAndroid Build Coastguard Worker //    #pragma message("#define __ARM_ARCH 8")
259*f6dc9357SAndroid Build Coastguard Worker     #undef  __ARM_ARCH
260*f6dc9357SAndroid Build Coastguard Worker     #define __ARM_ARCH 8
261*f6dc9357SAndroid Build Coastguard Worker     Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
262*f6dc9357SAndroid Build Coastguard Worker #endif
263*f6dc9357SAndroid Build Coastguard Worker 
264*f6dc9357SAndroid Build Coastguard Worker #endif // clang
265*f6dc9357SAndroid Build Coastguard Worker 
266*f6dc9357SAndroid Build Coastguard Worker #include <arm_neon.h>
267*f6dc9357SAndroid Build Coastguard Worker 
268*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
269*f6dc9357SAndroid Build Coastguard Worker     defined(__ARM_FEATURE_CRYPTO) && \
270*f6dc9357SAndroid Build Coastguard Worker     defined(__ARM_FEATURE_SHA2)
271*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
272*f6dc9357SAndroid Build Coastguard Worker     #undef __ARM_FEATURE_CRYPTO
273*f6dc9357SAndroid Build Coastguard Worker     #undef __ARM_FEATURE_SHA2
274*f6dc9357SAndroid Build Coastguard Worker     #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
275*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
276*f6dc9357SAndroid Build Coastguard Worker //    #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
277*f6dc9357SAndroid Build Coastguard Worker #endif
278*f6dc9357SAndroid Build Coastguard Worker 
279*f6dc9357SAndroid Build Coastguard Worker #endif // Z7_MSC_VER_ORIGINAL
280*f6dc9357SAndroid Build Coastguard Worker 
281*f6dc9357SAndroid Build Coastguard Worker typedef uint32x4_t v128;
282*f6dc9357SAndroid Build Coastguard Worker // typedef __n128 v128; // MSVC
283*f6dc9357SAndroid Build Coastguard Worker 
284*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_BE
285*f6dc9357SAndroid Build Coastguard Worker   #define MY_rev32_for_LE(x) x
286*f6dc9357SAndroid Build Coastguard Worker #else
287*f6dc9357SAndroid Build Coastguard Worker   #define MY_rev32_for_LE(x) vrev32q_u8(x)
288*f6dc9357SAndroid Build Coastguard Worker #endif
289*f6dc9357SAndroid Build Coastguard Worker 
290*f6dc9357SAndroid Build Coastguard Worker #if 1 // 0 for debug
291*f6dc9357SAndroid Build Coastguard Worker // for arm32: it works slower by some reason than direct code
292*f6dc9357SAndroid Build Coastguard Worker /*
293*f6dc9357SAndroid Build Coastguard Worker for arm32 it generates:
294*f6dc9357SAndroid Build Coastguard Worker MSVC-2022, GCC-9:
295*f6dc9357SAndroid Build Coastguard Worker     vld1.32 {d18,d19}, [r10]
296*f6dc9357SAndroid Build Coastguard Worker     vst1.32 {d4,d5}, [r3]
297*f6dc9357SAndroid Build Coastguard Worker     vld1.8  {d20-d21}, [r4]
298*f6dc9357SAndroid Build Coastguard Worker there is no align hint (like [r10:128]).  So instruction allows unaligned access
299*f6dc9357SAndroid Build Coastguard Worker */
300*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_32(_p)       vld1q_u32(_p)
301*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_8(_p)        vld1q_u8 (_p)
302*f6dc9357SAndroid Build Coastguard Worker #define STORE_128_32(_p, _v)  vst1q_u32(_p, _v)
303*f6dc9357SAndroid Build Coastguard Worker #else
304*f6dc9357SAndroid Build Coastguard Worker /*
305*f6dc9357SAndroid Build Coastguard Worker for arm32:
306*f6dc9357SAndroid Build Coastguard Worker MSVC-2022:
307*f6dc9357SAndroid Build Coastguard Worker     vldm r10,{d18,d19}
308*f6dc9357SAndroid Build Coastguard Worker     vstm r3,{d4,d5}
309*f6dc9357SAndroid Build Coastguard Worker     does it require strict alignment?
310*f6dc9357SAndroid Build Coastguard Worker GCC-9:
311*f6dc9357SAndroid Build Coastguard Worker     vld1.64 {d30-d31}, [r0:64]
312*f6dc9357SAndroid Build Coastguard Worker     vldr  d28, [r0, #16]
313*f6dc9357SAndroid Build Coastguard Worker     vldr  d29, [r0, #24]
314*f6dc9357SAndroid Build Coastguard Worker     vst1.64 {d30-d31}, [r0:64]
315*f6dc9357SAndroid Build Coastguard Worker     vstr  d28, [r0, #16]
316*f6dc9357SAndroid Build Coastguard Worker     vstr  d29, [r0, #24]
317*f6dc9357SAndroid Build Coastguard Worker there is hint [r0:64], so does it requires 64-bit alignment.
318*f6dc9357SAndroid Build Coastguard Worker */
319*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_32(_p)       (*(const v128 *)(const void *)(_p))
320*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_8(_p)        vreinterpretq_u8_u32(*(const v128 *)(const void *)(_p))
321*f6dc9357SAndroid Build Coastguard Worker #define STORE_128_32(_p, _v)  *(v128 *)(void *)(_p) = (_v)
322*f6dc9357SAndroid Build Coastguard Worker #endif
323*f6dc9357SAndroid Build Coastguard Worker 
324*f6dc9357SAndroid Build Coastguard Worker #define LOAD_SHUFFLE(m, k) \
325*f6dc9357SAndroid Build Coastguard Worker     m = vreinterpretq_u32_u8( \
326*f6dc9357SAndroid Build Coastguard Worker         MY_rev32_for_LE( \
327*f6dc9357SAndroid Build Coastguard Worker         LOAD_128_8(data + (k) * 16))); \
328*f6dc9357SAndroid Build Coastguard Worker 
329*f6dc9357SAndroid Build Coastguard Worker // K array must be aligned for 16-bytes at least.
330*f6dc9357SAndroid Build Coastguard Worker extern
331*f6dc9357SAndroid Build Coastguard Worker MY_ALIGN(64)
332*f6dc9357SAndroid Build Coastguard Worker const UInt32 SHA256_K_ARRAY[64];
333*f6dc9357SAndroid Build Coastguard Worker #define K SHA256_K_ARRAY
334*f6dc9357SAndroid Build Coastguard Worker 
335*f6dc9357SAndroid Build Coastguard Worker #define SHA256_SU0(dest, src)        dest = vsha256su0q_u32(dest, src);
336*f6dc9357SAndroid Build Coastguard Worker #define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
337*f6dc9357SAndroid Build Coastguard Worker 
338*f6dc9357SAndroid Build Coastguard Worker #define SM1(m0, m1, m2, m3)  SHA256_SU0(m3, m0)
339*f6dc9357SAndroid Build Coastguard Worker #define SM2(m0, m1, m2, m3)  SHA256_SU1(m2, m0, m1)
340*f6dc9357SAndroid Build Coastguard Worker #define NNN(m0, m1, m2, m3)
341*f6dc9357SAndroid Build Coastguard Worker 
342*f6dc9357SAndroid Build Coastguard Worker #define R4(k, m0, m1, m2, m3, OP0, OP1) \
343*f6dc9357SAndroid Build Coastguard Worker     msg = vaddq_u32(m0, *(const v128 *) (const void *) &K[(k) * 4]); \
344*f6dc9357SAndroid Build Coastguard Worker     tmp = state0; \
345*f6dc9357SAndroid Build Coastguard Worker     state0 = vsha256hq_u32( state0, state1, msg ); \
346*f6dc9357SAndroid Build Coastguard Worker     state1 = vsha256h2q_u32( state1, tmp, msg ); \
347*f6dc9357SAndroid Build Coastguard Worker     OP0(m0, m1, m2, m3); \
348*f6dc9357SAndroid Build Coastguard Worker     OP1(m0, m1, m2, m3); \
349*f6dc9357SAndroid Build Coastguard Worker 
350*f6dc9357SAndroid Build Coastguard Worker 
351*f6dc9357SAndroid Build Coastguard Worker #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
352*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
353*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
354*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
355*f6dc9357SAndroid Build Coastguard Worker     R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
356*f6dc9357SAndroid Build Coastguard Worker 
357*f6dc9357SAndroid Build Coastguard Worker 
358*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
359*f6dc9357SAndroid Build Coastguard Worker #ifdef ATTRIB_SHA
360*f6dc9357SAndroid Build Coastguard Worker ATTRIB_SHA
361*f6dc9357SAndroid Build Coastguard Worker #endif
Sha256_UpdateBlocks_HW(UInt32 state[8],const Byte * data,size_t numBlocks)362*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
363*f6dc9357SAndroid Build Coastguard Worker {
364*f6dc9357SAndroid Build Coastguard Worker   v128 state0, state1;
365*f6dc9357SAndroid Build Coastguard Worker 
366*f6dc9357SAndroid Build Coastguard Worker   if (numBlocks == 0)
367*f6dc9357SAndroid Build Coastguard Worker     return;
368*f6dc9357SAndroid Build Coastguard Worker 
369*f6dc9357SAndroid Build Coastguard Worker   state0 = LOAD_128_32(&state[0]);
370*f6dc9357SAndroid Build Coastguard Worker   state1 = LOAD_128_32(&state[4]);
371*f6dc9357SAndroid Build Coastguard Worker 
372*f6dc9357SAndroid Build Coastguard Worker   do
373*f6dc9357SAndroid Build Coastguard Worker   {
374*f6dc9357SAndroid Build Coastguard Worker     v128 state0_save, state1_save;
375*f6dc9357SAndroid Build Coastguard Worker     v128 m0, m1, m2, m3;
376*f6dc9357SAndroid Build Coastguard Worker     v128 msg, tmp;
377*f6dc9357SAndroid Build Coastguard Worker 
378*f6dc9357SAndroid Build Coastguard Worker     state0_save = state0;
379*f6dc9357SAndroid Build Coastguard Worker     state1_save = state1;
380*f6dc9357SAndroid Build Coastguard Worker 
381*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m0, 0)
382*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m1, 1)
383*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m2, 2)
384*f6dc9357SAndroid Build Coastguard Worker     LOAD_SHUFFLE (m3, 3)
385*f6dc9357SAndroid Build Coastguard Worker 
386*f6dc9357SAndroid Build Coastguard Worker     R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
387*f6dc9357SAndroid Build Coastguard Worker     R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
388*f6dc9357SAndroid Build Coastguard Worker     R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
389*f6dc9357SAndroid Build Coastguard Worker     R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
390*f6dc9357SAndroid Build Coastguard Worker 
391*f6dc9357SAndroid Build Coastguard Worker     state0 = vaddq_u32(state0, state0_save);
392*f6dc9357SAndroid Build Coastguard Worker     state1 = vaddq_u32(state1, state1_save);
393*f6dc9357SAndroid Build Coastguard Worker 
394*f6dc9357SAndroid Build Coastguard Worker     data += 64;
395*f6dc9357SAndroid Build Coastguard Worker   }
396*f6dc9357SAndroid Build Coastguard Worker   while (--numBlocks);
397*f6dc9357SAndroid Build Coastguard Worker 
398*f6dc9357SAndroid Build Coastguard Worker   STORE_128_32(&state[0], state0);
399*f6dc9357SAndroid Build Coastguard Worker   STORE_128_32(&state[4], state1);
400*f6dc9357SAndroid Build Coastguard Worker }
401*f6dc9357SAndroid Build Coastguard Worker 
402*f6dc9357SAndroid Build Coastguard Worker #endif // USE_HW_SHA
403*f6dc9357SAndroid Build Coastguard Worker 
404*f6dc9357SAndroid Build Coastguard Worker #endif // MY_CPU_ARM_OR_ARM64
405*f6dc9357SAndroid Build Coastguard Worker 
406*f6dc9357SAndroid Build Coastguard Worker 
// Fallback stub: compiled only when no hardware implementation was selected
// above (USE_HW_SHA is not defined) but Z7_USE_HW_SHA_STUB is defined.
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// We can compile this file with another C compiler,
// or we can compile asm version.
// So we can generate real code instead of this stub function.
// #include "Sha256.h"
// #if defined(_MSC_VER)
#pragma message("Sha256 HW-SW stub was used")
// #endif
void Z7_FASTCALL Sha256_UpdateBlocks   (UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
// Same signature as the hardware versions; forwards all arguments unchanged
// to Sha256_UpdateBlocks(), which is declared above and defined elsewhere.
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  Sha256_UpdateBlocks(state, data, numBlocks);
  /*
  UNUSED_VAR(state);
  UNUSED_VAR(data);
  UNUSED_VAR(numBlocks);
  exit(1);
  return;
  */
}
#endif


// Remove the file-local helper macros so they cannot leak into other code
// compiled in the same translation unit.
// "#undef" of a name that is not defined is a no-op, so the list simply covers
// the macros of both the x86 and the ARM sections.
#undef K
#undef RND2
#undef MY_rev32_for_LE

#undef NNN
// LOAD_128 / STORE_128 are not defined in the visible part of this file;
// the entries are kept as harmless no-ops. The macros actually defined
// above are LOAD_128_32 / LOAD_128_8 / STORE_128_32.
#undef LOAD_128
#undef STORE_128
#undef LOAD_128_32
#undef LOAD_128_8
#undef STORE_128_32
#undef LOAD_SHUFFLE
#undef SM1
#undef SM2
#undef ADD_EPI32
#undef SHA256_MSG1
#undef SHA256_MSG2
#undef SHA256_SU0
#undef SHA256_SU1

#undef R4
#undef R16
#undef PREPARE_STATE
#undef USE_HW_SHA
#undef ATTRIB_SHA
#undef USE_VER_MIN
#undef Z7_USE_HW_SHA_STUB