1*f6dc9357SAndroid Build Coastguard Worker /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2*f6dc9357SAndroid Build Coastguard Worker : Igor Pavlov : Public domain */
3*f6dc9357SAndroid Build Coastguard Worker
4*f6dc9357SAndroid Build Coastguard Worker #include "Precomp.h"
5*f6dc9357SAndroid Build Coastguard Worker #include "Compiler.h"
6*f6dc9357SAndroid Build Coastguard Worker #include "CpuArch.h"
7*f6dc9357SAndroid Build Coastguard Worker
8*f6dc9357SAndroid Build Coastguard Worker // #define Z7_USE_HW_SHA_STUB // for debug
9*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_X86_OR_AMD64
/*
  x86 / x64 toolchain detection.

  USE_HW_SHA         - defined when the compiler version checks below pass;
                       it enables the SHA intrinsic implementation that follows.
  ATTRIB_SHA         - defined for clang / gcc (but not icc) when the translation
                       unit is not already built with both __SHA__ and __SSSE3__;
                       it attaches a per-function "sha,ssse3" target attribute.
  Z7_USE_HW_SHA_STUB - defined for MSVC with _MSC_VER < 1900; it selects the
                       stub at the end of this file that forwards to the
                       software Sha256_UpdateBlocks().
*/
  #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
      #define USE_HW_SHA
  #elif defined(Z7_LLVM_CLANG_VERSION)  && (Z7_LLVM_CLANG_VERSION  >= 30800) \
     || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
     || defined(Z7_GCC_VERSION)         && (Z7_GCC_VERSION         >= 40900)
      #define USE_HW_SHA
      #if !defined(__INTEL_COMPILER)
      // icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
      #if !defined(__SHA__) || !defined(__SSSE3__)
        #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
      #endif
      #endif
  #elif defined(_MSC_VER)
    #if (_MSC_VER >= 1900)
      #define USE_HW_SHA
    #else
      #define Z7_USE_HW_SHA_STUB
    #endif
  #endif
// #endif // MY_CPU_X86_OR_AMD64
#ifndef USE_HW_SHA
  // #define Z7_USE_HW_SHA_STUB // for debug
#endif
33*f6dc9357SAndroid Build Coastguard Worker
34*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_HW_SHA
35*f6dc9357SAndroid Build Coastguard Worker
36*f6dc9357SAndroid Build Coastguard Worker // #pragma message("Sha256 HW")
37*f6dc9357SAndroid Build Coastguard Worker
38*f6dc9357SAndroid Build Coastguard Worker
39*f6dc9357SAndroid Build Coastguard Worker
40*f6dc9357SAndroid Build Coastguard Worker
41*f6dc9357SAndroid Build Coastguard Worker // sse/sse2/ssse3:
42*f6dc9357SAndroid Build Coastguard Worker #include <tmmintrin.h>
43*f6dc9357SAndroid Build Coastguard Worker // sha*:
44*f6dc9357SAndroid Build Coastguard Worker #include <immintrin.h>
45*f6dc9357SAndroid Build Coastguard Worker
46*f6dc9357SAndroid Build Coastguard Worker #if defined (__clang__) && defined(_MSC_VER)
47*f6dc9357SAndroid Build Coastguard Worker #if !defined(__SHA__)
48*f6dc9357SAndroid Build Coastguard Worker #include <shaintrin.h>
49*f6dc9357SAndroid Build Coastguard Worker #endif
50*f6dc9357SAndroid Build Coastguard Worker #else
51*f6dc9357SAndroid Build Coastguard Worker
52*f6dc9357SAndroid Build Coastguard Worker #endif
53*f6dc9357SAndroid Build Coastguard Worker
54*f6dc9357SAndroid Build Coastguard Worker /*
55*f6dc9357SAndroid Build Coastguard Worker SHA256 uses:
56*f6dc9357SAndroid Build Coastguard Worker SSE2:
57*f6dc9357SAndroid Build Coastguard Worker _mm_loadu_si128
58*f6dc9357SAndroid Build Coastguard Worker _mm_storeu_si128
59*f6dc9357SAndroid Build Coastguard Worker _mm_set_epi32
60*f6dc9357SAndroid Build Coastguard Worker _mm_add_epi32
61*f6dc9357SAndroid Build Coastguard Worker _mm_shuffle_epi32 / pshufd
62*f6dc9357SAndroid Build Coastguard Worker
63*f6dc9357SAndroid Build Coastguard Worker
64*f6dc9357SAndroid Build Coastguard Worker
65*f6dc9357SAndroid Build Coastguard Worker SSSE3:
66*f6dc9357SAndroid Build Coastguard Worker _mm_shuffle_epi8 / pshufb
67*f6dc9357SAndroid Build Coastguard Worker _mm_alignr_epi8
68*f6dc9357SAndroid Build Coastguard Worker SHA:
69*f6dc9357SAndroid Build Coastguard Worker _mm_sha256*
70*f6dc9357SAndroid Build Coastguard Worker */
71*f6dc9357SAndroid Build Coastguard Worker
72*f6dc9357SAndroid Build Coastguard Worker // K array must be aligned for 16-bytes at least.
73*f6dc9357SAndroid Build Coastguard Worker // The compiler can look align attribute and selects
74*f6dc9357SAndroid Build Coastguard Worker // movdqu - for code without align attribute
75*f6dc9357SAndroid Build Coastguard Worker // movdqa - for code with align attribute
76*f6dc9357SAndroid Build Coastguard Worker extern
77*f6dc9357SAndroid Build Coastguard Worker MY_ALIGN(64)
78*f6dc9357SAndroid Build Coastguard Worker const UInt32 SHA256_K_ARRAY[64];
79*f6dc9357SAndroid Build Coastguard Worker #define K SHA256_K_ARRAY
80*f6dc9357SAndroid Build Coastguard Worker
81*f6dc9357SAndroid Build Coastguard Worker
82*f6dc9357SAndroid Build Coastguard Worker #define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
83*f6dc9357SAndroid Build Coastguard Worker #define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
84*f6dc9357SAndroid Build Coastguard Worker #define SHA256_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
85*f6dc9357SAndroid Build Coastguard Worker
86*f6dc9357SAndroid Build Coastguard Worker #define LOAD_SHUFFLE(m, k) \
87*f6dc9357SAndroid Build Coastguard Worker m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
88*f6dc9357SAndroid Build Coastguard Worker m = _mm_shuffle_epi8(m, mask); \
89*f6dc9357SAndroid Build Coastguard Worker
90*f6dc9357SAndroid Build Coastguard Worker #define NNN(m0, m1, m2, m3)
91*f6dc9357SAndroid Build Coastguard Worker
92*f6dc9357SAndroid Build Coastguard Worker #define SM1(m1, m2, m3, m0) \
93*f6dc9357SAndroid Build Coastguard Worker SHA256_MSG1(m0, m1); \
94*f6dc9357SAndroid Build Coastguard Worker
95*f6dc9357SAndroid Build Coastguard Worker #define SM2(m2, m3, m0, m1) \
96*f6dc9357SAndroid Build Coastguard Worker ADD_EPI32(m0, _mm_alignr_epi8(m3, m2, 4)) \
97*f6dc9357SAndroid Build Coastguard Worker SHA256_MSG2(m0, m3); \
98*f6dc9357SAndroid Build Coastguard Worker
99*f6dc9357SAndroid Build Coastguard Worker #define RND2(t0, t1) \
100*f6dc9357SAndroid Build Coastguard Worker t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
101*f6dc9357SAndroid Build Coastguard Worker
102*f6dc9357SAndroid Build Coastguard Worker
103*f6dc9357SAndroid Build Coastguard Worker
104*f6dc9357SAndroid Build Coastguard Worker #define R4(k, m0, m1, m2, m3, OP0, OP1) \
105*f6dc9357SAndroid Build Coastguard Worker msg = _mm_add_epi32(m0, *(const __m128i *) (const void *) &K[(k) * 4]); \
106*f6dc9357SAndroid Build Coastguard Worker RND2(state0, state1); \
107*f6dc9357SAndroid Build Coastguard Worker msg = _mm_shuffle_epi32(msg, 0x0E); \
108*f6dc9357SAndroid Build Coastguard Worker OP0(m0, m1, m2, m3) \
109*f6dc9357SAndroid Build Coastguard Worker RND2(state1, state0); \
110*f6dc9357SAndroid Build Coastguard Worker OP1(m0, m1, m2, m3) \
111*f6dc9357SAndroid Build Coastguard Worker
112*f6dc9357SAndroid Build Coastguard Worker #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
113*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
114*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
115*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
116*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
117*f6dc9357SAndroid Build Coastguard Worker
118*f6dc9357SAndroid Build Coastguard Worker #define PREPARE_STATE \
119*f6dc9357SAndroid Build Coastguard Worker tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
120*f6dc9357SAndroid Build Coastguard Worker state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
121*f6dc9357SAndroid Build Coastguard Worker state1 = state0; \
122*f6dc9357SAndroid Build Coastguard Worker state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
123*f6dc9357SAndroid Build Coastguard Worker state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
124*f6dc9357SAndroid Build Coastguard Worker
125*f6dc9357SAndroid Build Coastguard Worker
126*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
127*f6dc9357SAndroid Build Coastguard Worker #ifdef ATTRIB_SHA
128*f6dc9357SAndroid Build Coastguard Worker ATTRIB_SHA
129*f6dc9357SAndroid Build Coastguard Worker #endif
Sha256_UpdateBlocks_HW(UInt32 state[8],const Byte * data,size_t numBlocks)130*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
131*f6dc9357SAndroid Build Coastguard Worker {
132*f6dc9357SAndroid Build Coastguard Worker const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
133*f6dc9357SAndroid Build Coastguard Worker
134*f6dc9357SAndroid Build Coastguard Worker
135*f6dc9357SAndroid Build Coastguard Worker __m128i tmp, state0, state1;
136*f6dc9357SAndroid Build Coastguard Worker
137*f6dc9357SAndroid Build Coastguard Worker if (numBlocks == 0)
138*f6dc9357SAndroid Build Coastguard Worker return;
139*f6dc9357SAndroid Build Coastguard Worker
140*f6dc9357SAndroid Build Coastguard Worker state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]);
141*f6dc9357SAndroid Build Coastguard Worker state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
142*f6dc9357SAndroid Build Coastguard Worker
143*f6dc9357SAndroid Build Coastguard Worker PREPARE_STATE
144*f6dc9357SAndroid Build Coastguard Worker
145*f6dc9357SAndroid Build Coastguard Worker do
146*f6dc9357SAndroid Build Coastguard Worker {
147*f6dc9357SAndroid Build Coastguard Worker __m128i state0_save, state1_save;
148*f6dc9357SAndroid Build Coastguard Worker __m128i m0, m1, m2, m3;
149*f6dc9357SAndroid Build Coastguard Worker __m128i msg;
150*f6dc9357SAndroid Build Coastguard Worker // #define msg tmp
151*f6dc9357SAndroid Build Coastguard Worker
152*f6dc9357SAndroid Build Coastguard Worker state0_save = state0;
153*f6dc9357SAndroid Build Coastguard Worker state1_save = state1;
154*f6dc9357SAndroid Build Coastguard Worker
155*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m0, 0)
156*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m1, 1)
157*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m2, 2)
158*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m3, 3)
159*f6dc9357SAndroid Build Coastguard Worker
160*f6dc9357SAndroid Build Coastguard Worker
161*f6dc9357SAndroid Build Coastguard Worker
162*f6dc9357SAndroid Build Coastguard Worker R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
163*f6dc9357SAndroid Build Coastguard Worker R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
164*f6dc9357SAndroid Build Coastguard Worker R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
165*f6dc9357SAndroid Build Coastguard Worker R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
166*f6dc9357SAndroid Build Coastguard Worker
167*f6dc9357SAndroid Build Coastguard Worker ADD_EPI32(state0, state0_save)
168*f6dc9357SAndroid Build Coastguard Worker ADD_EPI32(state1, state1_save)
169*f6dc9357SAndroid Build Coastguard Worker
170*f6dc9357SAndroid Build Coastguard Worker data += 64;
171*f6dc9357SAndroid Build Coastguard Worker }
172*f6dc9357SAndroid Build Coastguard Worker while (--numBlocks);
173*f6dc9357SAndroid Build Coastguard Worker
174*f6dc9357SAndroid Build Coastguard Worker PREPARE_STATE
175*f6dc9357SAndroid Build Coastguard Worker
176*f6dc9357SAndroid Build Coastguard Worker _mm_storeu_si128((__m128i *) (void *) &state[0], state0);
177*f6dc9357SAndroid Build Coastguard Worker _mm_storeu_si128((__m128i *) (void *) &state[4], state1);
178*f6dc9357SAndroid Build Coastguard Worker }
179*f6dc9357SAndroid Build Coastguard Worker
180*f6dc9357SAndroid Build Coastguard Worker #endif // USE_HW_SHA
181*f6dc9357SAndroid Build Coastguard Worker
182*f6dc9357SAndroid Build Coastguard Worker #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
183*f6dc9357SAndroid Build Coastguard Worker
/*
  ARM / ARM64 toolchain detection.

  USE_HW_SHA is defined either when the compiler already advertises the
  SHA-2 / crypto extension (__ARM_FEATURE_SHA2 or __ARM_FEATURE_CRYPTO),
  or when all three nested conditions below hold:
    1) target:   ARM64, or __ARM_ARCH >= 4, or original MSVC;
    2) compiler: (__ARM_FP defined and clang >= 3.8 or gcc >= 6),
                 or original MSVC with _MSC_VER >= 1910;
    3) ARM64, or a non-clang compiler, or __ARM_NEON defined with a
       Z7_CLANG_VERSION outside the range 170000..170001.
*/
  #if   defined(__ARM_FEATURE_SHA2) \
     || defined(__ARM_FEATURE_CRYPTO)
    #define USE_HW_SHA
  #else
    #if  defined(MY_CPU_ARM64) \
      || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
      || defined(Z7_MSC_VER_ORIGINAL)
    #if  defined(__ARM_FP) && \
          (   defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
           || defined(__GNUC__) && (__GNUC__ >= 6) \
          ) \
      || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
    #if  defined(MY_CPU_ARM64) \
      || !defined(Z7_CLANG_VERSION) \
      || defined(__ARM_NEON) && \
          (Z7_CLANG_VERSION < 170000 || \
           Z7_CLANG_VERSION > 170001)
      #define USE_HW_SHA
    #endif
    #endif
    #endif
  #endif
206*f6dc9357SAndroid Build Coastguard Worker
207*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_HW_SHA
208*f6dc9357SAndroid Build Coastguard Worker
209*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== Sha256 HW === ")
210*f6dc9357SAndroid Build Coastguard Worker
211*f6dc9357SAndroid Build Coastguard Worker
212*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) || defined(__GNUC__)
213*f6dc9357SAndroid Build Coastguard Worker #if !defined(__ARM_FEATURE_SHA2) && \
214*f6dc9357SAndroid Build Coastguard Worker !defined(__ARM_FEATURE_CRYPTO)
215*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_ARM64
216*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__)
217*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA __attribute__((__target__("crypto")))
218*f6dc9357SAndroid Build Coastguard Worker #else
219*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA __attribute__((__target__("+crypto")))
220*f6dc9357SAndroid Build Coastguard Worker #endif
221*f6dc9357SAndroid Build Coastguard Worker #else
222*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) && (__clang_major__ >= 1)
223*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
224*f6dc9357SAndroid Build Coastguard Worker #else
225*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
226*f6dc9357SAndroid Build Coastguard Worker #endif
227*f6dc9357SAndroid Build Coastguard Worker #endif
228*f6dc9357SAndroid Build Coastguard Worker #endif
229*f6dc9357SAndroid Build Coastguard Worker #else
230*f6dc9357SAndroid Build Coastguard Worker // _MSC_VER
231*f6dc9357SAndroid Build Coastguard Worker // for arm32
232*f6dc9357SAndroid Build Coastguard Worker #define _ARM_USE_NEW_NEON_INTRINSICS
233*f6dc9357SAndroid Build Coastguard Worker #endif
234*f6dc9357SAndroid Build Coastguard Worker
235*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
236*f6dc9357SAndroid Build Coastguard Worker #include <arm64_neon.h>
237*f6dc9357SAndroid Build Coastguard Worker #else
238*f6dc9357SAndroid Build Coastguard Worker
239*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) && __clang_major__ < 16
240*f6dc9357SAndroid Build Coastguard Worker #if !defined(__ARM_FEATURE_SHA2) && \
241*f6dc9357SAndroid Build Coastguard Worker !defined(__ARM_FEATURE_CRYPTO)
242*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
243*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
244*f6dc9357SAndroid Build Coastguard Worker #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
245*f6dc9357SAndroid Build Coastguard Worker // #if defined(__clang__) && __clang_major__ < 13
246*f6dc9357SAndroid Build Coastguard Worker #define __ARM_FEATURE_CRYPTO 1
247*f6dc9357SAndroid Build Coastguard Worker // #else
248*f6dc9357SAndroid Build Coastguard Worker #define __ARM_FEATURE_SHA2 1
249*f6dc9357SAndroid Build Coastguard Worker // #endif
250*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
251*f6dc9357SAndroid Build Coastguard Worker #endif
252*f6dc9357SAndroid Build Coastguard Worker #endif // clang
253*f6dc9357SAndroid Build Coastguard Worker
254*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__)
255*f6dc9357SAndroid Build Coastguard Worker
256*f6dc9357SAndroid Build Coastguard Worker #if defined(__ARM_ARCH) && __ARM_ARCH < 8
257*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
258*f6dc9357SAndroid Build Coastguard Worker // #pragma message("#define __ARM_ARCH 8")
259*f6dc9357SAndroid Build Coastguard Worker #undef __ARM_ARCH
260*f6dc9357SAndroid Build Coastguard Worker #define __ARM_ARCH 8
261*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
262*f6dc9357SAndroid Build Coastguard Worker #endif
263*f6dc9357SAndroid Build Coastguard Worker
264*f6dc9357SAndroid Build Coastguard Worker #endif // clang
265*f6dc9357SAndroid Build Coastguard Worker
266*f6dc9357SAndroid Build Coastguard Worker #include <arm_neon.h>
267*f6dc9357SAndroid Build Coastguard Worker
268*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
269*f6dc9357SAndroid Build Coastguard Worker defined(__ARM_FEATURE_CRYPTO) && \
270*f6dc9357SAndroid Build Coastguard Worker defined(__ARM_FEATURE_SHA2)
271*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
272*f6dc9357SAndroid Build Coastguard Worker #undef __ARM_FEATURE_CRYPTO
273*f6dc9357SAndroid Build Coastguard Worker #undef __ARM_FEATURE_SHA2
274*f6dc9357SAndroid Build Coastguard Worker #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
275*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
276*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
277*f6dc9357SAndroid Build Coastguard Worker #endif
278*f6dc9357SAndroid Build Coastguard Worker
279*f6dc9357SAndroid Build Coastguard Worker #endif // Z7_MSC_VER_ORIGINAL
280*f6dc9357SAndroid Build Coastguard Worker
281*f6dc9357SAndroid Build Coastguard Worker typedef uint32x4_t v128;
282*f6dc9357SAndroid Build Coastguard Worker // typedef __n128 v128; // MSVC
283*f6dc9357SAndroid Build Coastguard Worker
284*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_BE
285*f6dc9357SAndroid Build Coastguard Worker #define MY_rev32_for_LE(x) x
286*f6dc9357SAndroid Build Coastguard Worker #else
287*f6dc9357SAndroid Build Coastguard Worker #define MY_rev32_for_LE(x) vrev32q_u8(x)
288*f6dc9357SAndroid Build Coastguard Worker #endif
289*f6dc9357SAndroid Build Coastguard Worker
290*f6dc9357SAndroid Build Coastguard Worker #if 1 // 0 for debug
291*f6dc9357SAndroid Build Coastguard Worker // for arm32: it works slower by some reason than direct code
292*f6dc9357SAndroid Build Coastguard Worker /*
293*f6dc9357SAndroid Build Coastguard Worker for arm32 it generates:
294*f6dc9357SAndroid Build Coastguard Worker MSVC-2022, GCC-9:
295*f6dc9357SAndroid Build Coastguard Worker vld1.32 {d18,d19}, [r10]
296*f6dc9357SAndroid Build Coastguard Worker vst1.32 {d4,d5}, [r3]
297*f6dc9357SAndroid Build Coastguard Worker vld1.8 {d20-d21}, [r4]
298*f6dc9357SAndroid Build Coastguard Worker there is no align hint (like [r10:128]). So instruction allows unaligned access
299*f6dc9357SAndroid Build Coastguard Worker */
300*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_32(_p) vld1q_u32(_p)
301*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_8(_p) vld1q_u8 (_p)
302*f6dc9357SAndroid Build Coastguard Worker #define STORE_128_32(_p, _v) vst1q_u32(_p, _v)
303*f6dc9357SAndroid Build Coastguard Worker #else
304*f6dc9357SAndroid Build Coastguard Worker /*
305*f6dc9357SAndroid Build Coastguard Worker for arm32:
306*f6dc9357SAndroid Build Coastguard Worker MSVC-2022:
307*f6dc9357SAndroid Build Coastguard Worker vldm r10,{d18,d19}
308*f6dc9357SAndroid Build Coastguard Worker vstm r3,{d4,d5}
309*f6dc9357SAndroid Build Coastguard Worker does it require strict alignment?
310*f6dc9357SAndroid Build Coastguard Worker GCC-9:
311*f6dc9357SAndroid Build Coastguard Worker vld1.64 {d30-d31}, [r0:64]
312*f6dc9357SAndroid Build Coastguard Worker vldr d28, [r0, #16]
313*f6dc9357SAndroid Build Coastguard Worker vldr d29, [r0, #24]
314*f6dc9357SAndroid Build Coastguard Worker vst1.64 {d30-d31}, [r0:64]
315*f6dc9357SAndroid Build Coastguard Worker vstr d28, [r0, #16]
316*f6dc9357SAndroid Build Coastguard Worker vstr d29, [r0, #24]
317*f6dc9357SAndroid Build Coastguard Worker there is hint [r0:64], so does it requires 64-bit alignment.
318*f6dc9357SAndroid Build Coastguard Worker */
319*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_32(_p) (*(const v128 *)(const void *)(_p))
320*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_8(_p) vreinterpretq_u8_u32(*(const v128 *)(const void *)(_p))
321*f6dc9357SAndroid Build Coastguard Worker #define STORE_128_32(_p, _v) *(v128 *)(void *)(_p) = (_v)
322*f6dc9357SAndroid Build Coastguard Worker #endif
323*f6dc9357SAndroid Build Coastguard Worker
324*f6dc9357SAndroid Build Coastguard Worker #define LOAD_SHUFFLE(m, k) \
325*f6dc9357SAndroid Build Coastguard Worker m = vreinterpretq_u32_u8( \
326*f6dc9357SAndroid Build Coastguard Worker MY_rev32_for_LE( \
327*f6dc9357SAndroid Build Coastguard Worker LOAD_128_8(data + (k) * 16))); \
328*f6dc9357SAndroid Build Coastguard Worker
329*f6dc9357SAndroid Build Coastguard Worker // K array must be aligned for 16-bytes at least.
330*f6dc9357SAndroid Build Coastguard Worker extern
331*f6dc9357SAndroid Build Coastguard Worker MY_ALIGN(64)
332*f6dc9357SAndroid Build Coastguard Worker const UInt32 SHA256_K_ARRAY[64];
333*f6dc9357SAndroid Build Coastguard Worker #define K SHA256_K_ARRAY
334*f6dc9357SAndroid Build Coastguard Worker
335*f6dc9357SAndroid Build Coastguard Worker #define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src);
336*f6dc9357SAndroid Build Coastguard Worker #define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
337*f6dc9357SAndroid Build Coastguard Worker
338*f6dc9357SAndroid Build Coastguard Worker #define SM1(m0, m1, m2, m3) SHA256_SU0(m3, m0)
339*f6dc9357SAndroid Build Coastguard Worker #define SM2(m0, m1, m2, m3) SHA256_SU1(m2, m0, m1)
340*f6dc9357SAndroid Build Coastguard Worker #define NNN(m0, m1, m2, m3)
341*f6dc9357SAndroid Build Coastguard Worker
342*f6dc9357SAndroid Build Coastguard Worker #define R4(k, m0, m1, m2, m3, OP0, OP1) \
343*f6dc9357SAndroid Build Coastguard Worker msg = vaddq_u32(m0, *(const v128 *) (const void *) &K[(k) * 4]); \
344*f6dc9357SAndroid Build Coastguard Worker tmp = state0; \
345*f6dc9357SAndroid Build Coastguard Worker state0 = vsha256hq_u32( state0, state1, msg ); \
346*f6dc9357SAndroid Build Coastguard Worker state1 = vsha256h2q_u32( state1, tmp, msg ); \
347*f6dc9357SAndroid Build Coastguard Worker OP0(m0, m1, m2, m3); \
348*f6dc9357SAndroid Build Coastguard Worker OP1(m0, m1, m2, m3); \
349*f6dc9357SAndroid Build Coastguard Worker
350*f6dc9357SAndroid Build Coastguard Worker
351*f6dc9357SAndroid Build Coastguard Worker #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
352*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
353*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
354*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
355*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
356*f6dc9357SAndroid Build Coastguard Worker
357*f6dc9357SAndroid Build Coastguard Worker
358*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
359*f6dc9357SAndroid Build Coastguard Worker #ifdef ATTRIB_SHA
360*f6dc9357SAndroid Build Coastguard Worker ATTRIB_SHA
361*f6dc9357SAndroid Build Coastguard Worker #endif
Sha256_UpdateBlocks_HW(UInt32 state[8],const Byte * data,size_t numBlocks)362*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
363*f6dc9357SAndroid Build Coastguard Worker {
364*f6dc9357SAndroid Build Coastguard Worker v128 state0, state1;
365*f6dc9357SAndroid Build Coastguard Worker
366*f6dc9357SAndroid Build Coastguard Worker if (numBlocks == 0)
367*f6dc9357SAndroid Build Coastguard Worker return;
368*f6dc9357SAndroid Build Coastguard Worker
369*f6dc9357SAndroid Build Coastguard Worker state0 = LOAD_128_32(&state[0]);
370*f6dc9357SAndroid Build Coastguard Worker state1 = LOAD_128_32(&state[4]);
371*f6dc9357SAndroid Build Coastguard Worker
372*f6dc9357SAndroid Build Coastguard Worker do
373*f6dc9357SAndroid Build Coastguard Worker {
374*f6dc9357SAndroid Build Coastguard Worker v128 state0_save, state1_save;
375*f6dc9357SAndroid Build Coastguard Worker v128 m0, m1, m2, m3;
376*f6dc9357SAndroid Build Coastguard Worker v128 msg, tmp;
377*f6dc9357SAndroid Build Coastguard Worker
378*f6dc9357SAndroid Build Coastguard Worker state0_save = state0;
379*f6dc9357SAndroid Build Coastguard Worker state1_save = state1;
380*f6dc9357SAndroid Build Coastguard Worker
381*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m0, 0)
382*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m1, 1)
383*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m2, 2)
384*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m3, 3)
385*f6dc9357SAndroid Build Coastguard Worker
386*f6dc9357SAndroid Build Coastguard Worker R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
387*f6dc9357SAndroid Build Coastguard Worker R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
388*f6dc9357SAndroid Build Coastguard Worker R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
389*f6dc9357SAndroid Build Coastguard Worker R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
390*f6dc9357SAndroid Build Coastguard Worker
391*f6dc9357SAndroid Build Coastguard Worker state0 = vaddq_u32(state0, state0_save);
392*f6dc9357SAndroid Build Coastguard Worker state1 = vaddq_u32(state1, state1_save);
393*f6dc9357SAndroid Build Coastguard Worker
394*f6dc9357SAndroid Build Coastguard Worker data += 64;
395*f6dc9357SAndroid Build Coastguard Worker }
396*f6dc9357SAndroid Build Coastguard Worker while (--numBlocks);
397*f6dc9357SAndroid Build Coastguard Worker
398*f6dc9357SAndroid Build Coastguard Worker STORE_128_32(&state[0], state0);
399*f6dc9357SAndroid Build Coastguard Worker STORE_128_32(&state[4], state1);
400*f6dc9357SAndroid Build Coastguard Worker }
401*f6dc9357SAndroid Build Coastguard Worker
402*f6dc9357SAndroid Build Coastguard Worker #endif // USE_HW_SHA
403*f6dc9357SAndroid Build Coastguard Worker
404*f6dc9357SAndroid Build Coastguard Worker #endif // MY_CPU_ARM_OR_ARM64
405*f6dc9357SAndroid Build Coastguard Worker
406*f6dc9357SAndroid Build Coastguard Worker
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
// #error Stop_Compiling_UNSUPPORTED_SHA
// We can compile this file with another C compiler,
// or we can compile the asm version.
// So we can generate real code instead of this stub function.
#pragma message("Sha256 HW-SW stub was used")

void Z7_FASTCALL Sha256_UpdateBlocks   (UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);

/*
  Sha256_UpdateBlocks_HW (stub)

  Compiled only when no hardware implementation was selected
  (USE_HW_SHA is not defined) and Z7_USE_HW_SHA_STUB is defined.
  It provides the Sha256_UpdateBlocks_HW symbol by forwarding all
  arguments unchanged to Sha256_UpdateBlocks(), which is declared
  here and defined outside this file.
*/
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  Sha256_UpdateBlocks(state, data, numBlocks);
}
#endif
431*f6dc9357SAndroid Build Coastguard Worker
432*f6dc9357SAndroid Build Coastguard Worker
// Remove the file-local helper macros so they do not leak past this file
// (relevant if the file is concatenated or #included into a larger
// translation unit). #undef of a macro that is not defined is a no-op,
// so the list covers both the x86 and the ARM variants.
#undef K
#undef RND2
#undef MY_rev32_for_LE

#undef NNN
#undef LOAD_128_32
#undef LOAD_128_8
#undef STORE_128_32
#undef LOAD_SHUFFLE
#undef SM1
#undef SM2

#undef ADD_EPI32
#undef SHA256_MSG1
#undef SHA256_MSG2
#undef SHA256_SU0
#undef SHA256_SU1

#undef R4
#undef R16
#undef PREPARE_STATE
#undef USE_HW_SHA
#undef ATTRIB_SHA
// USE_VER_MIN is not defined anywhere in this file as shown; kept as a harmless no-op.
#undef USE_VER_MIN
#undef Z7_USE_HW_SHA_STUB
452