/* Sha512Opt.c -- SHA-512 optimized code for SHA-512 hardware instructions
: Igor Pavlov : Public domain */

#include "Precomp.h"
#include "Compiler.h"
#include "CpuArch.h"

8*f6dc9357SAndroid Build Coastguard Worker // #define Z7_USE_HW_SHA_STUB // for debug
9*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_X86_OR_AMD64
10*f6dc9357SAndroid Build Coastguard Worker #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 2400) && (__INTEL_COMPILER <= 9900) // fix it
11*f6dc9357SAndroid Build Coastguard Worker #define USE_HW_SHA
12*f6dc9357SAndroid Build Coastguard Worker #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 170001) \
13*f6dc9357SAndroid Build Coastguard Worker || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 170001) \
14*f6dc9357SAndroid Build Coastguard Worker || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 140000)
15*f6dc9357SAndroid Build Coastguard Worker #define USE_HW_SHA
16*f6dc9357SAndroid Build Coastguard Worker #if !defined(__INTEL_COMPILER)
17*f6dc9357SAndroid Build Coastguard Worker // icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
18*f6dc9357SAndroid Build Coastguard Worker #if !defined(__SHA512__) || !defined(__AVX2__)
19*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA512 __attribute__((__target__("sha512,avx2")))
20*f6dc9357SAndroid Build Coastguard Worker #endif
21*f6dc9357SAndroid Build Coastguard Worker #endif
22*f6dc9357SAndroid Build Coastguard Worker #elif defined(Z7_MSC_VER_ORIGINAL)
23*f6dc9357SAndroid Build Coastguard Worker #if (_MSC_VER >= 1940)
24*f6dc9357SAndroid Build Coastguard Worker #define USE_HW_SHA
25*f6dc9357SAndroid Build Coastguard Worker #else
26*f6dc9357SAndroid Build Coastguard Worker // #define Z7_USE_HW_SHA_STUB
27*f6dc9357SAndroid Build Coastguard Worker #endif
28*f6dc9357SAndroid Build Coastguard Worker #endif
29*f6dc9357SAndroid Build Coastguard Worker // #endif // MY_CPU_X86_OR_AMD64
30*f6dc9357SAndroid Build Coastguard Worker #ifndef USE_HW_SHA
31*f6dc9357SAndroid Build Coastguard Worker // #define Z7_USE_HW_SHA_STUB // for debug
32*f6dc9357SAndroid Build Coastguard Worker #endif
33*f6dc9357SAndroid Build Coastguard Worker
34*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_HW_SHA
35*f6dc9357SAndroid Build Coastguard Worker
36*f6dc9357SAndroid Build Coastguard Worker // #pragma message("Sha512 HW")
37*f6dc9357SAndroid Build Coastguard Worker
38*f6dc9357SAndroid Build Coastguard Worker #include <immintrin.h>
39*f6dc9357SAndroid Build Coastguard Worker
40*f6dc9357SAndroid Build Coastguard Worker #if defined (__clang__) && defined(_MSC_VER)
41*f6dc9357SAndroid Build Coastguard Worker #if !defined(__AVX__)
42*f6dc9357SAndroid Build Coastguard Worker #include <avxintrin.h>
43*f6dc9357SAndroid Build Coastguard Worker #endif
44*f6dc9357SAndroid Build Coastguard Worker #if !defined(__AVX2__)
45*f6dc9357SAndroid Build Coastguard Worker #include <avx2intrin.h>
46*f6dc9357SAndroid Build Coastguard Worker #endif
47*f6dc9357SAndroid Build Coastguard Worker #if !defined(__SHA512__)
48*f6dc9357SAndroid Build Coastguard Worker #include <sha512intrin.h>
49*f6dc9357SAndroid Build Coastguard Worker #endif
50*f6dc9357SAndroid Build Coastguard Worker #else
51*f6dc9357SAndroid Build Coastguard Worker
52*f6dc9357SAndroid Build Coastguard Worker #endif
53*f6dc9357SAndroid Build Coastguard Worker
/*
SHA512 uses:
AVX:
  _mm256_loadu_si256  (vmovdqu)
  _mm256_storeu_si256
  _mm256_set_epi32    (used only to build the constant byte-swap mask)
AVX2:
  _mm256_add_epi64          : vpaddq
  _mm256_shuffle_epi8       : vpshufb
  _mm256_shuffle_epi32      : vpshufd
  _mm256_blend_epi32        : vpblendd
  _mm256_permute4x64_epi64  : vpermq       : 3c
  _mm256_permute2x128_si256 : vperm2i128   : 3c
  _mm256_extracti128_si256  : vextracti128 : 3c
SHA512:
  _mm256_sha512*
*/

72*f6dc9357SAndroid Build Coastguard Worker // K array must be aligned for 32-bytes at least.
73*f6dc9357SAndroid Build Coastguard Worker // The compiler can look align attribute and selects
74*f6dc9357SAndroid Build Coastguard Worker // vmovdqu - for code without align attribute
75*f6dc9357SAndroid Build Coastguard Worker // vmovdqa - for code with align attribute
76*f6dc9357SAndroid Build Coastguard Worker extern
77*f6dc9357SAndroid Build Coastguard Worker MY_ALIGN(64)
78*f6dc9357SAndroid Build Coastguard Worker const UInt64 SHA512_K_ARRAY[80];
79*f6dc9357SAndroid Build Coastguard Worker #define K SHA512_K_ARRAY
80*f6dc9357SAndroid Build Coastguard Worker
81*f6dc9357SAndroid Build Coastguard Worker
82*f6dc9357SAndroid Build Coastguard Worker #define ADD_EPI64(dest, src) dest = _mm256_add_epi64(dest, src);
83*f6dc9357SAndroid Build Coastguard Worker #define SHA512_MSG1(dest, src) dest = _mm256_sha512msg1_epi64(dest, _mm256_extracti128_si256(src, 0));
84*f6dc9357SAndroid Build Coastguard Worker #define SHA512_MSG2(dest, src) dest = _mm256_sha512msg2_epi64(dest, src);
85*f6dc9357SAndroid Build Coastguard Worker
86*f6dc9357SAndroid Build Coastguard Worker #define LOAD_SHUFFLE(m, k) \
87*f6dc9357SAndroid Build Coastguard Worker m = _mm256_loadu_si256((const __m256i *)(const void *)(data + (k) * 32)); \
88*f6dc9357SAndroid Build Coastguard Worker m = _mm256_shuffle_epi8(m, mask); \
89*f6dc9357SAndroid Build Coastguard Worker
90*f6dc9357SAndroid Build Coastguard Worker #define NNN(m0, m1, m2, m3)
91*f6dc9357SAndroid Build Coastguard Worker
92*f6dc9357SAndroid Build Coastguard Worker #define SM1(m1, m2, m3, m0) \
93*f6dc9357SAndroid Build Coastguard Worker SHA512_MSG1(m0, m1); \
94*f6dc9357SAndroid Build Coastguard Worker
95*f6dc9357SAndroid Build Coastguard Worker #define SM2(m2, m3, m0, m1) \
96*f6dc9357SAndroid Build Coastguard Worker ADD_EPI64(m0, _mm256_permute4x64_epi64(_mm256_blend_epi32(m2, m3, 3), 0x39)); \
97*f6dc9357SAndroid Build Coastguard Worker SHA512_MSG2(m0, m3); \
98*f6dc9357SAndroid Build Coastguard Worker
99*f6dc9357SAndroid Build Coastguard Worker #define RND2(t0, t1, lane) \
100*f6dc9357SAndroid Build Coastguard Worker t0 = _mm256_sha512rnds2_epi64(t0, t1, _mm256_extracti128_si256(msg, lane));
101*f6dc9357SAndroid Build Coastguard Worker
102*f6dc9357SAndroid Build Coastguard Worker
103*f6dc9357SAndroid Build Coastguard Worker
104*f6dc9357SAndroid Build Coastguard Worker #define R4(k, m0, m1, m2, m3, OP0, OP1) \
105*f6dc9357SAndroid Build Coastguard Worker msg = _mm256_add_epi64(m0, *(const __m256i *) (const void *) &K[(k) * 4]); \
106*f6dc9357SAndroid Build Coastguard Worker RND2(state0, state1, 0); OP0(m0, m1, m2, m3) \
107*f6dc9357SAndroid Build Coastguard Worker RND2(state1, state0, 1); OP1(m0, m1, m2, m3) \
108*f6dc9357SAndroid Build Coastguard Worker
109*f6dc9357SAndroid Build Coastguard Worker
110*f6dc9357SAndroid Build Coastguard Worker
111*f6dc9357SAndroid Build Coastguard Worker
112*f6dc9357SAndroid Build Coastguard Worker #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
113*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
114*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
115*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
116*f6dc9357SAndroid Build Coastguard Worker R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
117*f6dc9357SAndroid Build Coastguard Worker
118*f6dc9357SAndroid Build Coastguard Worker #define PREPARE_STATE \
119*f6dc9357SAndroid Build Coastguard Worker state0 = _mm256_shuffle_epi32(state0, 0x4e); /* cdab */ \
120*f6dc9357SAndroid Build Coastguard Worker state1 = _mm256_shuffle_epi32(state1, 0x4e); /* ghef */ \
121*f6dc9357SAndroid Build Coastguard Worker tmp = state0; \
122*f6dc9357SAndroid Build Coastguard Worker state0 = _mm256_permute2x128_si256(state0, state1, 0x13); /* cdgh */ \
123*f6dc9357SAndroid Build Coastguard Worker state1 = _mm256_permute2x128_si256(tmp, state1, 2); /* abef */ \
124*f6dc9357SAndroid Build Coastguard Worker
125*f6dc9357SAndroid Build Coastguard Worker
126*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
127*f6dc9357SAndroid Build Coastguard Worker #ifdef ATTRIB_SHA512
128*f6dc9357SAndroid Build Coastguard Worker ATTRIB_SHA512
129*f6dc9357SAndroid Build Coastguard Worker #endif
Sha512_UpdateBlocks_HW(UInt64 state[8],const Byte * data,size_t numBlocks)130*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks)
131*f6dc9357SAndroid Build Coastguard Worker {
132*f6dc9357SAndroid Build Coastguard Worker const __m256i mask = _mm256_set_epi32(
133*f6dc9357SAndroid Build Coastguard Worker 0x08090a0b,0x0c0d0e0f, 0x00010203,0x04050607,
134*f6dc9357SAndroid Build Coastguard Worker 0x08090a0b,0x0c0d0e0f, 0x00010203,0x04050607);
135*f6dc9357SAndroid Build Coastguard Worker __m256i tmp, state0, state1;
136*f6dc9357SAndroid Build Coastguard Worker
137*f6dc9357SAndroid Build Coastguard Worker if (numBlocks == 0)
138*f6dc9357SAndroid Build Coastguard Worker return;
139*f6dc9357SAndroid Build Coastguard Worker
140*f6dc9357SAndroid Build Coastguard Worker state0 = _mm256_loadu_si256((const __m256i *) (const void *) &state[0]);
141*f6dc9357SAndroid Build Coastguard Worker state1 = _mm256_loadu_si256((const __m256i *) (const void *) &state[4]);
142*f6dc9357SAndroid Build Coastguard Worker
143*f6dc9357SAndroid Build Coastguard Worker PREPARE_STATE
144*f6dc9357SAndroid Build Coastguard Worker
145*f6dc9357SAndroid Build Coastguard Worker do
146*f6dc9357SAndroid Build Coastguard Worker {
147*f6dc9357SAndroid Build Coastguard Worker __m256i state0_save, state1_save;
148*f6dc9357SAndroid Build Coastguard Worker __m256i m0, m1, m2, m3;
149*f6dc9357SAndroid Build Coastguard Worker __m256i msg;
150*f6dc9357SAndroid Build Coastguard Worker // #define msg tmp
151*f6dc9357SAndroid Build Coastguard Worker
152*f6dc9357SAndroid Build Coastguard Worker state0_save = state0;
153*f6dc9357SAndroid Build Coastguard Worker state1_save = state1;
154*f6dc9357SAndroid Build Coastguard Worker
155*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m0, 0)
156*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m1, 1)
157*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m2, 2)
158*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m3, 3)
159*f6dc9357SAndroid Build Coastguard Worker
160*f6dc9357SAndroid Build Coastguard Worker
161*f6dc9357SAndroid Build Coastguard Worker
162*f6dc9357SAndroid Build Coastguard Worker R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
163*f6dc9357SAndroid Build Coastguard Worker R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
164*f6dc9357SAndroid Build Coastguard Worker R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
165*f6dc9357SAndroid Build Coastguard Worker R16 ( 3, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
166*f6dc9357SAndroid Build Coastguard Worker R16 ( 4, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
167*f6dc9357SAndroid Build Coastguard Worker ADD_EPI64(state0, state0_save)
168*f6dc9357SAndroid Build Coastguard Worker ADD_EPI64(state1, state1_save)
169*f6dc9357SAndroid Build Coastguard Worker
170*f6dc9357SAndroid Build Coastguard Worker data += 128;
171*f6dc9357SAndroid Build Coastguard Worker }
172*f6dc9357SAndroid Build Coastguard Worker while (--numBlocks);
173*f6dc9357SAndroid Build Coastguard Worker
174*f6dc9357SAndroid Build Coastguard Worker PREPARE_STATE
175*f6dc9357SAndroid Build Coastguard Worker
176*f6dc9357SAndroid Build Coastguard Worker _mm256_storeu_si256((__m256i *) (void *) &state[0], state0);
177*f6dc9357SAndroid Build Coastguard Worker _mm256_storeu_si256((__m256i *) (void *) &state[4], state1);
178*f6dc9357SAndroid Build Coastguard Worker }
179*f6dc9357SAndroid Build Coastguard Worker
180*f6dc9357SAndroid Build Coastguard Worker #endif // USE_HW_SHA
181*f6dc9357SAndroid Build Coastguard Worker
182*f6dc9357SAndroid Build Coastguard Worker // gcc 8.5 also supports sha512, but we need also support in assembler that is called by gcc
183*f6dc9357SAndroid Build Coastguard Worker #elif defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
184*f6dc9357SAndroid Build Coastguard Worker
185*f6dc9357SAndroid Build Coastguard Worker #if defined(__ARM_FEATURE_SHA512)
186*f6dc9357SAndroid Build Coastguard Worker #define USE_HW_SHA
187*f6dc9357SAndroid Build Coastguard Worker #else
188*f6dc9357SAndroid Build Coastguard Worker #if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 130000) \
189*f6dc9357SAndroid Build Coastguard Worker || defined(__GNUC__) && (__GNUC__ >= 9) \
190*f6dc9357SAndroid Build Coastguard Worker ) \
191*f6dc9357SAndroid Build Coastguard Worker || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1940) // fix it
192*f6dc9357SAndroid Build Coastguard Worker #define USE_HW_SHA
193*f6dc9357SAndroid Build Coastguard Worker #endif
194*f6dc9357SAndroid Build Coastguard Worker #endif
195*f6dc9357SAndroid Build Coastguard Worker
196*f6dc9357SAndroid Build Coastguard Worker #ifdef USE_HW_SHA
197*f6dc9357SAndroid Build Coastguard Worker
198*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== Sha512 HW === ")
199*f6dc9357SAndroid Build Coastguard Worker
200*f6dc9357SAndroid Build Coastguard Worker
201*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) || defined(__GNUC__)
202*f6dc9357SAndroid Build Coastguard Worker #if !defined(__ARM_FEATURE_SHA512)
203*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== we define SHA3 ATTRIB_SHA512 === ")
204*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__)
205*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA512 __attribute__((__target__("sha3"))) // "armv8.2-a,sha3"
206*f6dc9357SAndroid Build Coastguard Worker #else
207*f6dc9357SAndroid Build Coastguard Worker #define ATTRIB_SHA512 __attribute__((__target__("arch=armv8.2-a+sha3")))
208*f6dc9357SAndroid Build Coastguard Worker #endif
209*f6dc9357SAndroid Build Coastguard Worker #endif
210*f6dc9357SAndroid Build Coastguard Worker #endif
211*f6dc9357SAndroid Build Coastguard Worker
212*f6dc9357SAndroid Build Coastguard Worker
213*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_MSC_VER_ORIGINAL)
214*f6dc9357SAndroid Build Coastguard Worker #include <arm64_neon.h>
215*f6dc9357SAndroid Build Coastguard Worker #else
216*f6dc9357SAndroid Build Coastguard Worker
217*f6dc9357SAndroid Build Coastguard Worker #if defined(__clang__) && __clang_major__ < 16
218*f6dc9357SAndroid Build Coastguard Worker #if !defined(__ARM_FEATURE_SHA512)
219*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== we set __ARM_FEATURE_SHA512 1 === ")
220*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
221*f6dc9357SAndroid Build Coastguard Worker #define Z7_ARM_FEATURE_SHA512_WAS_SET 1
222*f6dc9357SAndroid Build Coastguard Worker #define __ARM_FEATURE_SHA512 1
223*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
224*f6dc9357SAndroid Build Coastguard Worker #endif
225*f6dc9357SAndroid Build Coastguard Worker #endif // clang
226*f6dc9357SAndroid Build Coastguard Worker
227*f6dc9357SAndroid Build Coastguard Worker #include <arm_neon.h>
228*f6dc9357SAndroid Build Coastguard Worker
229*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_ARM_FEATURE_SHA512_WAS_SET) && \
230*f6dc9357SAndroid Build Coastguard Worker defined(__ARM_FEATURE_SHA512)
231*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
232*f6dc9357SAndroid Build Coastguard Worker #undef __ARM_FEATURE_SHA512
233*f6dc9357SAndroid Build Coastguard Worker #undef Z7_ARM_FEATURE_SHA512_WAS_SET
234*f6dc9357SAndroid Build Coastguard Worker Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
235*f6dc9357SAndroid Build Coastguard Worker // #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
236*f6dc9357SAndroid Build Coastguard Worker #endif
237*f6dc9357SAndroid Build Coastguard Worker
238*f6dc9357SAndroid Build Coastguard Worker #endif // Z7_MSC_VER_ORIGINAL
239*f6dc9357SAndroid Build Coastguard Worker
240*f6dc9357SAndroid Build Coastguard Worker typedef uint64x2_t v128_64;
241*f6dc9357SAndroid Build Coastguard Worker // typedef __n128 v128_64; // MSVC
242*f6dc9357SAndroid Build Coastguard Worker
243*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_CPU_BE
244*f6dc9357SAndroid Build Coastguard Worker #define MY_rev64_for_LE(x) x
245*f6dc9357SAndroid Build Coastguard Worker #else
246*f6dc9357SAndroid Build Coastguard Worker #define MY_rev64_for_LE(x) vrev64q_u8(x)
247*f6dc9357SAndroid Build Coastguard Worker #endif
248*f6dc9357SAndroid Build Coastguard Worker
249*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_64(_p) vld1q_u64(_p)
250*f6dc9357SAndroid Build Coastguard Worker #define LOAD_128_8(_p) vld1q_u8 (_p)
251*f6dc9357SAndroid Build Coastguard Worker #define STORE_128_64(_p, _v) vst1q_u64(_p, _v)
252*f6dc9357SAndroid Build Coastguard Worker
253*f6dc9357SAndroid Build Coastguard Worker #define LOAD_SHUFFLE(m, k) \
254*f6dc9357SAndroid Build Coastguard Worker m = vreinterpretq_u64_u8( \
255*f6dc9357SAndroid Build Coastguard Worker MY_rev64_for_LE( \
256*f6dc9357SAndroid Build Coastguard Worker LOAD_128_8(data + (k) * 16))); \
257*f6dc9357SAndroid Build Coastguard Worker
258*f6dc9357SAndroid Build Coastguard Worker // K array must be aligned for 16-bytes at least.
259*f6dc9357SAndroid Build Coastguard Worker extern
260*f6dc9357SAndroid Build Coastguard Worker MY_ALIGN(64)
261*f6dc9357SAndroid Build Coastguard Worker const UInt64 SHA512_K_ARRAY[80];
262*f6dc9357SAndroid Build Coastguard Worker #define K SHA512_K_ARRAY
263*f6dc9357SAndroid Build Coastguard Worker
264*f6dc9357SAndroid Build Coastguard Worker #define NN(m0, m1, m4, m5, m7)
265*f6dc9357SAndroid Build Coastguard Worker #define SM(m0, m1, m4, m5, m7) \
266*f6dc9357SAndroid Build Coastguard Worker m0 = vsha512su1q_u64(vsha512su0q_u64(m0, m1), m7, vextq_u64(m4, m5, 1));
267*f6dc9357SAndroid Build Coastguard Worker
268*f6dc9357SAndroid Build Coastguard Worker #define R2(k, m0,m1,m2,m3,m4,m5,m6,m7, a0,a1,a2,a3, OP) \
269*f6dc9357SAndroid Build Coastguard Worker OP(m0, m1, m4, m5, m7) \
270*f6dc9357SAndroid Build Coastguard Worker t = vaddq_u64(m0, vld1q_u64(k)); \
271*f6dc9357SAndroid Build Coastguard Worker t = vaddq_u64(vextq_u64(t, t, 1), a3); \
272*f6dc9357SAndroid Build Coastguard Worker t = vsha512hq_u64(t, vextq_u64(a2, a3, 1), vextq_u64(a1, a2, 1)); \
273*f6dc9357SAndroid Build Coastguard Worker a3 = vsha512h2q_u64(t, a1, a0); \
274*f6dc9357SAndroid Build Coastguard Worker a1 = vaddq_u64(a1, t); \
275*f6dc9357SAndroid Build Coastguard Worker
276*f6dc9357SAndroid Build Coastguard Worker #define R8(k, m0,m1,m2,m3,m4,m5,m6,m7, OP) \
277*f6dc9357SAndroid Build Coastguard Worker R2 ( (k)+0*2, m0,m1,m2,m3,m4,m5,m6,m7, a0,a1,a2,a3, OP ) \
278*f6dc9357SAndroid Build Coastguard Worker R2 ( (k)+1*2, m1,m2,m3,m4,m5,m6,m7,m0, a3,a0,a1,a2, OP ) \
279*f6dc9357SAndroid Build Coastguard Worker R2 ( (k)+2*2, m2,m3,m4,m5,m6,m7,m0,m1, a2,a3,a0,a1, OP ) \
280*f6dc9357SAndroid Build Coastguard Worker R2 ( (k)+3*2, m3,m4,m5,m6,m7,m0,m1,m2, a1,a2,a3,a0, OP ) \
281*f6dc9357SAndroid Build Coastguard Worker
282*f6dc9357SAndroid Build Coastguard Worker #define R16(k, OP) \
283*f6dc9357SAndroid Build Coastguard Worker R8 ( (k)+0*2, m0,m1,m2,m3,m4,m5,m6,m7, OP ) \
284*f6dc9357SAndroid Build Coastguard Worker R8 ( (k)+4*2, m4,m5,m6,m7,m0,m1,m2,m3, OP ) \
285*f6dc9357SAndroid Build Coastguard Worker
286*f6dc9357SAndroid Build Coastguard Worker
287*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
288*f6dc9357SAndroid Build Coastguard Worker #ifdef ATTRIB_SHA512
289*f6dc9357SAndroid Build Coastguard Worker ATTRIB_SHA512
290*f6dc9357SAndroid Build Coastguard Worker #endif
Sha512_UpdateBlocks_HW(UInt64 state[8],const Byte * data,size_t numBlocks)291*f6dc9357SAndroid Build Coastguard Worker void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks)
292*f6dc9357SAndroid Build Coastguard Worker {
293*f6dc9357SAndroid Build Coastguard Worker v128_64 a0, a1, a2, a3;
294*f6dc9357SAndroid Build Coastguard Worker
295*f6dc9357SAndroid Build Coastguard Worker if (numBlocks == 0)
296*f6dc9357SAndroid Build Coastguard Worker return;
297*f6dc9357SAndroid Build Coastguard Worker a0 = LOAD_128_64(&state[0]);
298*f6dc9357SAndroid Build Coastguard Worker a1 = LOAD_128_64(&state[2]);
299*f6dc9357SAndroid Build Coastguard Worker a2 = LOAD_128_64(&state[4]);
300*f6dc9357SAndroid Build Coastguard Worker a3 = LOAD_128_64(&state[6]);
301*f6dc9357SAndroid Build Coastguard Worker do
302*f6dc9357SAndroid Build Coastguard Worker {
303*f6dc9357SAndroid Build Coastguard Worker v128_64 a0_save, a1_save, a2_save, a3_save;
304*f6dc9357SAndroid Build Coastguard Worker v128_64 m0, m1, m2, m3, m4, m5, m6, m7;
305*f6dc9357SAndroid Build Coastguard Worker v128_64 t;
306*f6dc9357SAndroid Build Coastguard Worker unsigned i;
307*f6dc9357SAndroid Build Coastguard Worker const UInt64 *k_ptr;
308*f6dc9357SAndroid Build Coastguard Worker
309*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m0, 0)
310*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m1, 1)
311*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m2, 2)
312*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m3, 3)
313*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m4, 4)
314*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m5, 5)
315*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m6, 6)
316*f6dc9357SAndroid Build Coastguard Worker LOAD_SHUFFLE (m7, 7)
317*f6dc9357SAndroid Build Coastguard Worker
318*f6dc9357SAndroid Build Coastguard Worker a0_save = a0;
319*f6dc9357SAndroid Build Coastguard Worker a1_save = a1;
320*f6dc9357SAndroid Build Coastguard Worker a2_save = a2;
321*f6dc9357SAndroid Build Coastguard Worker a3_save = a3;
322*f6dc9357SAndroid Build Coastguard Worker
323*f6dc9357SAndroid Build Coastguard Worker R16 ( K, NN )
324*f6dc9357SAndroid Build Coastguard Worker k_ptr = K + 16;
325*f6dc9357SAndroid Build Coastguard Worker for (i = 0; i < 4; i++)
326*f6dc9357SAndroid Build Coastguard Worker {
327*f6dc9357SAndroid Build Coastguard Worker R16 ( k_ptr, SM )
328*f6dc9357SAndroid Build Coastguard Worker k_ptr += 16;
329*f6dc9357SAndroid Build Coastguard Worker }
330*f6dc9357SAndroid Build Coastguard Worker
331*f6dc9357SAndroid Build Coastguard Worker a0 = vaddq_u64(a0, a0_save);
332*f6dc9357SAndroid Build Coastguard Worker a1 = vaddq_u64(a1, a1_save);
333*f6dc9357SAndroid Build Coastguard Worker a2 = vaddq_u64(a2, a2_save);
334*f6dc9357SAndroid Build Coastguard Worker a3 = vaddq_u64(a3, a3_save);
335*f6dc9357SAndroid Build Coastguard Worker
336*f6dc9357SAndroid Build Coastguard Worker data += 128;
337*f6dc9357SAndroid Build Coastguard Worker }
338*f6dc9357SAndroid Build Coastguard Worker while (--numBlocks);
339*f6dc9357SAndroid Build Coastguard Worker
340*f6dc9357SAndroid Build Coastguard Worker STORE_128_64(&state[0], a0);
341*f6dc9357SAndroid Build Coastguard Worker STORE_128_64(&state[2], a1);
342*f6dc9357SAndroid Build Coastguard Worker STORE_128_64(&state[4], a2);
343*f6dc9357SAndroid Build Coastguard Worker STORE_128_64(&state[6], a3);
344*f6dc9357SAndroid Build Coastguard Worker }
345*f6dc9357SAndroid Build Coastguard Worker
346*f6dc9357SAndroid Build Coastguard Worker #endif // USE_HW_SHA
347*f6dc9357SAndroid Build Coastguard Worker
348*f6dc9357SAndroid Build Coastguard Worker #endif // MY_CPU_ARM_OR_ARM64
349*f6dc9357SAndroid Build Coastguard Worker
350*f6dc9357SAndroid Build Coastguard Worker
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
// #error Stop_Compiling_UNSUPPORTED_SHA
// We can compile this file with another C compiler,
// or we can compile asm version.
// So we can generate real code instead of this stub function.
#pragma message("Sha512 HW-SW stub was used")

/*
  Stub used when no hardware path was compiled but Z7_USE_HW_SHA_STUB is
  defined: Sha512_UpdateBlocks_HW simply forwards all arguments to
  Sha512_UpdateBlocks, which is declared here and defined elsewhere.
*/
void Z7_FASTCALL Sha512_UpdateBlocks   (UInt64 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks)
{
  Sha512_UpdateBlocks(state, data, numBlocks);
}
#endif


// Remove every helper macro this file may have defined, so that nothing leaks
// if the file is compiled as part of a larger translation unit.
// #undef of a name that is not defined is a no-op, so the list covers both
// the x86 and the ARM64 variants.
#undef K
#undef ADD_EPI64
#undef SHA512_MSG1
#undef SHA512_MSG2
#undef RND2
#undef MY_rev64_for_LE
#undef NN
#undef NNN
#undef LOAD_128_64
#undef LOAD_128_8
#undef STORE_128_64
#undef LOAD_SHUFFLE
#undef SM1
#undef SM2
#undef SM
#undef R2
#undef R4
#undef R8
#undef R16
#undef PREPARE_STATE
#undef USE_HW_SHA
#undef ATTRIB_SHA512
// USE_VER_MIN is not defined anywhere in this file; kept as harmless cleanup.
#undef USE_VER_MIN
#undef Z7_USE_HW_SHA_STUB