/* Copyright 2021 The ChromiumOS Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 *
 * SHA256 implementation using the x86 SHA extension.
 * Mainly from https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-x86.c,
 * written and placed in the public domain by Jeffrey Walton,
 * based on code from Intel, and by Sean Gulley for the miTLS project.
 */
#include "2common.h"
#include "2sha.h"
#include "2sha_private.h"
#include "2api.h"

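/*
 * Order in which vb2_sha_ctx.h[] stores the SHA-256 state words: canonical
 * word i (a, b, ..., h) is kept at index vb2_hash_seq[i], so that h[0..3]
 * and h[4..7] load directly as the ABEF and CDGH registers expected by
 * sha256rnds2.
 */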
const uint32_t vb2_hash_seq[8] = {3, 2, 7, 6, 1, 0, 5, 4};

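/* 128-bit vector type, standing in for __m128i so <immintrin.h> is not needed. */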
typedef int vb2_m128i __attribute__ ((vector_size(16)));

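/* Unaligned 128-bit load (movups); equivalent to _mm_loadu_si128(). */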
static inline vb2_m128i vb2_loadu_si128(vb2_m128i *ptr)
{
        vb2_m128i result;
        asm volatile ("movups %1, %0" : "=x"(result) : "m"(*ptr));
        return result;
}

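/* Unaligned 128-bit store (movups); equivalent to _mm_storeu_si128(). */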
static inline void vb2_storeu_si128(vb2_m128i *to, vb2_m128i from)
{
        asm volatile ("movups %1, %0" : "=m"(*to) : "x"(from));
}

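/* Packed 32-bit addition; equivalent to _mm_add_epi32(). */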
static inline vb2_m128i vb2_add_epi32(vb2_m128i a, vb2_m128i b)
{
        return a + b;
}

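/* Byte shuffle (pshufb); equivalent to _mm_shuffle_epi8(). */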
static inline vb2_m128i vb2_shuffle_epi8(vb2_m128i value, vb2_m128i mask)
{
        asm ("pshufb %1, %0" : "+x"(value) : "xm"(mask));
        return value;
}

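/* Dword shuffle by immediate (pshufd); equivalent to _mm_shuffle_epi32(). */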
static inline vb2_m128i vb2_shuffle_epi32(vb2_m128i value, int mask)
{
        vb2_m128i result;
        asm ("pshufd %2, %1, %0" : "=x"(result) : "xm"(value), "i"(mask));
        return result;
}

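/* Concatenate a and b, then byte-shift right by imm8 (palignr); equivalent
 * to _mm_alignr_epi8().
 */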
static inline vb2_m128i vb2_alignr_epi8(vb2_m128i a, vb2_m128i b, int imm8)
{
        asm ("palignr %2, %1, %0" : "+x"(a) : "xm"(b), "i"(imm8));
        return a;
}

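/* Intermediate message schedule calculation (sha256msg1); equivalent to
 * _mm_sha256msg1_epu32().
 */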
static inline vb2_m128i vb2_sha256msg1_epu32(vb2_m128i a, vb2_m128i b)
{
        asm ("sha256msg1 %1, %0" : "+x"(a) : "xm"(b));
        return a;
}

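/* Final message schedule calculation (sha256msg2); equivalent to
 * _mm_sha256msg2_epu32().
 */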
static inline vb2_m128i vb2_sha256msg2_epu32(vb2_m128i a, vb2_m128i b)
{
        asm ("sha256msg2 %1, %0" : "+x"(a) : "xm"(b));
        return a;
}

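/* Two SHA-256 rounds (sha256rnds2); equivalent to _mm_sha256rnds2_epu32().
 * The "Yz" constraint pins k to xmm0, the instruction's implicit operand.
 */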
static inline vb2_m128i vb2_sha256rnds2_epu32(vb2_m128i a, vb2_m128i b,
                                              vb2_m128i k)
{
        asm ("sha256rnds2 %1, %0" : "+x"(a) : "xm"(b), "Yz"(k));
        return a;
}

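/*
 * Load the 16 message bytes at offset j * 16 of block i, byte-swap them into
 * native 32-bit words, add the round constants for rounds 4j..4j+3, and run
 * the first two of those four rounds.
 */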
#define SHA256_X86_PUT_STATE1(j, i) \
        { \
                msgtmp[j] = vb2_loadu_si128((vb2_m128i *) \
                                (message + (i << 6) + (j * 16))); \
                msgtmp[j] = vb2_shuffle_epi8(msgtmp[j], shuf_mask); \
                msg = vb2_add_epi32(msgtmp[j], \
                        vb2_loadu_si128((vb2_m128i *)&vb2_sha256_k[j * 4])); \
                state1 = vb2_sha256rnds2_epu32(state1, state0, msg); \
        }

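/*
 * Move the upper two w+k words of msg into the low lanes (pshufd with 0x0E)
 * and run the other two rounds of the current group of four.
 */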
#define SHA256_X86_PUT_STATE0() \
        { \
                msg = vb2_shuffle_epi32(msg, 0x0E); \
                state0 = vb2_sha256rnds2_epu32(state0, state1, msg); \
        }

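/*
 * One group of four rounds (4j..4j+3) for 4 <= j <= 14: add the round
 * constants to the current schedule words, run four rounds, and extend the
 * message schedule with sha256msg1/sha256msg2. msgtmp[] acts as a circular
 * buffer holding the last four groups of schedule words.
 */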
#define SHA256_X86_LOOP(j) \
        { \
                int k = j & 3; \
                int prev_k = (k + 3) & 3; \
                int next_k = (k + 1) & 3; \
                msg = vb2_add_epi32(msgtmp[k], \
                        vb2_loadu_si128((vb2_m128i *)&vb2_sha256_k[j * 4])); \
                state1 = vb2_sha256rnds2_epu32(state1, state0, msg); \
                tmp = vb2_alignr_epi8(msgtmp[k], msgtmp[prev_k], 4); \
                msgtmp[next_k] = vb2_add_epi32(msgtmp[next_k], tmp); \
                msgtmp[next_k] = vb2_sha256msg2_epu32(msgtmp[next_k], \
                                                      msgtmp[k]); \
                SHA256_X86_PUT_STATE0(); \
                msgtmp[prev_k] = vb2_sha256msg1_epu32(msgtmp[prev_k], \
                                                      msgtmp[k]); \
        }

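/*
 * Run the SHA-256 compression function over block_nb consecutive 64-byte
 * blocks of message, updating the state in vb2_sha_ctx.h[] in place.
 */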
static void vb2_sha256_transform_x86ext(const uint8_t *message,
                                        unsigned int block_nb)
{
        vb2_m128i state0, state1, msg, abef_save, cdgh_save;
        vb2_m128i msgtmp[4];
        vb2_m128i tmp;
        int i;
        /* pshufb mask: byte-swap each dword to convert the big-endian
         * message words to host order. */
        const vb2_m128i shuf_mask =
                {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f};

        state0 = vb2_loadu_si128((vb2_m128i *)&vb2_sha_ctx.h[0]);
        state1 = vb2_loadu_si128((vb2_m128i *)&vb2_sha_ctx.h[4]);
        for (i = 0; i < (int) block_nb; i++) {
                abef_save = state0;
                cdgh_save = state1;

                /* Rounds 0-15: hash while loading the message schedule. */
                SHA256_X86_PUT_STATE1(0, i);
                SHA256_X86_PUT_STATE0();

                SHA256_X86_PUT_STATE1(1, i);
                SHA256_X86_PUT_STATE0();
                msgtmp[0] = vb2_sha256msg1_epu32(msgtmp[0], msgtmp[1]);

                SHA256_X86_PUT_STATE1(2, i);
                SHA256_X86_PUT_STATE0();
                msgtmp[1] = vb2_sha256msg1_epu32(msgtmp[1], msgtmp[2]);

                SHA256_X86_PUT_STATE1(3, i);
                tmp = vb2_alignr_epi8(msgtmp[3], msgtmp[2], 4);
                msgtmp[0] = vb2_add_epi32(msgtmp[0], tmp);
                msgtmp[0] = vb2_sha256msg2_epu32(msgtmp[0], msgtmp[3]);
                SHA256_X86_PUT_STATE0();
                msgtmp[2] = vb2_sha256msg1_epu32(msgtmp[2], msgtmp[3]);

                /* Rounds 16-59. */
                SHA256_X86_LOOP(4);
                SHA256_X86_LOOP(5);
                SHA256_X86_LOOP(6);
                SHA256_X86_LOOP(7);
                SHA256_X86_LOOP(8);
                SHA256_X86_LOOP(9);
                SHA256_X86_LOOP(10);
                SHA256_X86_LOOP(11);
                SHA256_X86_LOOP(12);
                SHA256_X86_LOOP(13);
                SHA256_X86_LOOP(14);

                /* Rounds 60-63. */
                msg = vb2_add_epi32(msgtmp[3],
                        vb2_loadu_si128((vb2_m128i *)&vb2_sha256_k[15 * 4]));
                state1 = vb2_sha256rnds2_epu32(state1, state0, msg);
                SHA256_X86_PUT_STATE0();

                /* Add the compressed block back into the running state. */
                state0 = vb2_add_epi32(state0, abef_save);
                state1 = vb2_add_epi32(state1, cdgh_save);
        }

        vb2_storeu_si128((vb2_m128i *)&vb2_sha_ctx.h[0], state0);
        vb2_storeu_si128((vb2_m128i *)&vb2_sha_ctx.h[4], state1);
}

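/* Hardware-accelerated SHA-256 transform entry point; forwards to the x86
 * SHA extension implementation above.
 */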
void vb2_sha256_transform_hwcrypto(const uint8_t *message,
                                   unsigned int block_nb)
{
        vb2_sha256_transform_x86ext(message, block_nb);
}