/* Copyright (c) 2014, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

// Adapted from the public domain, estream code by D. Bernstein.

#include <openssl/chacha.h>

#include <assert.h>
#include <string.h>

#include "../internal.h"
#include "internal.h"


// sigma contains the ChaCha constants, which happen to be an ASCII string.
static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
                                   '2', '-', 'b', 'y', 't', 'e', ' ', 'k' };
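// Read as four little-endian words, sigma spells "expa", "nd 3", "2-by" and
// "te k" and fills state words 0-3.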
// QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round.
#define QUARTERROUND(a, b, c, d)           \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 16); \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 12); \
  x[a] += x[b];                            \
  x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 8);  \
  x[c] += x[d];                            \
  x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 7);

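// CRYPTO_hchacha20 implements HChaCha20: it runs the 20 ChaCha rounds over
// the constants, a 256-bit key and a 128-bit nonce, and is used to derive a
// subkey (e.g. for XChaCha20) rather than to produce a keystream.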
void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
                      const uint8_t nonce[16]) {
  uint32_t x[16];
  OPENSSL_memcpy(x, sigma, sizeof(sigma));
  OPENSSL_memcpy(&x[4], key, 32);
  OPENSSL_memcpy(&x[12], nonce, 16);

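  // Each iteration performs a column round followed by a diagonal round, so
  // ten iterations give the full 20 rounds.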
  for (size_t i = 0; i < 20; i += 2) {
    QUARTERROUND(0, 4, 8, 12)
    QUARTERROUND(1, 5, 9, 13)
    QUARTERROUND(2, 6, 10, 14)
    QUARTERROUND(3, 7, 11, 15)
    QUARTERROUND(0, 5, 10, 15)
    QUARTERROUND(1, 6, 11, 12)
    QUARTERROUND(2, 7, 8, 13)
    QUARTERROUND(3, 4, 9, 14)
  }

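  // HChaCha20 outputs words 0-3 and 12-15 of the final state. Unlike the
  // ChaCha20 block function, the input state is not added back in.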
  OPENSSL_memcpy(out, &x[0], sizeof(uint32_t) * 4);
  OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
}

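// ChaCha20_ctr32 dispatches to the fastest assembly implementation that the
// build and the current CPU support, falling back to the generic
// ChaCha20_ctr32_nohw routine.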
#if defined(CHACHA20_ASM_NOHW)
static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
                           const uint32_t key[8], const uint32_t counter[4]) {
#if defined(CHACHA20_ASM_NEON)
  if (ChaCha20_ctr32_neon_capable(in_len)) {
    ChaCha20_ctr32_neon(out, in, in_len, key, counter);
    return;
  }
#endif
#if defined(CHACHA20_ASM_AVX2)
  if (ChaCha20_ctr32_avx2_capable(in_len)) {
    ChaCha20_ctr32_avx2(out, in, in_len, key, counter);
    return;
  }
#endif
#if defined(CHACHA20_ASM_SSSE3_4X)
  if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
    ChaCha20_ctr32_ssse3_4x(out, in, in_len, key, counter);
    return;
  }
#endif
#if defined(CHACHA20_ASM_SSSE3)
  if (ChaCha20_ctr32_ssse3_capable(in_len)) {
    ChaCha20_ctr32_ssse3(out, in, in_len, key, counter);
    return;
  }
#endif
  if (in_len > 0) {
    ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
  }
}
#endif

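// A minimal usage sketch (not part of this file): encrypting or decrypting a
// buffer in place with an initial block counter of zero, assuming |key| and
// |nonce| are caller-provided and the nonce is unique per key:
//
//   CRYPTO_chacha_20(buf, buf, buf_len, key, nonce, 0);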
#if defined(CHACHA20_ASM_NOHW)

void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

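  // counter_nonce holds ChaCha state words 12-15: the 32-bit block counter
  // followed by the 96-bit nonce, loaded little-endian.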
  uint32_t counter_nonce[4];
  counter_nonce[0] = counter;
  counter_nonce[1] = CRYPTO_load_u32_le(nonce + 0);
  counter_nonce[2] = CRYPTO_load_u32_le(nonce + 4);
  counter_nonce[3] = CRYPTO_load_u32_le(nonce + 8);

  const uint32_t *key_ptr = (const uint32_t *)key;
#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
  // The assembly expects the key to be four-byte aligned.
  uint32_t key_u32[8];
  if ((((uintptr_t)key) & 3) != 0) {
    key_u32[0] = CRYPTO_load_u32_le(key + 0);
    key_u32[1] = CRYPTO_load_u32_le(key + 4);
    key_u32[2] = CRYPTO_load_u32_le(key + 8);
    key_u32[3] = CRYPTO_load_u32_le(key + 12);
    key_u32[4] = CRYPTO_load_u32_le(key + 16);
    key_u32[5] = CRYPTO_load_u32_le(key + 20);
    key_u32[6] = CRYPTO_load_u32_le(key + 24);
    key_u32[7] = CRYPTO_load_u32_le(key + 28);

    key_ptr = key_u32;
  }
#endif

  while (in_len > 0) {
    // The assembly functions do not have defined overflow behavior. While
    // overflow is almost always a bug in the caller, we prefer our functions
    // to behave the same across platforms, so divide into multiple calls to
    // avoid this case.
    uint64_t todo = 64 * ((UINT64_C(1) << 32) - counter_nonce[0]);
    if (todo > in_len) {
      todo = in_len;
    }

    ChaCha20_ctr32(out, in, (size_t)todo, key_ptr, counter_nonce);
    in += todo;
    out += todo;
    in_len -= todo;

    // We're either done and will next break out of the loop, or we stopped at
    // the wraparound point and the counter should continue at zero.
    counter_nonce[0] = 0;
  }
}

#else

// chacha_core performs 20 rounds of ChaCha on the input words in
// |input| and writes the 64 output bytes to |output|.
static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
  uint32_t x[16];
  int i;

  OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16);
  for (i = 20; i > 0; i -= 2) {
    QUARTERROUND(0, 4, 8, 12)
    QUARTERROUND(1, 5, 9, 13)
    QUARTERROUND(2, 6, 10, 14)
    QUARTERROUND(3, 7, 11, 15)
    QUARTERROUND(0, 5, 10, 15)
    QUARTERROUND(1, 6, 11, 12)
    QUARTERROUND(2, 7, 8, 13)
    QUARTERROUND(3, 4, 9, 14)
  }

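  // Feed the input state forward into the result so the round function cannot
  // be inverted from the output, then serialize the words little-endian.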
  for (i = 0; i < 16; ++i) {
    x[i] += input[i];
  }
  for (i = 0; i < 16; ++i) {
    CRYPTO_store_u32_le(output + 4 * i, x[i]);
  }
}

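// Portable C implementation of ChaCha20, used when no assembly back end is
// available. It generates the keystream one 64-byte block at a time and XORs
// it into the input.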
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  uint32_t input[16];
  uint8_t buf[64];
  size_t todo, i;

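  // Assemble the 4x4 ChaCha state: constants in words 0-3, the key in words
  // 4-11, the block counter in word 12 and the nonce in words 13-15.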
  input[0] = CRYPTO_load_u32_le(sigma + 0);
  input[1] = CRYPTO_load_u32_le(sigma + 4);
  input[2] = CRYPTO_load_u32_le(sigma + 8);
  input[3] = CRYPTO_load_u32_le(sigma + 12);

  input[4] = CRYPTO_load_u32_le(key + 0);
  input[5] = CRYPTO_load_u32_le(key + 4);
  input[6] = CRYPTO_load_u32_le(key + 8);
  input[7] = CRYPTO_load_u32_le(key + 12);

  input[8] = CRYPTO_load_u32_le(key + 16);
  input[9] = CRYPTO_load_u32_le(key + 20);
  input[10] = CRYPTO_load_u32_le(key + 24);
  input[11] = CRYPTO_load_u32_le(key + 28);

  input[12] = counter;
  input[13] = CRYPTO_load_u32_le(nonce + 0);
  input[14] = CRYPTO_load_u32_le(nonce + 4);
  input[15] = CRYPTO_load_u32_le(nonce + 8);

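  // Each iteration produces one 64-byte keystream block, XORs as many bytes
  // as remain, and then increments the block counter in word 12.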
  while (in_len > 0) {
    todo = sizeof(buf);
    if (in_len < todo) {
      todo = in_len;
    }

    chacha_core(buf, input);
    for (i = 0; i < todo; i++) {
      out[i] = in[i] ^ buf[i];
    }

    out += todo;
    in += todo;
    in_len -= todo;

    input[12]++;
  }
}

#endif