xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/crypto/chacha/chacha.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /* Copyright (c) 2014, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 // Adapted from the public domain, estream code by D. Bernstein.
16 
17 #include <openssl/chacha.h>
18 
19 #include <assert.h>
20 #include <string.h>
21 
22 #include "../internal.h"
23 #include "internal.h"
24 
25 
// sigma is the 16-byte ChaCha constant. Read as ASCII it spells
// "expand 32-byte k"; it is laid out below one 32-bit state word per row.
static const uint8_t sigma[16] = {
    'e', 'x', 'p', 'a',
    'n', 'd', ' ', '3',
    '2', '-', 'b', 'y',
    't', 'e', ' ', 'k',
};
29 
// QUARTERROUND applies one ChaCha quarter round to the words x[a], x[b], x[c]
// and x[d] of a local array named |x|, which must be in scope at the point of
// use. The expansion is a complete compound statement, so call sites do not
// need a trailing semicolon.
#define QUARTERROUND(a, b, c, d)        \
  {                                     \
    x[a] += x[b];                       \
    x[d] ^= x[a];                       \
    x[d] = CRYPTO_rotl_u32(x[d], 16);   \
    x[c] += x[d];                       \
    x[b] ^= x[c];                       \
    x[b] = CRYPTO_rotl_u32(x[b], 12);   \
    x[a] += x[b];                       \
    x[d] ^= x[a];                       \
    x[d] = CRYPTO_rotl_u32(x[d], 8);    \
    x[c] += x[d];                       \
    x[b] ^= x[c];                       \
    x[b] = CRYPTO_rotl_u32(x[b], 7);    \
  }
40 
CRYPTO_hchacha20(uint8_t out[32],const uint8_t key[32],const uint8_t nonce[16])41 void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
42                       const uint8_t nonce[16]) {
43   uint32_t x[16];
44   OPENSSL_memcpy(x, sigma, sizeof(sigma));
45   OPENSSL_memcpy(&x[4], key, 32);
46   OPENSSL_memcpy(&x[12], nonce, 16);
47 
48   for (size_t i = 0; i < 20; i += 2) {
49     QUARTERROUND(0, 4, 8, 12)
50     QUARTERROUND(1, 5, 9, 13)
51     QUARTERROUND(2, 6, 10, 14)
52     QUARTERROUND(3, 7, 11, 15)
53     QUARTERROUND(0, 5, 10, 15)
54     QUARTERROUND(1, 6, 11, 12)
55     QUARTERROUND(2, 7, 8, 13)
56     QUARTERROUND(3, 4, 9, 14)
57   }
58 
59   OPENSSL_memcpy(out, &x[0], sizeof(uint32_t) * 4);
60   OPENSSL_memcpy(&out[16], &x[12], sizeof(uint32_t) * 4);
61 }
62 
63 #if defined(CHACHA20_ASM_NOHW)
// ChaCha20_ctr32 hands the request to the first compiled-in assembly
// implementation whose capability check accepts |in_len|. The candidates are
// tried in the order NEON, AVX2, SSSE3_4X, SSSE3. If none accepts, a non-empty
// input is processed by |ChaCha20_ctr32_nohw|; an empty input is a no-op.
static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
                           const uint32_t key[8], const uint32_t counter[4]) {
#if defined(CHACHA20_ASM_NEON)
  if (ChaCha20_ctr32_neon_capable(in_len)) {
    ChaCha20_ctr32_neon(out, in, in_len, key, counter);
    return;
  }
#endif

#if defined(CHACHA20_ASM_AVX2)
  if (ChaCha20_ctr32_avx2_capable(in_len)) {
    ChaCha20_ctr32_avx2(out, in, in_len, key, counter);
    return;
  }
#endif

#if defined(CHACHA20_ASM_SSSE3_4X)
  if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
    ChaCha20_ctr32_ssse3_4x(out, in, in_len, key, counter);
    return;
  }
#endif

#if defined(CHACHA20_ASM_SSSE3)
  if (ChaCha20_ctr32_ssse3_capable(in_len)) {
    ChaCha20_ctr32_ssse3(out, in, in_len, key, counter);
    return;
  }
#endif

  // The fallback is only invoked when there is something to process.
  if (in_len == 0) {
    return;
  }
  ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
}
94 #endif
95 
96 #if defined(CHACHA20_ASM_NOHW)
97 
// CRYPTO_chacha_20 processes |in_len| bytes from |in| into |out| using |key|,
// |nonce| and an initial 32-bit block |counter|, by delegating to
// |ChaCha20_ctr32|. |in| and |out| must either be the same pointer or not
// overlap at all (checked by assertion).
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  // Word 0 is the block counter; words 1..3 are the nonce.
  uint32_t counter_nonce[4] = {
      counter,
      CRYPTO_load_u32_le(nonce + 0),
      CRYPTO_load_u32_le(nonce + 4),
      CRYPTO_load_u32_le(nonce + 8),
  };

  const uint32_t *key_words = (const uint32_t *)key;
#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
  // The assembly expects the key to be four-byte aligned, so a misaligned key
  // is first copied into an aligned local buffer.
  uint32_t aligned_key[8];
  if ((((uintptr_t)key) & 3) != 0) {
    for (size_t i = 0; i < 8; i++) {
      aligned_key[i] = CRYPTO_load_u32_le(key + 4 * i);
    }
    key_words = aligned_key;
  }
#endif

  while (in_len > 0) {
    // The assembly functions do not have defined behavior when the 32-bit
    // counter overflows. Overflow is almost always a caller bug, but to behave
    // the same on every platform each call is limited to the bytes that remain
    // before the counter would wrap.
    const uint64_t blocks_before_wrap =
        (UINT64_C(1) << 32) - counter_nonce[0];
    const uint64_t bytes_before_wrap = 64 * blocks_before_wrap;
    const size_t chunk =
        bytes_before_wrap > in_len ? in_len : (size_t)bytes_before_wrap;

    ChaCha20_ctr32(out, in, chunk, key_words, counter_nonce);
    in += chunk;
    out += chunk;
    in_len -= chunk;

    // Either all input has been consumed and the loop ends, or the call
    // stopped exactly at the wraparound point and the counter restarts at
    // zero.
    counter_nonce[0] = 0;
  }
}
147 
148 #else
149 
150 // chacha_core performs 20 rounds of ChaCha on the input words in
151 // |input| and writes the 64 output bytes to |output|.
chacha_core(uint8_t output[64],const uint32_t input[16])152 static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
153   uint32_t x[16];
154   int i;
155 
156   OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16);
157   for (i = 20; i > 0; i -= 2) {
158     QUARTERROUND(0, 4, 8, 12)
159     QUARTERROUND(1, 5, 9, 13)
160     QUARTERROUND(2, 6, 10, 14)
161     QUARTERROUND(3, 7, 11, 15)
162     QUARTERROUND(0, 5, 10, 15)
163     QUARTERROUND(1, 6, 11, 12)
164     QUARTERROUND(2, 7, 8, 13)
165     QUARTERROUND(3, 4, 9, 14)
166   }
167 
168   for (i = 0; i < 16; ++i) {
169     x[i] += input[i];
170   }
171   for (i = 0; i < 16; ++i) {
172     CRYPTO_store_u32_le(output + 4 * i, x[i]);
173   }
174 }
175 
CRYPTO_chacha_20(uint8_t * out,const uint8_t * in,size_t in_len,const uint8_t key[32],const uint8_t nonce[12],uint32_t counter)176 void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
177                       const uint8_t key[32], const uint8_t nonce[12],
178                       uint32_t counter) {
179   assert(!buffers_alias(out, in_len, in, in_len) || in == out);
180 
181   uint32_t input[16];
182   uint8_t buf[64];
183   size_t todo, i;
184 
185   input[0] = CRYPTO_load_u32_le(sigma + 0);
186   input[1] = CRYPTO_load_u32_le(sigma + 4);
187   input[2] = CRYPTO_load_u32_le(sigma + 8);
188   input[3] = CRYPTO_load_u32_le(sigma + 12);
189 
190   input[4] = CRYPTO_load_u32_le(key + 0);
191   input[5] = CRYPTO_load_u32_le(key + 4);
192   input[6] = CRYPTO_load_u32_le(key + 8);
193   input[7] = CRYPTO_load_u32_le(key + 12);
194 
195   input[8] = CRYPTO_load_u32_le(key + 16);
196   input[9] = CRYPTO_load_u32_le(key + 20);
197   input[10] = CRYPTO_load_u32_le(key + 24);
198   input[11] = CRYPTO_load_u32_le(key + 28);
199 
200   input[12] = counter;
201   input[13] = CRYPTO_load_u32_le(nonce + 0);
202   input[14] = CRYPTO_load_u32_le(nonce + 4);
203   input[15] = CRYPTO_load_u32_le(nonce + 8);
204 
205   while (in_len > 0) {
206     todo = sizeof(buf);
207     if (in_len < todo) {
208       todo = in_len;
209     }
210 
211     chacha_core(buf, input);
212     for (i = 0; i < todo; i++) {
213       out[i] = in[i] ^ buf[i];
214     }
215 
216     out += todo;
217     in += todo;
218     in_len -= todo;
219 
220     input[12]++;
221   }
222 }
223 
224 #endif
225