/*
 ---------------------------------------------------------------------------
 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The redistribution and use of this software (with or without changes)
 is allowed without the payment of fees or royalties provided that:

  1. source code distributions include the above copyright notice, this
     list of conditions and the following disclaimer;

  2. binary distributions include the above copyright notice, this list
     of conditions and the following disclaimer in their documentation;

  3. the name of the copyright holder is not used to endorse products
     built using this software without specific written permission.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue 09/09/2006

 This is an AES implementation that uses only 8-bit byte operations on the
 cipher state (there are options to use 32-bit types if available).

 The combination of mix columns and byte substitution used here is based on
 that developed by Karl Malbrain. His contribution is acknowledged.
 */

/* define if you have a fast memcpy function on your system */
#if 1
#define HAVE_MEMCPY
#include <string.h>
#if 0
#if defined(_MSC_VER)
#include <intrin.h>
#pragma intrinsic(memcpy)
#endif
#endif
#endif

#include <stdint.h>
#include <stdlib.h>

/* define if you have fast 32-bit types on your system */
#if 1
#define HAVE_UINT_32T
#endif

/* define if you want to use the static lookup tables */
#if 1
#define USE_TABLES
#endif

/*  On an Intel Core 2 Duo, VERSION_1 is faster */

/* alternative versions (test for performance on your system) */
#if 1
#define VERSION_1
#endif

#include "aes.h"

#if defined(HAVE_UINT_32T)
typedef uint32_t uint_32t;
#endif

/* functions for finite field multiplication in the AES Galois field    */

#define WPOLY 0x011b
#define BPOLY 0x1b
#define DPOLY 0x008d

#define f1(x) (x)
#define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
#define f4(x) (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
#define f8(x)                                                             \
  (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ \
   ((((x) >> 5) & 4) * WPOLY))
#define d2(x) (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))

#define f3(x) (f2(x) ^ (x))
#define f9(x) (f8(x) ^ (x))
#define fb(x) (f8(x) ^ f2(x) ^ (x))
#define fd(x) (f8(x) ^ f4(x) ^ (x))
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
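
/* Worked sketch of the macros above (an illustrative check added by the
   editor, easy to redo by hand) using the FIPS-197 example byte 0x57:
   f2(0x57) = 0xae, f4(0x57) = 0x47 and f8(0x57) = 0x8e, so a product such
   as 0x57 * 0x13 is 0x57 ^ f2(0x57) ^ f2(f8(0x57)) = 0x57 ^ 0xae ^ 0x07 =
   0xfe, every step being reduced modulo WPOLY = x^8 + x^4 + x^3 + x + 1. */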

#if defined(USE_TABLES)

#define sb_data(w) \
  { /* S Box data values */ \
    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), w(0x30), w(0x01), \
            w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76), w(0xca), w(0x82), w(0xc9), \
            w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), \
            w(0x9c), w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), \
            w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), \
            w(0x31), w(0x15), w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), \
            w(0x9a), w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75), \
            w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0), w(0x52), \
            w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84), w(0x53), w(0xd1), \
            w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), \
            w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), w(0xaa), w(0xfb), \
            w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), \
            w(0x3c), w(0x9f), w(0xa8), w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), \
            w(0x38), w(0xf5), w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), \
            w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17), \
            w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73), w(0x60), \
            w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), \
            w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32), w(0x3a), \
            w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), w(0xc2), w(0xd3), w(0xac), w(0x62), \
            w(0x91), w(0x95), w(0xe4), w(0x79), w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), \
            w(0xd5), w(0x4e), w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), \
            w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), \
            w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a), \
            w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), \
            w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1), w(0xf8), \
            w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94), w(0x9b), w(0x1e), w(0x87), \
            w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), \
            w(0xbf), w(0xe6), w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), \
            w(0x54), w(0xbb), w(0x16) \
  }

#define isb_data(w) \
  { /* inverse S Box data values */ \
    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), w(0xbf), w(0x40), \
            w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb), w(0x7c), w(0xe3), w(0x39), \
            w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87), w(0x34), w(0x8e), w(0x43), w(0x44), \
            w(0xc4), w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), \
            w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), \
            w(0xc3), w(0x4e), w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), \
            w(0xb2), w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25), \
            w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16), w(0xd4), \
            w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92), w(0x6c), w(0x70), \
            w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), \
            w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84), w(0x90), w(0xd8), w(0xab), w(0x00), \
            w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), \
            w(0xb3), w(0x45), w(0x06), w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), \
            w(0x0f), w(0x02), w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), \
            w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea), \
            w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73), w(0x96), \
            w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), \
            w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1), w(0x1a), \
            w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), w(0x6f), w(0xb7), w(0x62), w(0x0e), \
            w(0xaa), w(0x18), w(0xbe), w(0x1b), w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), \
            w(0xd2), w(0x79), w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), \
            w(0x5a), w(0xf4), w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), \
            w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f), \
            w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), w(0x2d), \
            w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0), w(0xe0), \
            w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0), w(0xc8), w(0xeb), w(0xbb), \
            w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61), w(0x17), w(0x2b), w(0x04), w(0x7e), \
            w(0xba), w(0x77), w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), \
            w(0x21), w(0x0c), w(0x7d) \
  }

#define mm_data(w) \
  { /* basic data for forming finite field tables */ \
    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), w(0x08), w(0x09), \
            w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f), w(0x10), w(0x11), w(0x12), \
            w(0x13), w(0x14), w(0x15), w(0x16), w(0x17), w(0x18), w(0x19), w(0x1a), w(0x1b), \
            w(0x1c), w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), \
            w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), \
            w(0x2e), w(0x2f), w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), \
            w(0x37), w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f), \
            w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47), w(0x48), \
            w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f), w(0x50), w(0x51), \
            w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), \
            w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f), w(0x60), w(0x61), w(0x62), w(0x63), \
            w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), \
            w(0x6d), w(0x6e), w(0x6f), w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), \
            w(0x76), w(0x77), w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), \
            w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87), \
            w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f), w(0x90), \
            w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), \
            w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1), w(0xa2), \
            w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), w(0xa8), w(0xa9), w(0xaa), w(0xab), \
            w(0xac), w(0xad), w(0xae), w(0xaf), w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), \
            w(0xb5), w(0xb6), w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), \
            w(0xbe), w(0xbf), w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), \
            w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf), \
            w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), w(0xd8), \
            w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0), w(0xe1), \
            w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7), w(0xe8), w(0xe9), w(0xea), \
            w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef), w(0xf0), w(0xf1), w(0xf2), w(0xf3), \
            w(0xf4), w(0xf5), w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), \
            w(0xfd), w(0xfe), w(0xff) \
  }

static const uint_8t sbox[256] = sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

#define s_box(x) sbox[(x)]
#define is_box(x) isbox[(x)]
#define gfm2_sb(x) gfm2_sbox[(x)]
#define gfm3_sb(x) gfm3_sbox[(x)]
#define gfm_9(x) gfmul_9[(x)]
#define gfm_b(x) gfmul_b[(x)]
#define gfm_d(x) gfmul_d[(x)]
#define gfm_e(x) gfmul_e[(x)]

#else

/* this is the high bit of x right shifted by 1 */
/* position. Since the starting polynomial has  */
/* 9 bits (0x11b), this right shift keeps the   */
/* values of all top bits within a byte         */

static uint_8t hibit(const uint_8t x) {
  uint_8t r = (uint_8t)((x >> 1) | (x >> 2));

  r |= (r >> 2);
  r |= (r >> 4);
  return (r + 1) >> 1;
}
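
/* Illustrative check (not from the original source): hibit(0x4f) returns
   0x20, the top set bit 0x40 shifted right once, and hibit(0) returns 0. */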

/* return the inverse of the finite field element x */

static uint_8t gf_inv(const uint_8t x) {
  uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

  if (x < 2) {
    return x;
  }

  for (;;) {
    if (n1) {
      while (n2 >= n1) /* divide polynomial p2 by p1    */
      {
        n2 /= n1;               /* shift smaller polynomial left */
        p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
        v2 ^= (v1 * n2);        /* shift accumulated value and   */
        n2 = hibit(p2);         /* add into result               */
      }
    } else {
      return v1;
    }

    if (n2) { /* repeat with values swapped    */
      while (n1 >= n2) {
        n1 /= n2;
        p1 ^= p2 * n1;
        v1 ^= v2 * n1;
        n1 = hibit(p1);
      }
    } else {
      return v2;
    }
  }
}
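
/* Illustrative check (not from the original source): gf_inv(0x02) = 0x8d,
   since 0x02 * 0x8d = 0x11a and 0x11a ^ 0x11b = 0x01, i.e. the product
   reduces to 1 modulo the field polynomial.                              */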

/* The forward and inverse affine transformations used in the S-box */
uint_8t fwd_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
  uint_32t w = x;
  w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
  return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
#else
  return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^
         (x >> 4);
#endif
}

uint_8t inv_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
  uint_32t w = x;
  w = (w << 1) ^ (w << 3) ^ (w << 6);
  return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
#else
  return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}

#define s_box(x) fwd_affine(gf_inv(x))
#define is_box(x) gf_inv(inv_affine(x))
#define gfm2_sb(x) f2(s_box(x))
#define gfm3_sb(x) f3(s_box(x))
#define gfm_9(x) f9(x)
#define gfm_b(x) fb(x)
#define gfm_d(x) fd(x)
#define gfm_e(x) fe(x)
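
/* Illustrative check (not from the original source): composing the two steps
   reproduces the FIPS-197 S-box, e.g. s_box(0x53): gf_inv(0x53) = 0xca and
   fwd_affine(0xca) = 0xed, while s_box(0x00) = fwd_affine(0x00) = 0x63
   because gf_inv maps 0 to 0.                                              */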

#endif

#if defined(HAVE_MEMCPY)
#define block_copy_nn(d, s, l) memcpy(d, s, l)
#define block_copy(d, s) memcpy(d, s, N_BLOCK)
#else
#define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
#define block_copy(d, s) copy_block(d, s)
#endif

#if !defined(HAVE_MEMCPY)
static void copy_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] = ((uint_32t*)s)[0];
  ((uint_32t*)d)[1] = ((uint_32t*)s)[1];
  ((uint_32t*)d)[2] = ((uint_32t*)s)[2];
  ((uint_32t*)d)[3] = ((uint_32t*)s)[3];
#else
  ((uint_8t*)d)[0] = ((uint_8t*)s)[0];
  ((uint_8t*)d)[1] = ((uint_8t*)s)[1];
  ((uint_8t*)d)[2] = ((uint_8t*)s)[2];
  ((uint_8t*)d)[3] = ((uint_8t*)s)[3];
  ((uint_8t*)d)[4] = ((uint_8t*)s)[4];
  ((uint_8t*)d)[5] = ((uint_8t*)s)[5];
  ((uint_8t*)d)[6] = ((uint_8t*)s)[6];
  ((uint_8t*)d)[7] = ((uint_8t*)s)[7];
  ((uint_8t*)d)[8] = ((uint_8t*)s)[8];
  ((uint_8t*)d)[9] = ((uint_8t*)s)[9];
  ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
  ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
  ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
  ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
  ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
  ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
#endif
}

static void copy_block_nn(void* d, const void* s, uint_8t nn) {
  /* use local pointers: incrementing the result of a cast is not valid C */
  uint_8t* dp = (uint_8t*)d;
  const uint_8t* sp = (const uint_8t*)s;

  while (nn--) {
    *dp++ = *sp++;
  }
}
#endif

static void xor_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] ^= ((uint_32t*)s)[0];
  ((uint_32t*)d)[1] ^= ((uint_32t*)s)[1];
  ((uint_32t*)d)[2] ^= ((uint_32t*)s)[2];
  ((uint_32t*)d)[3] ^= ((uint_32t*)s)[3];
#else
  ((uint_8t*)d)[0] ^= ((uint_8t*)s)[0];
  ((uint_8t*)d)[1] ^= ((uint_8t*)s)[1];
  ((uint_8t*)d)[2] ^= ((uint_8t*)s)[2];
  ((uint_8t*)d)[3] ^= ((uint_8t*)s)[3];
  ((uint_8t*)d)[4] ^= ((uint_8t*)s)[4];
  ((uint_8t*)d)[5] ^= ((uint_8t*)s)[5];
  ((uint_8t*)d)[6] ^= ((uint_8t*)s)[6];
  ((uint_8t*)d)[7] ^= ((uint_8t*)s)[7];
  ((uint_8t*)d)[8] ^= ((uint_8t*)s)[8];
  ((uint_8t*)d)[9] ^= ((uint_8t*)s)[9];
  ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
  ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
  ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
  ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
  ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
  ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
#endif
}

static void copy_and_key(void* d, const void* s, const void* k) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] = ((uint_32t*)s)[0] ^ ((uint_32t*)k)[0];
  ((uint_32t*)d)[1] = ((uint_32t*)s)[1] ^ ((uint_32t*)k)[1];
  ((uint_32t*)d)[2] = ((uint_32t*)s)[2] ^ ((uint_32t*)k)[2];
  ((uint_32t*)d)[3] = ((uint_32t*)s)[3] ^ ((uint_32t*)k)[3];
#elif 1
  ((uint_8t*)d)[0] = ((uint_8t*)s)[0] ^ ((uint_8t*)k)[0];
  ((uint_8t*)d)[1] = ((uint_8t*)s)[1] ^ ((uint_8t*)k)[1];
  ((uint_8t*)d)[2] = ((uint_8t*)s)[2] ^ ((uint_8t*)k)[2];
  ((uint_8t*)d)[3] = ((uint_8t*)s)[3] ^ ((uint_8t*)k)[3];
  ((uint_8t*)d)[4] = ((uint_8t*)s)[4] ^ ((uint_8t*)k)[4];
  ((uint_8t*)d)[5] = ((uint_8t*)s)[5] ^ ((uint_8t*)k)[5];
  ((uint_8t*)d)[6] = ((uint_8t*)s)[6] ^ ((uint_8t*)k)[6];
  ((uint_8t*)d)[7] = ((uint_8t*)s)[7] ^ ((uint_8t*)k)[7];
  ((uint_8t*)d)[8] = ((uint_8t*)s)[8] ^ ((uint_8t*)k)[8];
  ((uint_8t*)d)[9] = ((uint_8t*)s)[9] ^ ((uint_8t*)k)[9];
  ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
  ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
  ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
  ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
  ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
  ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
#else
  block_copy(d, s);
  xor_block(d, k);
#endif
}

static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) { xor_block(d, k); }

static void shift_sub_rows(uint_8t st[N_BLOCK]) {
  uint_8t tt;

  st[0] = s_box(st[0]);
  st[4] = s_box(st[4]);
  st[8] = s_box(st[8]);
  st[12] = s_box(st[12]);

  tt = st[1];
  st[1] = s_box(st[5]);
  st[5] = s_box(st[9]);
  st[9] = s_box(st[13]);
  st[13] = s_box(tt);

  tt = st[2];
  st[2] = s_box(st[10]);
  st[10] = s_box(tt);
  tt = st[6];
  st[6] = s_box(st[14]);
  st[14] = s_box(tt);

  tt = st[15];
  st[15] = s_box(st[11]);
  st[11] = s_box(st[7]);
  st[7] = s_box(st[3]);
  st[3] = s_box(tt);
}

static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
  uint_8t tt;

  st[0] = is_box(st[0]);
  st[4] = is_box(st[4]);
  st[8] = is_box(st[8]);
  st[12] = is_box(st[12]);

  tt = st[13];
  st[13] = is_box(st[9]);
  st[9] = is_box(st[5]);
  st[5] = is_box(st[1]);
  st[1] = is_box(tt);

  tt = st[2];
  st[2] = is_box(st[10]);
  st[10] = is_box(tt);
  tt = st[6];
  st[6] = is_box(st[14]);
  st[14] = is_box(tt);

  tt = st[3];
  st[3] = is_box(st[7]);
  st[7] = is_box(st[11]);
  st[11] = is_box(st[15]);
  st[15] = is_box(tt);
}

#if defined(VERSION_1)
static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
  uint_8t st[N_BLOCK];
  block_copy(st, dt);
#else
static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
  dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
  dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
  dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
  dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

  dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
  dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
  dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
  dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

  dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
  dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
  dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
  dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

  dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
  dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
  dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
  dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
}

#if defined(VERSION_1)
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
  uint_8t st[N_BLOCK];
  block_copy(st, dt);
#else
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
  dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
  dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
  dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
  dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));

  dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
  dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
  dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
  dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));

  dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
  dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
  dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
  dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

  dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
  dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
  dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
  dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
}
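
/* The coefficients above are the MixColumns matrix rows (02 03 01 01 and its
   rotations) applied to the already substituted bytes, and the InvMixColumns
   rows (0e 0b 0d 09 and its rotations) applied before the inverse
   substitution; the byte indices also fold the (inverse) ShiftRows
   permutation into the same pass.                                           */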

#if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)

/*  Set the cipher key for the pre-keyed version */
/*  NOTE: If the length_type used for the key length is an
    unsigned 8-bit character, a key length of 256 bits must
    be entered as a length in bytes (valid inputs are hence
    128, 192, 16, 24 and 32).
*/

return_type aes_set_key(const unsigned char key[], length_type keylen, aes_context ctx[1]) {
  uint_8t cc, rc, hi;

  switch (keylen) {
    case 16:
    case 128: /* length in bits (128 = 8*16) */
      keylen = 16;
      break;
    case 24:
    case 192: /* length in bits (192 = 8*24) */
      keylen = 24;
      break;
    case 32:
      /*    case 256:           length in bits (256 = 8*32) */
      keylen = 32;
      break;
    default:
      ctx->rnd = 0;
      return (return_type)-1;
  }
  block_copy_nn(ctx->ksch, key, keylen);
  hi = (keylen + 28) << 2;
  ctx->rnd = (hi >> 4) - 1;
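  /* for 16, 24 and 32 byte keys this gives a schedule of 176, 208 and 240
     bytes and a round count of 10, 12 and 14 respectively                */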
  for (cc = keylen, rc = 1; cc < hi; cc += 4) {
    uint_8t tt, t0, t1, t2, t3;

    t0 = ctx->ksch[cc - 4];
    t1 = ctx->ksch[cc - 3];
    t2 = ctx->ksch[cc - 2];
    t3 = ctx->ksch[cc - 1];
    if (cc % keylen == 0) {
      tt = t0;
      t0 = s_box(t1) ^ rc;
      t1 = s_box(t2);
      t2 = s_box(t3);
      t3 = s_box(tt);
      rc = f2(rc);
    } else if (keylen > 24 && cc % keylen == 16) {
      t0 = s_box(t0);
      t1 = s_box(t1);
      t2 = s_box(t2);
      t3 = s_box(t3);
    }
    tt = cc - keylen;
    ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
    ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
    ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
    ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
  }
  return 0;
}

#endif

#if defined(AES_ENC_PREKEYED)

/*  Encrypt a single block of 16 bytes */

return_type aes_encrypt(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                        const aes_context ctx[1]) {
  if (ctx->rnd) {
    uint_8t s1[N_BLOCK], r;
    copy_and_key(s1, in, ctx->ksch);

    for (r = 1; r < ctx->rnd; ++r)
#if defined(VERSION_1)
    {
      mix_sub_columns(s1);
      add_round_key(s1, ctx->ksch + r * N_BLOCK);
    }
#else
    {
      uint_8t s2[N_BLOCK];
      mix_sub_columns(s2, s1);
      copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
    }
#endif
    shift_sub_rows(s1);
    copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
  } else {
    return (return_type)-1;
  }
  return 0;
}
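
/* Illustrative usage sketch, not part of the original source and kept out of
   the build with '#if 0': expand a 128-bit key and encrypt one block. It only
   uses the aes_context, aes_set_key and aes_encrypt declarations from aes.h
   that appear above; the all-zero key and plaintext are arbitrary.           */
#if 0
static int example_encrypt_one_block(void) {
  static const unsigned char key[16] = {0}; /* any 16, 24 or 32 byte key */
  static const unsigned char pt[N_BLOCK] = {0};
  unsigned char ct[N_BLOCK];
  aes_context ctx[1];

  if (aes_set_key(key, 16, ctx) != 0) { /* 16 = key length in bytes */
    return -1;
  }
  return aes_encrypt(pt, ct, ctx) == 0 ? 0 : -1;
}
#endif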

/* CBC encrypt a number of blocks (input and return an IV) */

return_type aes_cbc_encrypt(const unsigned char* in, unsigned char* out, int n_block,
                            unsigned char iv[N_BLOCK], const aes_context ctx[1]) {
  while (n_block--) {
    xor_block(iv, in);
    if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) {
      return EXIT_FAILURE;
    }
    memcpy(out, iv, N_BLOCK);
    in += N_BLOCK;
    out += N_BLOCK;
  }
  return EXIT_SUCCESS;
}

#endif

#if defined(AES_DEC_PREKEYED)

/*  Decrypt a single block of 16 bytes */

return_type aes_decrypt(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                        const aes_context ctx[1]) {
  if (ctx->rnd) {
    uint_8t s1[N_BLOCK], r;
    copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
    inv_shift_sub_rows(s1);

    for (r = ctx->rnd; --r;)
#if defined(VERSION_1)
    {
      add_round_key(s1, ctx->ksch + r * N_BLOCK);
      inv_mix_sub_columns(s1);
    }
#else
    {
      uint_8t s2[N_BLOCK];
      copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
      inv_mix_sub_columns(s1, s2);
    }
#endif
    copy_and_key(out, s1, ctx->ksch);
  } else {
    return (return_type)-1;
  }
  return 0;
}

/* CBC decrypt a number of blocks (input and return an IV) */

return_type aes_cbc_decrypt(const unsigned char* in, unsigned char* out, int n_block,
                            unsigned char iv[N_BLOCK], const aes_context ctx[1]) {
  while (n_block--) {
    uint_8t tmp[N_BLOCK];

    memcpy(tmp, in, N_BLOCK);
    if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) {
      return EXIT_FAILURE;
    }
    xor_block(out, iv);
    memcpy(iv, tmp, N_BLOCK);
    in += N_BLOCK;
    out += N_BLOCK;
  }
  return EXIT_SUCCESS;
}
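
/* Illustrative usage sketch, not part of the original source and kept out of
   the build with '#if 0': a two-block CBC round trip. It assumes both
   AES_ENC_PREKEYED and AES_DEC_PREKEYED are enabled and keeps separate IV
   copies, because both CBC routines overwrite the IV they are given; memcmp
   comes from <string.h>, already included when HAVE_MEMCPY is defined.      */
#if 0
static int example_cbc_round_trip(void) {
  static const unsigned char key[16] = {0};
  unsigned char iv_enc[N_BLOCK] = {0}, iv_dec[N_BLOCK] = {0};
  unsigned char pt[2 * N_BLOCK] = {0}, ct[2 * N_BLOCK], rt[2 * N_BLOCK];
  aes_context ctx[1];

  if (aes_set_key(key, 16, ctx) != 0) {
    return -1;
  }
  if (aes_cbc_encrypt(pt, ct, 2, iv_enc, ctx) != EXIT_SUCCESS ||
      aes_cbc_decrypt(ct, rt, 2, iv_dec, ctx) != EXIT_SUCCESS) {
    return -1;
  }
  return memcmp(pt, rt, sizeof(pt)) == 0 ? 0 : -1;
}
#endif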

#endif

#if defined(AES_ENC_128_OTFK)

/*  The 'on the fly' encryption key update for 128 bit keys */

static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  k[0] ^= s_box(k[13]) ^ *rc;
  k[1] ^= s_box(k[14]);
  k[2] ^= s_box(k[15]);
  k[3] ^= s_box(k[12]);
  *rc = f2(*rc);

  for (cc = 4; cc < 16; cc += 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }
}

/*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */

void aes_encrypt_128(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 1;

  if (o_key != key) {
    block_copy(o_key, key);
  }
  copy_and_key(s1, in, o_key);

  for (r = 1; r < 10; ++r)
#if defined(VERSION_1)
  {
    mix_sub_columns(s1);
    update_encrypt_key_128(o_key, &rc);
    add_round_key(s1, o_key);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    mix_sub_columns(s2, s1);
    update_encrypt_key_128(o_key, &rc);
    copy_and_key(s1, s2, o_key);
  }
#endif

  shift_sub_rows(s1);
  update_encrypt_key_128(o_key, &rc);
  copy_and_key(out, s1, o_key);
}
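
/* Illustrative usage sketch, not part of the original source and kept out of
   the build with '#if 0': the 'on the fly' entry point needs no aes_context.
   The caller passes a scratch buffer that aes_encrypt_128 advances to the
   final round key as it goes, so the original key is untouched when key and
   o_key are distinct buffers.                                               */
#if 0
static void example_encrypt_128_otfk(const unsigned char key[N_BLOCK],
                                     const unsigned char pt[N_BLOCK],
                                     unsigned char ct[N_BLOCK]) {
  unsigned char o_key[N_BLOCK]; /* scratch round key, clobbered by the call */

  aes_encrypt_128(pt, ct, key, o_key);
}
#endif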

#endif

#if defined(AES_DEC_128_OTFK)

/*  The 'on the fly' decryption key update for 128 bit keys */

static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  for (cc = 12; cc > 0; cc -= 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }
  *rc = d2(*rc);
  k[0] ^= s_box(k[13]) ^ *rc;
  k[1] ^= s_box(k[14]);
  k[2] ^= s_box(k[15]);
  k[3] ^= s_box(k[12]);
}

/*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */

void aes_decrypt_128(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 0x6c;
  if (o_key != key) {
    block_copy(o_key, key);
  }

  copy_and_key(s1, in, o_key);
  inv_shift_sub_rows(s1);

  for (r = 10; --r;)
#if defined(VERSION_1)
  {
    update_decrypt_key_128(o_key, &rc);
    add_round_key(s1, o_key);
    inv_mix_sub_columns(s1);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    update_decrypt_key_128(o_key, &rc);
    copy_and_key(s2, s1, o_key);
    inv_mix_sub_columns(s1, s2);
  }
#endif
  update_decrypt_key_128(o_key, &rc);
  copy_and_key(out, s1, o_key);
}

#endif

#if defined(AES_ENC_256_OTFK)

/*  The 'on the fly' encryption key update for 256 bit keys */

static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  k[0] ^= s_box(k[29]) ^ *rc;
  k[1] ^= s_box(k[30]);
  k[2] ^= s_box(k[31]);
  k[3] ^= s_box(k[28]);
  *rc = f2(*rc);

  for (cc = 4; cc < 16; cc += 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }

  k[16] ^= s_box(k[12]);
  k[17] ^= s_box(k[13]);
  k[18] ^= s_box(k[14]);
  k[19] ^= s_box(k[15]);

  for (cc = 20; cc < 32; cc += 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }
}

/*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */

void aes_encrypt_256(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 1;
  if (o_key != key) {
    block_copy(o_key, key);
    block_copy(o_key + 16, key + 16);
  }
  copy_and_key(s1, in, o_key);

  for (r = 1; r < 14; ++r)
#if defined(VERSION_1)
  {
    mix_sub_columns(s1);
    if (r & 1) {
      add_round_key(s1, o_key + 16);
    } else {
      update_encrypt_key_256(o_key, &rc);
      add_round_key(s1, o_key);
    }
  }
#else
  {
    uint_8t s2[N_BLOCK];
    mix_sub_columns(s2, s1);
    if (r & 1) {
      copy_and_key(s1, s2, o_key + 16);
    } else {
      update_encrypt_key_256(o_key, &rc);
      copy_and_key(s1, s2, o_key);
    }
  }
#endif

  shift_sub_rows(s1);
  update_encrypt_key_256(o_key, &rc);
  copy_and_key(out, s1, o_key);
}

#endif

#if defined(AES_DEC_256_OTFK)

/*  The 'on the fly' decryption key update for 256 bit keys */

static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  for (cc = 28; cc > 16; cc -= 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }

  k[16] ^= s_box(k[12]);
  k[17] ^= s_box(k[13]);
  k[18] ^= s_box(k[14]);
  k[19] ^= s_box(k[15]);

  for (cc = 12; cc > 0; cc -= 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }

  *rc = d2(*rc);
  k[0] ^= s_box(k[29]) ^ *rc;
  k[1] ^= s_box(k[30]);
  k[2] ^= s_box(k[31]);
  k[3] ^= s_box(k[28]);
}

/*  Decrypt a single block of 16 bytes with 'on the fly' 256 bit keying */

void aes_decrypt_256(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 0x80;

  if (o_key != key) {
    block_copy(o_key, key);
    block_copy(o_key + 16, key + 16);
  }

  copy_and_key(s1, in, o_key);
  inv_shift_sub_rows(s1);

  for (r = 14; --r;)
#if defined(VERSION_1)
  {
    if (r & 1) {
      update_decrypt_key_256(o_key, &rc);
      add_round_key(s1, o_key + 16);
    } else {
      add_round_key(s1, o_key);
    }
    inv_mix_sub_columns(s1);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    if (r & 1) {
      update_decrypt_key_256(o_key, &rc);
      copy_and_key(s2, s1, o_key + 16);
    } else {
      copy_and_key(s2, s1, o_key);
    }
    inv_mix_sub_columns(s1, s2);
  }
#endif
  copy_and_key(out, s1, o_key);
}

#endif