1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4
5 LICENSE TERMS
6
7 The redistribution and use of this software (with or without changes)
8 is allowed without the payment of fees or royalties provided that:
9
10 1. source code distributions include the above copyright notice, this
11 list of conditions and the following disclaimer;
12
13 2. binary distributions include the above copyright notice, this list
14 of conditions and the following disclaimer in their documentation;
15
16 3. the name of the copyright holder is not used to endorse products
17 built using this software without specific written permission.
18
19 DISCLAIMER
20
21 This software is provided 'as is' with no explicit or implied warranties
22 in respect of its properties, including, but not limited to, correctness
23 and/or fitness for purpose.
24 ---------------------------------------------------------------------------
25 Issue 09/09/2006
26
27 This is an AES implementation that uses only 8-bit byte operations on the
28 cipher state (there are options to use 32-bit types if available).
29
30 The combination of mix columns and byte substitution used here is based on
31 that developed by Karl Malbrain. His contribution is acknowledged.
32 */
33
34 /* define if you have a fast memcpy function on your system */
35 #if 1
36 #define HAVE_MEMCPY
37 #include <string.h>
38 #if 0
39 #if defined(_MSC_VER)
40 #include <intrin.h>
41 #pragma intrinsic(memcpy)
42 #endif
43 #endif
44 #endif
45
46 #include <stdint.h>
47 #include <stdlib.h>
48
49 /* define if you have fast 32-bit types on your system */
50 #if 1
51 #define HAVE_UINT_32T
52 #endif
53
/* define if you want to use the S box and finite field lookup tables */
55 #if 1
56 #define USE_TABLES
57 #endif
58
59 /* On Intel Core 2 duo VERSION_1 is faster */
60
61 /* alternative versions (test for performance on your system) */
62 #if 1
63 #define VERSION_1
64 #endif
65
66 #include "aes.h"
67
#if defined(HAVE_UINT_32T)
/* 32-bit word type used by the word-at-a-time block copy/xor paths below */
typedef uint32_t uint_32t;
#endif
71
/* functions for finite field multiplication in the AES Galois field */

/* field polynomials: WPOLY is the AES polynomial x^8 + x^4 + x^3 + x + 1
   (0x11b) held in a 16-bit word; BPOLY is its low byte; DPOLY is the
   constant used when dividing by x (see d2 below) */
#define WPOLY 0x011b
#define BPOLY 0x1b
#define DPOLY 0x008d

/* multiply a field element by 1, 2 (x), 4 (x^2) or 8 (x^3) modulo WPOLY;
   each '* WPOLY' term reduces one bit shifted out of the top of the byte */
#define f1(x) (x)
#define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
#define f4(x) (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
#define f8(x) \
    (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ \
     ((((x) >> 5) & 4) * WPOLY))
/* divide a field element by x (the inverse of f2) */
#define d2(x) (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))

/* multiplications by 3, 9, 0x0b, 0x0d and 0x0e built from the powers above
   (the MixColumns and InvMixColumns coefficients) */
#define f3(x) (f2(x) ^ (x))
#define f9(x) (f8(x) ^ (x))
#define fb(x) (f8(x) ^ f2(x) ^ (x))
#define fd(x) (f8(x) ^ f4(x) ^ (x))
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
91
92 #if defined(USE_TABLES)
93
94 #define sb_data(w) \
95 { /* S Box data values */ \
96 w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), w(0x30), w(0x01), \
97 w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76), w(0xca), w(0x82), w(0xc9), \
98 w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), \
99 w(0x9c), w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), \
100 w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), \
101 w(0x31), w(0x15), w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), \
102 w(0x9a), w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75), \
103 w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0), w(0x52), \
104 w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84), w(0x53), w(0xd1), \
105 w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), \
106 w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), w(0xaa), w(0xfb), \
107 w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), \
108 w(0x3c), w(0x9f), w(0xa8), w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), \
109 w(0x38), w(0xf5), w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), \
110 w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17), \
111 w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73), w(0x60), \
112 w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), \
113 w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32), w(0x3a), \
114 w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), w(0xc2), w(0xd3), w(0xac), w(0x62), \
115 w(0x91), w(0x95), w(0xe4), w(0x79), w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), \
116 w(0xd5), w(0x4e), w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), \
117 w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), \
118 w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a), \
119 w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), \
120 w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1), w(0xf8), \
121 w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94), w(0x9b), w(0x1e), w(0x87), \
122 w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), \
123 w(0xbf), w(0xe6), w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), \
124 w(0x54), w(0xbb), w(0x16) \
125 }
126
127 #define isb_data(w) \
128 { /* inverse S Box data values */ \
129 w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), w(0xbf), w(0x40), \
130 w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb), w(0x7c), w(0xe3), w(0x39), \
131 w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87), w(0x34), w(0x8e), w(0x43), w(0x44), \
132 w(0xc4), w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), \
133 w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), \
134 w(0xc3), w(0x4e), w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), \
135 w(0xb2), w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25), \
136 w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16), w(0xd4), \
137 w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92), w(0x6c), w(0x70), \
138 w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), \
139 w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84), w(0x90), w(0xd8), w(0xab), w(0x00), \
140 w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), \
141 w(0xb3), w(0x45), w(0x06), w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), \
142 w(0x0f), w(0x02), w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), \
143 w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea), \
144 w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73), w(0x96), \
145 w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), \
146 w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1), w(0x1a), \
147 w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), w(0x6f), w(0xb7), w(0x62), w(0x0e), \
148 w(0xaa), w(0x18), w(0xbe), w(0x1b), w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), \
149 w(0xd2), w(0x79), w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), \
150 w(0x5a), w(0xf4), w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), \
151 w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f), \
152 w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), w(0x2d), \
153 w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0), w(0xe0), \
154 w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0), w(0xc8), w(0xeb), w(0xbb), \
155 w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61), w(0x17), w(0x2b), w(0x04), w(0x7e), \
156 w(0xba), w(0x77), w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), \
157 w(0x21), w(0x0c), w(0x7d) \
158 }
159
160 #define mm_data(w) \
161 { /* basic data for forming finite field tables */ \
162 w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), w(0x08), w(0x09), \
163 w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f), w(0x10), w(0x11), w(0x12), \
164 w(0x13), w(0x14), w(0x15), w(0x16), w(0x17), w(0x18), w(0x19), w(0x1a), w(0x1b), \
165 w(0x1c), w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), \
166 w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), \
167 w(0x2e), w(0x2f), w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), \
168 w(0x37), w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f), \
169 w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47), w(0x48), \
170 w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f), w(0x50), w(0x51), \
171 w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), \
172 w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f), w(0x60), w(0x61), w(0x62), w(0x63), \
173 w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), \
174 w(0x6d), w(0x6e), w(0x6f), w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), \
175 w(0x76), w(0x77), w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), \
176 w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87), \
177 w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f), w(0x90), \
178 w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), \
179 w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1), w(0xa2), \
180 w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), w(0xa8), w(0xa9), w(0xaa), w(0xab), \
181 w(0xac), w(0xad), w(0xae), w(0xaf), w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), \
182 w(0xb5), w(0xb6), w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), \
183 w(0xbe), w(0xbf), w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), \
184 w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf), \
185 w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), w(0xd8), \
186 w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0), w(0xe1), \
187 w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7), w(0xe8), w(0xe9), w(0xea), \
188 w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef), w(0xf0), w(0xf1), w(0xf2), w(0xf3), \
189 w(0xf4), w(0xf5), w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), \
190 w(0xfd), w(0xfe), w(0xff) \
191 }
192
/* the instantiated lookup tables: sbox/isbox give SubBytes and its inverse;
   gfm2_sbox and gfm3_sbox hold 2*S[x] and 3*S[x] so the MixColumns multiply
   folds into the S-box lookup (Karl Malbrain's combination); gfmul_9/b/d/e
   are the InvMixColumns multipliers applied to raw state bytes */
static const uint_8t sbox[256] = sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

/* accessors used by the cipher code (the table-free build maps these to
   computed equivalents instead) */
#define s_box(x) sbox[(x)]
#define is_box(x) isbox[(x)]
#define gfm2_sb(x) gfm2_sbox[(x)]
#define gfm3_sb(x) gfm3_sbox[(x)]
#define gfm_9(x) gfmul_9[(x)]
#define gfm_b(x) gfmul_b[(x)]
#define gfm_d(x) gfmul_d[(x)]
#define gfm_e(x) gfmul_e[(x)]
212
213 #else
214
215 /* this is the high bit of x right shifted by 1 */
216 /* position. Since the starting polynomial has */
217 /* 9 bits (0x11b), this right shift keeps the */
218 /* values of all top bits within a byte */
219
hibit(const uint_8t x)220 static uint_8t hibit(const uint_8t x) {
221 uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
222
223 r |= (r >> 2);
224 r |= (r >> 4);
225 return (r + 1) >> 1;
226 }
227
/* return the inverse of the finite field element x, computed with the
   extended Euclidean algorithm over GF(2^8); n1/n2 track the degree of
   p1/p2 as a single set bit (via hibit), and v1/v2 accumulate the
   corresponding Bezout coefficients */
static uint_8t gf_inv(const uint_8t x) {
    uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

    /* 1 is its own inverse; 0 has none and is mapped to 0 */
    if (x < 2) {
        return x;
    }

    for (;;) {
        if (n1) {
            while (n2 >= n1) /* divide polynomial p2 by p1 */
            {
                n2 /= n1; /* shift smaller polynomial left */
                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one */
                v2 ^= (v1 * n2); /* shift accumulated value and */
                n2 = hibit(p2); /* add into result */
            }
        } else {
            return v1; /* p1 reached degree 0: v1 is the inverse */
        }

        if (n2) { /* repeat with values swapped */
            while (n1 >= n2) {
                n1 /= n2;
                p1 ^= p2 * n1;
                v1 ^= v2 * n1;
                n1 = hibit(p1);
            }
        } else {
            return v2; /* p2 reached degree 0: v2 is the inverse */
        }
    }
}
262
263 /* The forward and inverse affine transformations used in the S-box */
fwd_affine(const uint_8t x)264 uint_8t fwd_affine(const uint_8t x) {
265 #if defined(HAVE_UINT_32T)
266 uint_32t w = x;
267 w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
268 return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
269 #else
270 return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^
271 (x >> 4);
272 #endif
273 }
274
/* inverse affine map of the S-box (undoes fwd_affine); only referenced
   through the is_box() macro below, so it is made static like the other
   internal helpers in this file */
static uint_8t inv_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
    /* compute the three rotated terms in one wide word, then fold the
       overflowed high byte back down */
    uint_32t w = x;
    w = (w << 1) ^ (w << 3) ^ (w << 6);
    return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
#else
    return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}
284
/* computed (table-free) equivalents of the lookup-table accessors above */
#define s_box(x) fwd_affine(gf_inv(x))
#define is_box(x) gf_inv(inv_affine(x))
#define gfm2_sb(x) f2(s_box(x))
#define gfm3_sb(x) f3(s_box(x))
#define gfm_9(x) f9(x)
#define gfm_b(x) fb(x)
#define gfm_d(x) fd(x)
#define gfm_e(x) fe(x)

#endif
295
/* block_copy / block_copy_nn abstract over memcpy vs the local byte-copy
   routines so the cipher builds even without a fast memcpy */
#if defined(HAVE_MEMCPY)
#define block_copy_nn(d, s, l) memcpy(d, s, l)
#define block_copy(d, s) memcpy(d, s, N_BLOCK)
#else
#define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
#define block_copy(d, s) copy_block(d, s)
#endif
303
#if !defined(HAVE_MEMCPY)
/* copy one 16-byte block from s to d (fallback used when memcpy is not
   available) */
static void copy_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
    /* four word moves; NOTE(review): assumes d and s are 4-byte aligned —
       confirm for any new caller */
    ((uint_32t*)d)[0] = ((uint_32t*)s)[0];
    ((uint_32t*)d)[1] = ((uint_32t*)s)[1];
    ((uint_32t*)d)[2] = ((uint_32t*)s)[2];
    ((uint_32t*)d)[3] = ((uint_32t*)s)[3];
#else
    ((uint_8t*)d)[0] = ((uint_8t*)s)[0];
    ((uint_8t*)d)[1] = ((uint_8t*)s)[1];
    ((uint_8t*)d)[2] = ((uint_8t*)s)[2];
    ((uint_8t*)d)[3] = ((uint_8t*)s)[3];
    ((uint_8t*)d)[4] = ((uint_8t*)s)[4];
    ((uint_8t*)d)[5] = ((uint_8t*)s)[5];
    ((uint_8t*)d)[6] = ((uint_8t*)s)[6];
    ((uint_8t*)d)[7] = ((uint_8t*)s)[7];
    ((uint_8t*)d)[8] = ((uint_8t*)s)[8];
    ((uint_8t*)d)[9] = ((uint_8t*)s)[9];
    ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
    ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
    ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
    ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
    ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
    ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
#endif
}
330
copy_block_nn(void * d,const void * s,uint_8t nn)331 static void copy_block_nn(void* d, const void* s, uint_8t nn) {
332 while (nn--) {
333 *((uint_8t*)d)++ = *((uint_8t*)s)++;
334 }
335 }
336 #endif
337
/* XOR the 16-byte block at s into the block at d */
static void xor_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
    /* four word xors; NOTE(review): assumes d and s are 4-byte aligned —
       confirm for any new caller */
    ((uint_32t*)d)[0] ^= ((uint_32t*)s)[0];
    ((uint_32t*)d)[1] ^= ((uint_32t*)s)[1];
    ((uint_32t*)d)[2] ^= ((uint_32t*)s)[2];
    ((uint_32t*)d)[3] ^= ((uint_32t*)s)[3];
#else
    ((uint_8t*)d)[0] ^= ((uint_8t*)s)[0];
    ((uint_8t*)d)[1] ^= ((uint_8t*)s)[1];
    ((uint_8t*)d)[2] ^= ((uint_8t*)s)[2];
    ((uint_8t*)d)[3] ^= ((uint_8t*)s)[3];
    ((uint_8t*)d)[4] ^= ((uint_8t*)s)[4];
    ((uint_8t*)d)[5] ^= ((uint_8t*)s)[5];
    ((uint_8t*)d)[6] ^= ((uint_8t*)s)[6];
    ((uint_8t*)d)[7] ^= ((uint_8t*)s)[7];
    ((uint_8t*)d)[8] ^= ((uint_8t*)s)[8];
    ((uint_8t*)d)[9] ^= ((uint_8t*)s)[9];
    ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
    ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
    ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
    ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
    ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
    ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
#endif
}
363
/* d = s ^ k for one 16-byte block (loads the state and adds a round key
   in a single pass) */
static void copy_and_key(void* d, const void* s, const void* k) {
#if defined(HAVE_UINT_32T)
    /* NOTE(review): word path assumes 4-byte aligned arguments — confirm
       for any new caller */
    ((uint_32t*)d)[0] = ((uint_32t*)s)[0] ^ ((uint_32t*)k)[0];
    ((uint_32t*)d)[1] = ((uint_32t*)s)[1] ^ ((uint_32t*)k)[1];
    ((uint_32t*)d)[2] = ((uint_32t*)s)[2] ^ ((uint_32t*)k)[2];
    ((uint_32t*)d)[3] = ((uint_32t*)s)[3] ^ ((uint_32t*)k)[3];
#elif 1
    ((uint_8t*)d)[0] = ((uint_8t*)s)[0] ^ ((uint_8t*)k)[0];
    ((uint_8t*)d)[1] = ((uint_8t*)s)[1] ^ ((uint_8t*)k)[1];
    ((uint_8t*)d)[2] = ((uint_8t*)s)[2] ^ ((uint_8t*)k)[2];
    ((uint_8t*)d)[3] = ((uint_8t*)s)[3] ^ ((uint_8t*)k)[3];
    ((uint_8t*)d)[4] = ((uint_8t*)s)[4] ^ ((uint_8t*)k)[4];
    ((uint_8t*)d)[5] = ((uint_8t*)s)[5] ^ ((uint_8t*)k)[5];
    ((uint_8t*)d)[6] = ((uint_8t*)s)[6] ^ ((uint_8t*)k)[6];
    ((uint_8t*)d)[7] = ((uint_8t*)s)[7] ^ ((uint_8t*)k)[7];
    ((uint_8t*)d)[8] = ((uint_8t*)s)[8] ^ ((uint_8t*)k)[8];
    ((uint_8t*)d)[9] = ((uint_8t*)s)[9] ^ ((uint_8t*)k)[9];
    ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
    ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
    ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
    ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
    ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
    ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
#else
    /* two-pass fallback: copy then xor */
    block_copy(d, s);
    xor_block(d, k);
#endif
}
392
/* AddRoundKey: XOR one 16-byte round key into the state */
static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) { xor_block(d, k); }
394
/* combined SubBytes + ShiftRows on the column-major state: byte st[r + 4*c]
   holds row r of column c */
static void shift_sub_rows(uint_8t st[N_BLOCK]) {
    uint_8t tt;

    /* row 0: no rotation, substitute in place */
    st[0] = s_box(st[0]);
    st[4] = s_box(st[4]);
    st[8] = s_box(st[8]);
    st[12] = s_box(st[12]);

    /* row 1: rotate one column */
    tt = st[1];
    st[1] = s_box(st[5]);
    st[5] = s_box(st[9]);
    st[9] = s_box(st[13]);
    st[13] = s_box(tt);

    /* row 2: rotate two columns (two independent swaps) */
    tt = st[2];
    st[2] = s_box(st[10]);
    st[10] = s_box(tt);
    tt = st[6];
    st[6] = s_box(st[14]);
    st[14] = s_box(tt);

    /* row 3: rotate three columns */
    tt = st[15];
    st[15] = s_box(st[11]);
    st[11] = s_box(st[7]);
    st[7] = s_box(st[3]);
    st[3] = s_box(tt);
}
422
/* combined InvSubBytes + InvShiftRows: undoes shift_sub_rows by rotating
   each row in the opposite direction and applying the inverse S-box */
static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
    uint_8t tt;

    /* row 0: no rotation */
    st[0] = is_box(st[0]);
    st[4] = is_box(st[4]);
    st[8] = is_box(st[8]);
    st[12] = is_box(st[12]);

    /* row 1: rotate back one column */
    tt = st[13];
    st[13] = is_box(st[9]);
    st[9] = is_box(st[5]);
    st[5] = is_box(st[1]);
    st[1] = is_box(tt);

    /* row 2: rotate two columns (self-inverse swaps) */
    tt = st[2];
    st[2] = is_box(st[10]);
    st[10] = is_box(tt);
    tt = st[6];
    st[6] = is_box(st[14]);
    st[14] = is_box(tt);

    /* row 3: rotate back three columns */
    tt = st[3];
    st[3] = is_box(st[7]);
    st[7] = is_box(st[11]);
    st[11] = is_box(st[15]);
    st[15] = is_box(tt);
}
450
#if defined(VERSION_1)
/* combined SubBytes + ShiftRows + MixColumns: each output column mixes the
   S-box images of one diagonally-shifted input column with the (2,3,1,1)
   coefficients (Karl Malbrain's combination). VERSION_1 works in place via
   a local copy of the state */
static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
    uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
/* two-buffer variant: reads st, writes dt, no internal copy */
static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
    dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

    dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

    dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

    dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
}
478
#if defined(VERSION_1)
/* combined InvMixColumns + InvShiftRows + InvSubBytes: each input column is
   mixed with the (0x0e,0x0b,0x0d,0x09) coefficients, then the inverse S-box
   is applied and the result scattered to the shifted output positions.
   VERSION_1 works in place via a local copy of the state */
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
    uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
/* two-buffer variant: reads st, writes dt, no internal copy */
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
    dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
    dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
    dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
    dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));

    dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
    dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
    dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
    dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));

    dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

    dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
}
506
507 #if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)
508
509 /* Set the cipher key for the pre-keyed version */
510 /* NOTE: If the length_type used for the key length is an
511 unsigned 8-bit character, a key length of 256 bits must
512 be entered as a length in bytes (valid inputs are hence
513 128, 192, 16, 24 and 32).
514 */
515
/* expand the user key into ctx->ksch and set ctx->rnd (the round count);
   returns 0 on success, (return_type)-1 on a bad key length (in which
   case ctx->rnd is zeroed so encrypt/decrypt will refuse the context) */
return_type aes_set_key(const unsigned char key[], length_type keylen, aes_context ctx[1]) {
    uint_8t cc, rc, hi;

    /* normalise the key length to bytes (16/24/32); 256 cannot be given
       in bits when length_type is only 8 bits wide — see the note above */
    switch (keylen) {
        case 16:
        case 128: /* length in bits (128 = 8*16) */
            keylen = 16;
            break;
        case 24:
        case 192: /* length in bits (192 = 8*24) */
            keylen = 24;
            break;
        case 32:
            /* case 256:   length in bits (256 = 8*32) */
            keylen = 32;
            break;
        default:
            ctx->rnd = 0; /* mark the context invalid */
            return (return_type)-1;
    }
    block_copy_nn(ctx->ksch, key, keylen);
    hi = (keylen + 28) << 2; /* total key schedule length in bytes */
    ctx->rnd = (hi >> 4) - 1; /* rounds: 10, 12 or 14 */
    for (cc = keylen, rc = 1; cc < hi; cc += 4) {
        uint_8t tt, t0, t1, t2, t3;

        /* previous schedule word */
        t0 = ctx->ksch[cc - 4];
        t1 = ctx->ksch[cc - 3];
        t2 = ctx->ksch[cc - 2];
        t3 = ctx->ksch[cc - 1];
        if (cc % keylen == 0) { /* start of a key-length group: rotate, */
            tt = t0; /* substitute and add the round constant */
            t0 = s_box(t1) ^ rc;
            t1 = s_box(t2);
            t2 = s_box(t3);
            t3 = s_box(tt);
            rc = f2(rc); /* next round constant = 2*rc in the field */
        } else if (keylen > 24 && cc % keylen == 16) {
            /* extra SubWord step used only by 256-bit keys */
            t0 = s_box(t0);
            t1 = s_box(t1);
            t2 = s_box(t2);
            t3 = s_box(t3);
        }
        tt = cc - keylen;
        ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
        ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
        ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
        ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    }
    return 0;
}
567
568 #endif
569
570 #if defined(AES_ENC_PREKEYED)
571
572 /* Encrypt a single block of 16 bytes */
573
/* encrypt one 16-byte block with a pre-expanded key schedule; returns 0 on
   success, (return_type)-1 if the context was never keyed (rnd == 0).
   in and out may be the same buffer (the state is staged in s1) */
return_type aes_encrypt(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                        const aes_context ctx[1]) {
    if (ctx->rnd) {
        uint_8t s1[N_BLOCK], r;
        /* initial AddRoundKey */
        copy_and_key(s1, in, ctx->ksch);

        for (r = 1; r < ctx->rnd; ++r)
#if defined(VERSION_1)
        {
            mix_sub_columns(s1);
            add_round_key(s1, ctx->ksch + r * N_BLOCK);
        }
#else
        {
            uint_8t s2[N_BLOCK];
            mix_sub_columns(s2, s1);
            copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
        }
#endif
        /* final round: no MixColumns; r == ctx->rnd after the loop */
        shift_sub_rows(s1);
        copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
    } else {
        return (return_type)-1;
    }
    return 0;
}
600
601 /* CBC encrypt a number of blocks (input and return an IV) */
602
603 return_type aes_cbc_encrypt(const unsigned char* in, unsigned char* out, int n_block,
604 unsigned char iv[N_BLOCK], const aes_context ctx[1]) {
605 while (n_block--) {
606 xor_block(iv, in);
607 if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) {
608 return EXIT_FAILURE;
609 }
610 memcpy(out, iv, N_BLOCK);
611 in += N_BLOCK;
612 out += N_BLOCK;
613 }
614 return EXIT_SUCCESS;
615 }
616
617 #endif
618
619 #if defined(AES_DEC_PREKEYED)
620
621 /* Decrypt a single block of 16 bytes */
622
/* decrypt one 16-byte block with a pre-expanded key schedule; returns 0 on
   success, (return_type)-1 if the context was never keyed (rnd == 0).
   in and out may be the same buffer (the state is staged in s1) */
return_type aes_decrypt(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                        const aes_context ctx[1]) {
    if (ctx->rnd) {
        uint_8t s1[N_BLOCK], r;
        /* undo the final round: last round key, then inverse rows/bytes */
        copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
        inv_shift_sub_rows(s1);

        /* walk the round keys backwards */
        for (r = ctx->rnd; --r;)
#if defined(VERSION_1)
        {
            add_round_key(s1, ctx->ksch + r * N_BLOCK);
            inv_mix_sub_columns(s1);
        }
#else
        {
            uint_8t s2[N_BLOCK];
            copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
            inv_mix_sub_columns(s1, s2);
        }
#endif
        /* undo the initial AddRoundKey last */
        copy_and_key(out, s1, ctx->ksch);
    } else {
        return (return_type)-1;
    }
    return 0;
}
649
650 /* CBC decrypt a number of blocks (input and return an IV) */
651
652 return_type aes_cbc_decrypt(const unsigned char* in, unsigned char* out, int n_block,
653 unsigned char iv[N_BLOCK], const aes_context ctx[1]) {
654 while (n_block--) {
655 uint_8t tmp[N_BLOCK];
656
657 memcpy(tmp, in, N_BLOCK);
658 if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) {
659 return EXIT_FAILURE;
660 }
661 xor_block(out, iv);
662 memcpy(iv, tmp, N_BLOCK);
663 in += N_BLOCK;
664 out += N_BLOCK;
665 }
666 return EXIT_SUCCESS;
667 }
668
669 #endif
670
671 #if defined(AES_ENC_128_OTFK)
672
/* The 'on the fly' encryption key update for 128 bit keys */
674
/* advance the 128-bit key schedule by one round in place: fold the
   rotated+substituted last word and the round constant into word 0,
   then ripple the xor chain through the remaining words */
static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
    uint_8t cc;

    k[0] ^= s_box(k[13]) ^ *rc;
    k[1] ^= s_box(k[14]);
    k[2] ^= s_box(k[15]);
    k[3] ^= s_box(k[12]);
    *rc = f2(*rc); /* next round constant = 2*rc in the field */

    for (cc = 4; cc < 16; cc += 4) {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
}
691
692 /* Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
693
/* key is the 128-bit cipher key; o_key is working storage for the evolving
   round key (it ends up holding the final round key). Passing o_key == key
   updates the caller's key buffer in place */
void aes_encrypt_128(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK]) {
    uint_8t s1[N_BLOCK], r, rc = 1; /* rc: round constant, starts at 1 */

    if (o_key != key) {
        block_copy(o_key, key);
    }
    /* initial AddRoundKey */
    copy_and_key(s1, in, o_key);

    for (r = 1; r < 10; ++r)
#if defined(VERSION_1)
    {
        mix_sub_columns(s1);
        update_encrypt_key_128(o_key, &rc);
        add_round_key(s1, o_key);
    }
#else
    {
        uint_8t s2[N_BLOCK];
        mix_sub_columns(s2, s1);
        update_encrypt_key_128(o_key, &rc);
        copy_and_key(s1, s2, o_key);
    }
#endif

    /* final round: no MixColumns */
    shift_sub_rows(s1);
    update_encrypt_key_128(o_key, &rc);
    copy_and_key(out, s1, o_key);
}
723
724 #endif
725
726 #if defined(AES_DEC_128_OTFK)
727
/* The 'on the fly' decryption key update for 128 bit keys */
729
/* step the 128-bit key schedule back by one round in place: undo the xor
   chain from the top word down, step the round constant back with d2(),
   then undo the rotated/substituted first word */
static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
    uint_8t cc;

    for (cc = 12; cc > 0; cc -= 4) {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
    *rc = d2(*rc);
    k[0] ^= s_box(k[13]) ^ *rc;
    k[1] ^= s_box(k[14]);
    k[2] ^= s_box(k[15]);
    k[3] ^= s_box(k[12]);
}
745
746 /* Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
747
/* key must hold the FINAL round key (as left by aes_encrypt_128); o_key is
   working storage that is stepped back to the original cipher key.
   Passing o_key == key updates the caller's buffer in place */
void aes_decrypt_128(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK]) {
    /* rc = 0x6c = f2(0x36): one step past the last forward round constant,
       so the first d2() in the key update recovers 0x36 */
    uint_8t s1[N_BLOCK], r, rc = 0x6c;
    if (o_key != key) {
        block_copy(o_key, key);
    }

    /* undo the final round first */
    copy_and_key(s1, in, o_key);
    inv_shift_sub_rows(s1);

    for (r = 10; --r;)
#if defined(VERSION_1)
    {
        update_decrypt_key_128(o_key, &rc);
        add_round_key(s1, o_key);
        inv_mix_sub_columns(s1);
    }
#else
    {
        uint_8t s2[N_BLOCK];
        update_decrypt_key_128(o_key, &rc);
        copy_and_key(s2, s1, o_key);
        inv_mix_sub_columns(s1, s2);
    }
#endif
    /* undo the initial AddRoundKey last */
    update_decrypt_key_128(o_key, &rc);
    copy_and_key(out, s1, o_key);
}
776
777 #endif
778
779 #if defined(AES_ENC_256_OTFK)
780
/* The 'on the fly' encryption key update for 256 bit keys */
782
/* advance the 256-bit key schedule by one 32-byte step in place: the lower
   16 bytes use the rotate+substitute+round-constant rule, the upper 16 use
   the plain SubWord rule, and each half ripples its xor chain forward */
static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
    uint_8t cc;

    k[0] ^= s_box(k[29]) ^ *rc;
    k[1] ^= s_box(k[30]);
    k[2] ^= s_box(k[31]);
    k[3] ^= s_box(k[28]);
    *rc = f2(*rc); /* next round constant = 2*rc in the field */

    for (cc = 4; cc < 16; cc += 4) {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    /* upper half: SubWord of the lower half's last word, no rotation */
    k[16] ^= s_box(k[12]);
    k[17] ^= s_box(k[13]);
    k[18] ^= s_box(k[14]);
    k[19] ^= s_box(k[15]);

    for (cc = 20; cc < 32; cc += 4) {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
}
811
812 /* Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
813
/* key is the 256-bit cipher key (two 16-byte halves); o_key is 32 bytes of
   working storage for the evolving schedule. Passing o_key == key updates
   the caller's buffer in place */
void aes_encrypt_256(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK]) {
    uint_8t s1[N_BLOCK], r, rc = 1; /* rc: round constant, starts at 1 */
    if (o_key != key) {
        block_copy(o_key, key);
        block_copy(o_key + 16, key + 16);
    }
    /* initial AddRoundKey with the lower half */
    copy_and_key(s1, in, o_key);

    for (r = 1; r < 14; ++r)
#if defined(VERSION_1)
    {
        mix_sub_columns(s1);
        /* odd rounds use the upper half of the 256-bit key; even rounds
           advance the schedule and use the lower half */
        if (r & 1) {
            add_round_key(s1, o_key + 16);
        } else {
            update_encrypt_key_256(o_key, &rc);
            add_round_key(s1, o_key);
        }
    }
#else
    {
        uint_8t s2[N_BLOCK];
        mix_sub_columns(s2, s1);
        if (r & 1) {
            copy_and_key(s1, s2, o_key + 16);
        } else {
            update_encrypt_key_256(o_key, &rc);
            copy_and_key(s1, s2, o_key);
        }
    }
#endif

    /* final round: no MixColumns */
    shift_sub_rows(s1);
    update_encrypt_key_256(o_key, &rc);
    copy_and_key(out, s1, o_key);
}
851
852 #endif
853
854 #if defined(AES_DEC_256_OTFK)
855
/* The 'on the fly' decryption key update for 256 bit keys */
857
/* step the 256-bit key schedule back by one 32-byte step in place: undo the
   xor chains of both halves from the top down, undo the upper half's
   SubWord word, step the round constant back with d2(), then undo the
   rotated/substituted first word of the lower half */
static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
    uint_8t cc;

    for (cc = 28; cc > 16; cc -= 4) {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    k[16] ^= s_box(k[12]);
    k[17] ^= s_box(k[13]);
    k[18] ^= s_box(k[14]);
    k[19] ^= s_box(k[15]);

    for (cc = 12; cc > 0; cc -= 4) {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    *rc = d2(*rc);
    k[0] ^= s_box(k[29]) ^ *rc;
    k[1] ^= s_box(k[30]);
    k[2] ^= s_box(k[31]);
    k[3] ^= s_box(k[28]);
}
886
887 /* Decrypt a single block of 16 bytes with 'on the fly'
888 256 bit keying
889 */
/* key must hold the FINAL 32-byte schedule state (as left by
   aes_encrypt_256); o_key is working storage that is stepped back to the
   original cipher key. Passing o_key == key updates the caller's buffer
   in place */
void aes_decrypt_256(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK]) {
    /* rc = 0x80: one f2() step past the last forward round constant (0x40),
       so the first d2() in the key update recovers it */
    uint_8t s1[N_BLOCK], r, rc = 0x80;

    if (o_key != key) {
        block_copy(o_key, key);
        block_copy(o_key + 16, key + 16);
    }

    /* undo the final round first */
    copy_and_key(s1, in, o_key);
    inv_shift_sub_rows(s1);

    for (r = 14; --r;)
#if defined(VERSION_1)
    {
        /* odd rounds step the schedule back and use the upper half;
           even rounds use the lower half unchanged */
        if (r & 1) {
            update_decrypt_key_256(o_key, &rc);
            add_round_key(s1, o_key + 16);
        } else {
            add_round_key(s1, o_key);
        }
        inv_mix_sub_columns(s1);
    }
#else
    {
        uint_8t s2[N_BLOCK];
        if (r & 1) {
            update_decrypt_key_256(o_key, &rc);
            copy_and_key(s2, s1, o_key + 16);
        } else {
            copy_and_key(s2, s1, o_key);
        }
        inv_mix_sub_columns(s1, s2);
    }
#endif
    /* undo the initial AddRoundKey last */
    copy_and_key(out, s1, o_key);
}
927
928 #endif
929