// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// Contents: AES key setup, single-block, CBC and CTR32 routines for 32-bit
// ARM built on the ARMv8 AES instructions (aese/aesd/aesmc/aesimc). Those
// instructions are emitted as raw .byte sequences (presumably so the file
// still assembles under the .arch armv7-a setting below); the mnemonic each
// sequence encodes is given in its trailing comment.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with ARMv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
@ Constants for the key schedule, read through r3 by the key setup code:
@   row 0: initial round constant 0x01 in every 32-bit lane
@   row 1: byte-index mask for vtbl.8 (picks bytes 13,14,15,12 per lane)
@   row 2: round constant 0x1b, loaded for the last two 128-bit key rounds
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@ aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
@   In:  r0 = pointer to the user key bytes
@        r1 = key length in bits
@        r2 = pointer to the key schedule to fill
@   Out: r0 = 0 on success,
@             -1 if r0 or r2 is zero,
@             -2 if r1 is not 128, 192 or 256
@   The round count (10, 12 or 14) is stored as a 32-bit word at byte
@   offset 240 from the start of the schedule.
@   Scratch: r1, r3, r12, q0-q3, q8-q10.
@   Also entered at .Lenc_key by aes_hw_set_decrypt_key, which relies on
@   r2 pointing at offset 240 and r12 holding the round count on return.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192		@ flags stay live until the three-way branch below

	veor	q0,q0,q0	@ q0 = 0 for the rest of the function
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!	@ q1 = round constant, q2 = vtbl mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
@ 128-bit key: 8 loop iterations plus 2 unrolled rounds with the 0x1b row.
@ Each round stores the current key q3, then derives the next one:
@ q10 = S-box of the rotated, splatted last word (aese with zero key),
@ q9 walks q3 up one 32-bit lane at a time (zero fill) to form the
@ running xor of the words, and q1 is doubled for the next round.
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]	@ q1 = 0x1b row of .Lrcon

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]	@ last round key at offset 160, no write-back
	add	r2,r2,#0x50	@ 160 + 80 = offset 240

	mov	r12,#10
	b	.Ldone

.align	4
@ 192-bit key: q3 holds the first 16 key bytes, d16 the remaining 8.
@ Each of the 8 iterations stores 24 bytes (d16, then the new q3).
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20	@ 208 + 32 = offset 240
	b	.Ldone

.align	4
@ 256-bit key: q3 holds the first 16 key bytes, q8 the second 16.
@ The loop runs 7 times and leaves through the beq after the final
@ q3 store, at which point r2 is already at offset 240.
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	vdup.32	q10,d7[1]	@ splat the last word of q3, no rotation here
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]	@ round count at offset 240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@ aes_hw_set_decrypt_key: build a decryption key schedule.
@   In/Out: same registers and return codes as aes_hw_set_encrypt_key.
@   Runs the encryption key setup through .Lenc_key, then reverses the
@   order of the round keys in place. The first and last keys are only
@   swapped; every key in between also goes through aesimc.
@   Saves and restores r4; also uses q0 and q1.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ step for walking r0 downwards
	add	r0,r2,r12,lsl#4	@ end of key schedule

	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	vld1.32	{q0},[r2]	@ middle round key, transformed in place
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt one 16-byte block.
@   In:  r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule (round count read at offset 240)
@   Scratch: r2, r3, q0-q2. No return value is set.
@   Two rounds are done per loop iteration, with q0 and q1 alternating
@   as the round key being pre-loaded.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]	@ final round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt one 16-byte block.
@   In:  r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule (round count read at offset 240)
@   Scratch: r2, r3, q0-q2. No return value is set.
@   Same structure as aes_hw_encrypt, with aesd/aesimc in place of
@   aese/aesmc.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]	@ final round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
@   In:  r0     = pointer to the input
@        r1     = pointer to the output
@        r2     = length in bytes; rounded down to a multiple of 16, and
@                 the function returns without touching anything when it
@                 is below 16
@        r3     = pointer to the key schedule (round count at offset 240)
@        [sp]   = pointer to the 16-byte IV; the updated IV is written
@                 back there at .Lcbc_done
@        [sp+4] = zero to decrypt, non-zero to encrypt
@   Saves and restores r4-r8 and d8-d15.
@   Register roles after the common setup:
@        q6 = IV / previous ciphertext block, q8,q9 = rndkey[0],[1],
@        q10-q15,q7 = last 7 round keys (q7 is the final one),
@        r7 = pointer to rndkey[2], r8 = input pointer post-increment.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ single block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from the cmp r5,#0 above

	cmp	r5,#2			@ r5 = rounds - 8, so 2 means AES-128
	veor	q0,q0,q6
	veor	q5,q8,q7		@ q5 = rndkey[0] ^ last round key
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
@ At the loop top q8 holds next plaintext ^ q5 and q0 holds the previous
@ state before its final-key xor, so this aese applies the CBC chaining
@ and rndkey[0] in one step. q6 is the previous ciphertext block.
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 = rounds - 8, so 4 means AES-192
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
@ AES-128 encryption: all round keys stay in registers
@ (q8, q9, q2, q3, q10-q15, q7), so the loop reloads none of them.
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
@ Decryption: three blocks per iteration in q0, q1 and q10. Copies of
@ the ciphertext blocks are kept in q2, q3 and q11 for the chaining xor
@ and the next IV. Fewer than three blocks go straight to the tail.
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ final round key, block 0
	subs	r2,r2,#0x30
	veor	q5,q2,q7		@ same for block 1
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7		@ same for block 2 (q9 reloaded below)
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11		@ last ciphertext block becomes the new IV
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec		@ flags from the subs r2,r2,#0x30 above

	cmn	r2,#0x30		@ r2 == -0x30: nothing left
	beq	.Lcbc_done
	nop

@ Tail: one or two remaining blocks, processed in q1 and q10.
.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ r2 == -0x20: exactly one block left
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the updated IV back
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter.
@   In:  r0   = pointer to the input
@        r1   = pointer to the output
@        r2   = number of 16-byte blocks
@        r3   = pointer to the key schedule (round count at offset 240)
@        [sp] = pointer to the 16-byte counter block
@   The 32-bit word at offset 12 of the counter block is the counter. It
@   is byte-swapped on little-endian builds, incremented in r8 and
@   inserted into lane d13[1] of q6. The counter block in memory is only
@   read, never written back.
@   Saves and restores r4-r10 and d8-d15.
@   Register roles: q0, q1, q10 = counter blocks being encrypted,
@        q6 = staging copy for counter updates, q8,q9 = current round
@        keys, q12-q15,q7 = last 5 round keys, r7 = round key pointer.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags stay live until the bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ single block: tail must not advance r0

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32
	@ instructions could write to q1 and q10 directly, but that trips
	@ these bugs. We write to q6 and copy to the final register as a
	@ workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail		@ one or two blocks: skip the 3x loop
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ From here the three states move to q4, q5 and q9, which frees
	@ q0, q1 and q10 to receive the next three counter blocks.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ pre-xor the input with the final round key
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Note the logic to update q0, q1, and q10 is written to work
	@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	@ 32-bit mode. See the errata comment near the top of this function.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32		@ flags from the subs r2,r2,#3 above

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0..2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: do not advance r0

@ Tail: one or two remaining blocks in q0 and q1. Both are always
@ encrypted; the second result is stored only when r2 is not 1.
.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)