// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// Key-schedule constants:
//   row 0: initial round constant 0x01 splatted across four lanes
//   row 1: tbl permutation mask that rotates and splats the top word
//   row 2: round constant 0x1b used once v1 has been shifted past 0x80
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

// int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key)
// In:  x0 = user_key, w1 = key length in bits (128/192/256), x2 = key out
// Out: x0 = 0 on success, -1 if a pointer is NULL, -2 if bits is invalid.
// Writes the expanded round keys to [x2] and the round count (10/12/14,
// kept in w12) immediately after them; NEON v0-v6 are clobbered.
// NOTE(review): x2 appears to be an AES_KEY* with rounds at byte offset
// 240 (see `sub x2,x2,#240` in set_decrypt_key) — confirm against caller.
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0
	// Validate arguments: NULL key pointers -> -1.
	mov x3,#-1
	cmp x0,#0
	b.eq Lenc_key_abort
	cmp x2,#0
	b.eq Lenc_key_abort
	// bits must be 128..256 and a multiple of 64 -> otherwise -2.
	mov x3,#-2
	cmp w1,#128
	b.lt Lenc_key_abort
	cmp w1,#256
	b.gt Lenc_key_abort
	tst w1,#0x3f
	b.ne Lenc_key_abort

	adrp x3,Lrcon@PAGE
	add x3,x3,Lrcon@PAGEOFF
	cmp w1,#192

	// v0 = 0 (zero source for ext shifts), v3 = first 16 key bytes,
	// v1 = current rcon vector, v2 = rotate-n-splat tbl mask.
	eor v0.16b,v0.16b,v0.16b
	ld1 {v3.16b},[x0],#16
	mov w1,#8 // reuse w1
	ld1 {v1.4s,v2.4s},[x3],#32

	// Dispatch on key size (flags still set from cmp w1,#192 above).
	b.lt Loop128
	b.eq L192
	b L256

// AES-128: 8 loop iterations plus 2 unrolled tail rounds = 10 round keys.
// Per round: v6 = SubWord(RotWord(last word)) via tbl+aese with a zero
// round key, then each word is XORed with the previous one (three
// ext/eor steps) and with v6^rcon; rcon doubles in GF(2^8) via shl.
.align 4
Loop128:
	tbl v6.16b,{v3.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v3.4s},[x2],#16
	aese v6.16b,v0.16b // aese with zero key = SubBytes only
	subs w1,w1,#1

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	shl v1.16b,v1.16b,#1 // next round constant
	eor v3.16b,v3.16b,v6.16b
	b.ne Loop128

	// Switch to rcon 0x1b for the final two rounds (0x80<<1 overflows).
	ld1 {v1.4s},[x3]

	tbl v6.16b,{v3.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v3.4s},[x2],#16
	aese v6.16b,v0.16b

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	shl v1.16b,v1.16b,#1
	eor v3.16b,v3.16b,v6.16b

	tbl v6.16b,{v3.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v3.4s},[x2],#16
	aese v6.16b,v0.16b

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	eor v3.16b,v3.16b,v6.16b
	st1 {v3.4s},[x2]
	add x2,x2,#0x50 // advance to the rounds slot

	mov w12,#10 // AES-128 => 10 rounds
	b Ldone

// AES-192: key words arrive 24 bytes at a time (v3 + low half of v4),
// so the mask in v2 is shifted by 8 to rotate-n-splat the 6th word.
.align 4
L192:
	ld1 {v4.8b},[x0],#8
	movi v6.16b,#8 // borrow v6.16b
	st1 {v3.4s},[x2],#16
	sub v2.16b,v2.16b,v6.16b // adjust the mask

Loop192:
	tbl v6.16b,{v4.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v4.8b},[x2],#8
	aese v6.16b,v0.16b
	subs w1,w1,#1

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b

	dup v5.4s,v3.s[3]
	eor v5.16b,v5.16b,v4.16b
	eor v6.16b,v6.16b,v1.16b
	ext v4.16b,v0.16b,v4.16b,#12
	shl v1.16b,v1.16b,#1
	eor v4.16b,v4.16b,v5.16b
	eor v3.16b,v3.16b,v6.16b
	eor v4.16b,v4.16b,v6.16b
	st1 {v3.4s},[x2],#16
	b.ne Loop192

	mov w12,#12 // AES-192 => 12 rounds
	add x2,x2,#0x20
	b Ldone

// AES-256: two 16-byte key halves (v3, v4); odd rounds derive v3 with a
// round constant, even rounds derive v4 with a plain SubWord splat.
.align 4
L256:
	ld1 {v4.16b},[x0]
	mov w1,#7
	mov w12,#14 // AES-256 => 14 rounds
	st1 {v3.4s},[x2],#16

Loop256:
	tbl v6.16b,{v4.16b},v2.16b
	ext v5.16b,v0.16b,v3.16b,#12
	st1 {v4.4s},[x2],#16
	aese v6.16b,v0.16b
	subs w1,w1,#1

	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v3.16b,v3.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v6.16b,v6.16b,v1.16b
	eor v3.16b,v3.16b,v5.16b
	shl v1.16b,v1.16b,#1
	eor v3.16b,v3.16b,v6.16b
	st1 {v3.4s},[x2],#16
	b.eq Ldone

	dup v6.4s,v3.s[3] // just splat
	ext v5.16b,v0.16b,v4.16b,#12
	aese v6.16b,v0.16b

	eor v4.16b,v4.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v4.16b,v4.16b,v5.16b
	ext v5.16b,v0.16b,v5.16b,#12
	eor v4.16b,v4.16b,v5.16b

	eor v4.16b,v4.16b,v6.16b
	b Loop256

Ldone:
	str w12,[x2] // store the round count after the schedule
	mov x3,#0 // success

Lenc_key_abort:
	mov x0,x3 // return value
	ldr x29,[sp],#16
	ret


.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key

// int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key)
// Same arguments and return values as aes_hw_set_encrypt_key. Builds the
// encryption schedule via `bl Lenc_key`, then converts it for the
// Equivalent Inverse Cipher: round keys are reversed in place and the
// inner ones passed through aesimc (InvMixColumns).
.align 5
_aes_hw_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0
	bl Lenc_key // x2/w12 from Lenc_key are reused below

	cmp x0,#0
	b.ne Ldec_key_abort // propagate -1/-2 from key expansion

	sub x2,x2,#240 // restore original x2
	mov x4,#-16 // backwards stride for the x0 cursor
	add x0,x2,x12,lsl#4 // end of key schedule

	// Swap the outermost pair as-is (first and last round keys are not
	// InvMixColumns-transformed).
	ld1 {v0.4s},[x2]
	ld1 {v1.4s},[x0]
	st1 {v0.4s},[x0],x4
	st1 {v1.4s},[x2],#16

// Walk the two cursors towards each other, swapping round keys and
// applying aesimc to each.
Loop_imc:
	ld1 {v0.4s},[x2]
	ld1 {v1.4s},[x0]
	aesimc v0.16b,v0.16b
	aesimc v1.16b,v1.16b
	st1 {v0.4s},[x0],x4
	st1 {v1.4s},[x2],#16
	cmp x0,x2
	b.hi Loop_imc

	// Middle round key (odd count): transform in place.
	ld1 {v0.4s},[x2]
	aesimc v0.16b,v0.16b
	st1 {v0.4s},[x0]

	eor x0,x0,x0 // return value
Ldec_key_abort:
	ldp x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret

.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt

// void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
// Encrypts one 16-byte block: x0 = in, x1 = out, x2 = key schedule with
// round count at [x2,#240]. Leaf function; clobbers w3, v0-v2.
.align 5
_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr w3,[x2,#240] // number of rounds
	ld1 {v0.4s},[x2],#16 // rndkey[0]
	ld1 {v2.16b},[x0] // plaintext block
	sub w3,w3,#2 // loop covers all but the last two rounds
	ld1 {v1.4s},[x2],#16 // rndkey[1]

// Two rounds per iteration; the next two round keys are loaded while the
// aese/aesmc pairs execute.
Loop_enc:
	aese v2.16b,v0.16b
	aesmc v2.16b,v2.16b
	ld1 {v0.4s},[x2],#16
	subs w3,w3,#2
	aese v2.16b,v1.16b
	aesmc v2.16b,v2.16b
	ld1 {v1.4s},[x2],#16
	b.gt Loop_enc

	// Final two rounds: last round has no MixColumns, just AddRoundKey.
	aese v2.16b,v0.16b
	aesmc v2.16b,v2.16b
	ld1 {v0.4s},[x2]
	aese v2.16b,v1.16b
	eor v2.16b,v2.16b,v0.16b

	st1 {v2.16b},[x1]
	ret

.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt

// void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
// Decrypts one 16-byte block; mirror of aes_hw_encrypt using aesd/aesimc
// with a schedule produced by aes_hw_set_decrypt_key. Clobbers w3, v0-v2.
.align 5
_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr w3,[x2,#240] // number of rounds
	ld1 {v0.4s},[x2],#16
	ld1 {v2.16b},[x0] // ciphertext block
	sub w3,w3,#2
	ld1 {v1.4s},[x2],#16

Loop_dec:
	aesd v2.16b,v0.16b
	aesimc v2.16b,v2.16b
	ld1 {v0.4s},[x2],#16
	subs w3,w3,#2
	aesd v2.16b,v1.16b
	aesimc v2.16b,v2.16b
	ld1 {v1.4s},[x2],#16
	b.gt Loop_dec

	aesd v2.16b,v0.16b
	aesimc v2.16b,v2.16b
	ld1 {v0.4s},[x2]
	aesd v2.16b,v1.16b
	eor v2.16b,v2.16b,v0.16b

	st1 {v2.16b},[x1]
	ret

.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt

// void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
//                         const AES_KEY *key, uint8_t *ivec, int enc)
// x0 = in, x1 = out, x2 = byte length (rounded down to whole blocks),
// x3 = key schedule, x4 = IV (updated in place on exit), w5 = enc flag
// (nonzero = encrypt). Encryption is serial (one block at a time, CBC
// chaining); decryption is interleaved three blocks wide.
.align 5
_aes_hw_cbc_encrypt:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0
	subs x2,x2,#16
	mov x8,#16 // input step; zeroed for the last block
	b.lo Lcbc_abort // less than one block: nothing to do
	csel x8,xzr,x8,eq

	cmp w5,#0 // en- or decrypting?
	ldr w5,[x3,#240]
	and x2,x2,#-16 // whole blocks only
	ld1 {v6.16b},[x4] // v6 = IV / running chain value
	ld1 {v0.16b},[x0],x8

	ld1 {v16.4s,v17.4s},[x3] // load key schedule...
	sub w5,w5,#6
	add x7,x3,x5,lsl#4 // pointer to last 7 round keys
	sub w5,w5,#2
	ld1 {v18.4s,v19.4s},[x7],#32
	ld1 {v20.4s,v21.4s},[x7],#32
	ld1 {v22.4s,v23.4s},[x7],#32
	ld1 {v7.4s},[x7] // v7 = last round key

	add x7,x3,#32
	mov w6,w5
	b.eq Lcbc_dec // flags still from cmp w5,#0 above

	// --- CBC encrypt ---
	// v5 = rndkey[0]^rndkey[last]: XORing the next plaintext block with
	// v5 pre-applies round 0 so the last-round eor folds out.
	cmp w5,#2 // w5==2 here iff 10 rounds (AES-128)
	eor v0.16b,v0.16b,v6.16b // block ^= IV
	eor v5.16b,v16.16b,v7.16b
	b.eq Lcbc_enc128

	// 192/256-bit path: stash pointers to round keys 4-7 so they can be
	// reloaded per iteration (v16/v17 are reused as scratch).
	ld1 {v2.4s,v3.4s},[x7]
	add x7,x3,#16
	add x6,x3,#16*4
	add x12,x3,#16*5
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	add x14,x3,#16*6
	add x3,x3,#16*7
	b Lenter_cbc_enc

.align 4
Loop_cbc_enc:
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	st1 {v6.16b},[x1],#16 // store previous ciphertext block
Lenter_cbc_enc:
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v2.16b
	aesmc v0.16b,v0.16b
	ld1 {v16.4s},[x6]
	cmp w5,#4 // w5==4 iff 12 rounds (AES-192)
	aese v0.16b,v3.16b
	aesmc v0.16b,v0.16b
	ld1 {v17.4s},[x12]
	b.eq Lcbc_enc192

	// Two extra rounds for AES-256.
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	ld1 {v16.4s},[x14]
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	ld1 {v17.4s},[x3]
	nop

Lcbc_enc192:
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	subs x2,x2,#16
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	csel x8,xzr,x8,eq // don't read past the last block
	aese v0.16b,v18.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v19.16b
	aesmc v0.16b,v0.16b
	ld1 {v16.16b},[x0],x8 // next plaintext block
	aese v0.16b,v20.16b
	aesmc v0.16b,v0.16b
	eor v16.16b,v16.16b,v5.16b // pre-apply round 0 (see v5 above)
	aese v0.16b,v21.16b
	aesmc v0.16b,v0.16b
	ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
	aese v0.16b,v22.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v23.16b
	eor v6.16b,v0.16b,v7.16b // finish: ciphertext = state ^ last key
	b.hs Loop_cbc_enc

	st1 {v6.16b},[x1],#16
	b Lcbc_done

// AES-128 CBC encrypt: all round keys fit in registers, fully unrolled.
.align 5
Lcbc_enc128:
	ld1 {v2.4s,v3.4s},[x7]
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	b Lenter_cbc_enc128
Loop_cbc_enc128:
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	subs x2,x2,#16
	aese v0.16b,v2.16b
	aesmc v0.16b,v0.16b
	csel x8,xzr,x8,eq
	aese v0.16b,v3.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v18.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v19.16b
	aesmc v0.16b,v0.16b
	ld1 {v16.16b},[x0],x8
	aese v0.16b,v20.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v21.16b
	aesmc v0.16b,v0.16b
	aese v0.16b,v22.16b
	aesmc v0.16b,v0.16b
	eor v16.16b,v16.16b,v5.16b
	aese v0.16b,v23.16b
	eor v6.16b,v0.16b,v7.16b
	b.hs Loop_cbc_enc128

	st1 {v6.16b},[x1],#16
	b Lcbc_done
// --- CBC decrypt: three blocks in flight (independent, so they can be
// interleaved); v6 carries the ciphertext that becomes the next IV.
.align 5
Lcbc_dec:
	ld1 {v18.16b},[x0],#16
	subs x2,x2,#32 // bias
	add w6,w5,#2
	orr v3.16b,v0.16b,v0.16b
	orr v1.16b,v0.16b,v0.16b
	orr v19.16b,v18.16b,v18.16b
	b.lo Lcbc_dec_tail // fewer than 3 blocks remain

	orr v1.16b,v18.16b,v18.16b
	ld1 {v18.16b},[x0],#16
	orr v2.16b,v0.16b,v0.16b
	orr v3.16b,v1.16b,v1.16b
	orr v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
	aesd v0.16b,v16.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v16.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v16.16b
	aesimc v18.16b,v18.16b
	ld1 {v16.4s},[x7],#16
	subs w6,w6,#2
	aesd v0.16b,v17.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v17.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v17.16b
	aesimc v18.16b,v18.16b
	ld1 {v17.4s},[x7],#16
	b.gt Loop3x_cbc_dec

	// Last rounds; meanwhile prepare (prev ciphertext ^ last round key)
	// in v4/v5/v17 so one eor per block both finishes the cipher and
	// un-chains CBC.
	aesd v0.16b,v16.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v16.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v16.16b
	aesimc v18.16b,v18.16b
	eor v4.16b,v6.16b,v7.16b
	subs x2,x2,#0x30
	eor v5.16b,v2.16b,v7.16b
	csel x6,x2,x6,lo // x6, w6, is zero at this point
	aesd v0.16b,v17.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v17.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v17.16b
	aesimc v18.16b,v18.16b
	eor v17.16b,v3.16b,v7.16b
	add x0,x0,x6 // x0 is adjusted in such way that
	// at exit from the loop v1.16b-v18.16b
	// are loaded with last "words"
	orr v6.16b,v19.16b,v19.16b // next IV = last ciphertext read
	mov x7,x3
	aesd v0.16b,v20.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v20.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v20.16b
	aesimc v18.16b,v18.16b
	ld1 {v2.16b},[x0],#16
	aesd v0.16b,v21.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v21.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v21.16b
	aesimc v18.16b,v18.16b
	ld1 {v3.16b},[x0],#16
	aesd v0.16b,v22.16b
	aesimc v0.16b,v0.16b
	aesd v1.16b,v22.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v22.16b
	aesimc v18.16b,v18.16b
	ld1 {v19.16b},[x0],#16
	aesd v0.16b,v23.16b
	aesd v1.16b,v23.16b
	aesd v18.16b,v23.16b
	ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
	add w6,w5,#2
	eor v4.16b,v4.16b,v0.16b
	eor v5.16b,v5.16b,v1.16b
	eor v18.16b,v18.16b,v17.16b
	ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
	st1 {v4.16b},[x1],#16
	orr v0.16b,v2.16b,v2.16b
	st1 {v5.16b},[x1],#16
	orr v1.16b,v3.16b,v3.16b
	st1 {v18.16b},[x1],#16
	orr v18.16b,v19.16b,v19.16b
	b.hs Loop3x_cbc_dec

	cmn x2,#0x30 // exactly 0 blocks left?
	b.eq Lcbc_done
	nop

// Tail: one or two blocks remain in v1/v18 (v18 is always the last).
Lcbc_dec_tail:
	aesd v1.16b,v16.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v16.16b
	aesimc v18.16b,v18.16b
	ld1 {v16.4s},[x7],#16
	subs w6,w6,#2
	aesd v1.16b,v17.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v17.16b
	aesimc v18.16b,v18.16b
	ld1 {v17.4s},[x7],#16
	b.gt Lcbc_dec_tail

	aesd v1.16b,v16.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v16.16b
	aesimc v18.16b,v18.16b
	aesd v1.16b,v17.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v17.16b
	aesimc v18.16b,v18.16b
	aesd v1.16b,v20.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v20.16b
	aesimc v18.16b,v18.16b
	cmn x2,#0x20 // one block (eq) or two?
	aesd v1.16b,v21.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v21.16b
	aesimc v18.16b,v18.16b
	eor v5.16b,v6.16b,v7.16b
	aesd v1.16b,v22.16b
	aesimc v1.16b,v1.16b
	aesd v18.16b,v22.16b
	aesimc v18.16b,v18.16b
	eor v17.16b,v3.16b,v7.16b
	aesd v1.16b,v23.16b
	aesd v18.16b,v23.16b
	b.eq Lcbc_dec_one
	eor v5.16b,v5.16b,v1.16b
	eor v17.16b,v17.16b,v18.16b
	orr v6.16b,v19.16b,v19.16b
	st1 {v5.16b},[x1],#16
	st1 {v17.16b},[x1],#16
	b Lcbc_done

Lcbc_dec_one:
	eor v5.16b,v5.16b,v18.16b
	orr v6.16b,v19.16b,v19.16b
	st1 {v5.16b},[x1],#16

Lcbc_done:
	st1 {v6.16b},[x4] // write back updated IV
Lcbc_abort:
	ldr x29,[sp],#16
	ret

.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                  size_t blocks, const AES_KEY *key,
//                                  const uint8_t ivec[16])
// x0 = in, x1 = out, x2 = number of complete 16-byte blocks, x3 = key
// schedule, x4 = counter block. Only the last 32-bit lane of the counter
// is incremented (big-endian), three keystream blocks are computed per
// main-loop iteration.
.align 5
_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0
	ldr w5,[x3,#240]

	ldr w8, [x4, #12] // w8 = big-endian 32-bit counter lane
	ld1 {v0.4s},[x4]

	ld1 {v16.4s,v17.4s},[x3] // load key schedule...
	sub w5,w5,#4
	mov x12,#16
	cmp x2,#2
	add x7,x3,x5,lsl#4 // pointer to last 5 round keys
	sub w5,w5,#2
	ld1 {v20.4s,v21.4s},[x7],#32
	ld1 {v22.4s,v23.4s},[x7],#32
	ld1 {v7.4s},[x7] // v7 = last round key
	add x7,x3,#32
	mov w6,w5
	csel x12,xzr,x12,lo // single block: don't advance input twice

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips these
	// bugs. We write to v6.16b and copy to the final register as a
	// workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev w8, w8 // counter is big-endian in memory
#endif
	// Materialize counter blocks ctr+1 (v1) and ctr+2 (v18) via v6.
	add w10, w8, #1
	orr v6.16b,v0.16b,v0.16b
	rev w10, w10
	mov v6.s[3],w10
	add w8, w8, #2
	orr v1.16b,v6.16b,v6.16b
	b.ls Lctr32_tail // at most 2 blocks (flags from cmp x2,#2)
	rev w12, w8
	mov v6.s[3],w12
	sub x2,x2,#3 // bias
	orr v18.16b,v6.16b,v6.16b
	b Loop3x_ctr32

// Main loop: encrypt counter blocks v0/v1/v18 three at a time.
.align 4
Loop3x_ctr32:
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v1.16b,v1.16b
	aese v18.16b,v16.16b
	aesmc v18.16b,v18.16b
	ld1 {v16.4s},[x7],#16
	subs w6,w6,#2
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v17.16b
	aesmc v1.16b,v1.16b
	aese v18.16b,v17.16b
	aesmc v18.16b,v18.16b
	ld1 {v17.4s},[x7],#16
	b.gt Loop3x_ctr32

	// Final rounds, interleaved with: loading the 3 input blocks,
	// XORing them with the last round key, and preparing the next 3
	// counter values (via v6 — see errata note above).
	aese v0.16b,v16.16b
	aesmc v4.16b,v0.16b // fork state into v4 so v0 can be reloaded
	aese v1.16b,v16.16b
	aesmc v5.16b,v1.16b
	ld1 {v2.16b},[x0],#16
	add w9,w8,#1
	aese v18.16b,v16.16b
	aesmc v18.16b,v18.16b
	ld1 {v3.16b},[x0],#16
	rev w9,w9
	aese v4.16b,v17.16b
	aesmc v4.16b,v4.16b
	aese v5.16b,v17.16b
	aesmc v5.16b,v5.16b
	ld1 {v19.16b},[x0],#16
	mov x7,x3
	aese v18.16b,v17.16b
	aesmc v17.16b,v18.16b
	aese v4.16b,v20.16b
	aesmc v4.16b,v4.16b
	aese v5.16b,v20.16b
	aesmc v5.16b,v5.16b
	eor v2.16b,v2.16b,v7.16b
	add w10,w8,#2
	aese v17.16b,v20.16b
	aesmc v17.16b,v17.16b
	eor v3.16b,v3.16b,v7.16b
	add w8,w8,#3
	aese v4.16b,v21.16b
	aesmc v4.16b,v4.16b
	aese v5.16b,v21.16b
	aesmc v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor v19.16b,v19.16b,v7.16b
	mov v6.s[3], w9
	aese v17.16b,v21.16b
	aesmc v17.16b,v17.16b
	orr v0.16b,v6.16b,v6.16b
	rev w10,w10
	aese v4.16b,v22.16b
	aesmc v4.16b,v4.16b
	mov v6.s[3], w10
	rev w12,w8
	aese v5.16b,v22.16b
	aesmc v5.16b,v5.16b
	orr v1.16b,v6.16b,v6.16b
	mov v6.s[3], w12
	aese v17.16b,v22.16b
	aesmc v17.16b,v17.16b
	orr v18.16b,v6.16b,v6.16b
	subs x2,x2,#3
	aese v4.16b,v23.16b
	aese v5.16b,v23.16b
	aese v17.16b,v23.16b

	eor v2.16b,v2.16b,v4.16b
	ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
	st1 {v2.16b},[x1],#16
	eor v3.16b,v3.16b,v5.16b
	mov w6,w5
	st1 {v3.16b},[x1],#16
	eor v19.16b,v19.16b,v17.16b
	ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
	st1 {v19.16b},[x1],#16
	b.hs Loop3x_ctr32

	adds x2,x2,#3 // undo bias
	b.eq Lctr32_done
	cmp x2,#1
	mov x12,#16
	csel x12,xzr,x12,eq

// Tail: one or two remaining blocks, keystream in v0 (and v1).
Lctr32_tail:
	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v1.16b,v1.16b
	ld1 {v16.4s},[x7],#16
	subs w6,w6,#2
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v17.16b
	aesmc v1.16b,v1.16b
	ld1 {v17.4s},[x7],#16
	b.gt Lctr32_tail

	aese v0.16b,v16.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v16.16b
	aesmc v1.16b,v1.16b
	aese v0.16b,v17.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v17.16b
	aesmc v1.16b,v1.16b
	ld1 {v2.16b},[x0],x12 // x12 is 0 when only one block remains
	aese v0.16b,v20.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v20.16b
	aesmc v1.16b,v1.16b
	ld1 {v3.16b},[x0]
	aese v0.16b,v21.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v21.16b
	aesmc v1.16b,v1.16b
	eor v2.16b,v2.16b,v7.16b
	aese v0.16b,v22.16b
	aesmc v0.16b,v0.16b
	aese v1.16b,v22.16b
	aesmc v1.16b,v1.16b
	eor v3.16b,v3.16b,v7.16b
	aese v0.16b,v23.16b
	aese v1.16b,v23.16b

	cmp x2,#1
	eor v2.16b,v2.16b,v0.16b
	eor v3.16b,v3.16b,v1.16b
	st1 {v2.16b},[x1],#16
	b.eq Lctr32_done
	st1 {v3.16b},[x1]

Lctr32_done:
	ldr x29,[sp],#16
	ret

#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)