// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//              SHA256-hw   SHA256(*)       SHA512
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
//
// (*)   Software SHA256 results are of lesser relevance, presented
//       mostly for informational purposes.
// (**)  The result is a trade-off: it's possible to improve it by
//       10% (or by 1 cycle per round), but at the cost of 20% loss
//       on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
//       indication of some compiler "pathology", most notably code
//       generated with -mgeneral-regs-only is significantly faster
//       and the gap is only 40-90%.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#endif

.text

//-----------------------------------------------------------------------
// sha512_block_data_order_nohw
//
// SHA-512 block function written with general-purpose registers only.
//
// In:    x0 = pointer to eight 64-bit state words; they are loaded on
//             entry and the updated values are stored back after every
//             block
//        x1 = pointer to the input data
//        x2 = number of 128-byte blocks (end pointer = x1 + (x2 << 7))
// Out:   state words at [x0] updated in place; no return value is set up.
// Clobb: x0-x17 and the NZCV flags. x19-x28, x29 and x30 are saved in the
//        prologue and restored in the epilogue.
//
// NOTE(review): the block loop is bottom-tested (cmp/b.ne after the first
// block has been processed), so x2 == 0 is not handled here -- presumably
// callers never pass zero blocks; confirm against the C caller.
//
// Frame: 128 bytes addressed through x29, plus 32 bytes below it
//        addressed through sp:
//          [x29,#0]        x29/x30
//          [x29,#16..#95]  x19-x28
//          [x29,#96]       x0 (state pointer)
//          [x29,#104]      end-of-input pointer
//          [x29,#112]      input pointer, saved after the first 16 bytes
//                          of the current block have been consumed
//          [sp,#0..#31]    four 8-byte spill slots for schedule words
//
// Register roles:
//        x20-x27   working variables a..h; instead of moving values, the
//                  role of each register rotates by one every round
//        x0-x15    16-word message window X[]; these registers double as
//                  per-round temporaries, so up to four words at a time
//                  live in the [sp] spill slots
//        x16,x17   scratch (Sigma1 / Ch and Sigma0)
//        x19,x28   alternate between the current K[i] and the a^b / Maj
//                  helper value
//        x30       pointer walking the .LK512 table
//-----------------------------------------------------------------------
.globl	sha512_block_data_order_nohw
.hidden	sha512_block_data_order_nohw
.type	sha512_block_data_order_nohw,%function
.align	6
sha512_block_data_order_nohw:
	AARCH64_SIGN_LINK_REGISTER
	// Allocate the 128-byte frame and save the callee-saved registers.
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8	// four spill slots for X[] words

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adrp	x30,.LK512
	add	x30,x30,:lo12:.LK512
	stp	x0,x2,[x29,#96]	// x0/x2 are reused as X[] registers below

// Per-block loop. Rounds 0-15 load the input words as they are consumed
// and byte-swap them unless the build is big-endian (__AARCH64EB__).
// The final "h+=Sigma0(a)" of each round is deferred into the next round.
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]	// x1 is reused as an X[] register later on
#ifndef __AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]	// spill: x6 is used as a temporary below
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]	// last two input words; x1/x2 now hold X[14]/X[15]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2	// 15
#endif
	// Round 15 also prepares the schedule word used by round 16 (x3).
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
// Rounds 16-79, sixteen rounds per loop iteration. Every round consumes
// the schedule word prepared by the previous round and prepares the next
// one: X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14]).
// The loop exits when the K value pre-loaded for the following round is
// the zero terminator at the end of .LK512.
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx	// x19 == 0 means the table terminator was loaded

	// Block finished: reload the saved pointers and rewind x30 to the
	// start of .LK512 (81 loads of 8 bytes: 80 constants + terminator).
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind

	// Add the working variables into the stored state and write it back.
	// The saved x1 was already 2*8 bytes into the block, hence +14*8.
	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2	// reached end of input?
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	// Restore callee-saved registers, drop the spill area and the frame.
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	sha512_block_data_order_nohw,.-sha512_block_data_order_nohw

// Round-constant table read sequentially through a post-incremented
// pointer (x30 in sha512_block_data_order_nohw).
.section	.rodata
.align	6
.type	.LK512,%object
.LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
.text
#ifndef __KERNEL__
// ---------------------------------------------------------------------
// sha512_block_data_order_hw
//
// SHA-512 compression of one or more 128-byte blocks using the Armv8
// SHA-512 instructions (sha512h, sha512h2, sha512su0, sha512su1). These
// are emitted as raw encodings via .inst; the intended mnemonic and
// operands are given in the trailing comment of each .inst line.
//
// In:
//   x0 = pointer to the 64-byte hash state (loaded and stored as four
//        .2d vectors)
//   x1 = pointer to the input data
//   x2 = number of 128-byte blocks. The loop is bottom-tested and the
//        first block is loaded before the count is examined, so a count
//        of zero is not handled. NOTE(review): callers are presumably
//        required to pass x2 >= 1 -- confirm.
// Out:
//   updated state written back to [x0]
// Clobbers:
//   x1-x4, v0-v7, v16-v29, condition flags
//
// Register roles:
//   v0-v3    working state, with v4 as the extra register that lets the
//            state rotate from step to step without moves
//   v5-v7    scratch operands built with ext
//   v16-v23  the current 16 message-schedule words, two per vector
//   v24/v25  alternately hold the next pair of K512 constants
//   v26-v29  copy of the state on entry to the block, added back at the end
//   x3       walks .LK512 and is rewound by 80*8 bytes per block
//   x4       x1-128, used to rewind the input pointer on the last block
//
// Each block is processed as 40 unrolled steps of two rounds each: the
// first 32 also update the message schedule, the last 8 instead load and
// byte-swap the following block.
// ---------------------------------------------------------------------
.globl	sha512_block_data_order_hw
.hidden	sha512_block_data_order_hw
.type	sha512_block_data_order_hw,%function
.align	6
sha512_block_data_order_hw:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	// (The function makes no calls, so x30 is never modified and only x29
	// is reloaded in the epilogue.)
	// NOTE(review): AARCH64_VALID_CALL_TARGET is defined outside this file,
	// presumably in arm_arch.h as a BTI landing pad -- confirm.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// load context
	adrp	x3,.LK512
	add	x3,x3,:lo12:.LK512

	// Reverse the bytes within each 64-bit lane of the first block.
	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw			// skip the alignment padding below

.align	4
.Loop_hw:
	// Per-block setup. On entry x1 already points past the block held in
	// v16-v23. If this is the last block (x2 becomes zero) x1 is moved
	// back by 128 so that the "load next input" loads below re-read the
	// current block instead of advancing past it.
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b			// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	csel	x1,x1,x4,ne			// conditional rewind

	// ---- Rounds 0-63: two rounds per step, with message-schedule update ----
	// Each step adds K to W, swaps the two 64-bit halves of the sum with
	// ext #8, and overlaps the schedule update of the W vector just
	// consumed with the state update.

	// rounds 0-1
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 2-3
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 4-5
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 6-7
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 8-9
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 10-11
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 12-13
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 14-15
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 16-17
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 18-19
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 20-21
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 22-23
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 24-25
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 26-27
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 28-29
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 30-31
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 32-33
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 34-35
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 36-37
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 38-39
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 40-41
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 42-43
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 44-45
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 46-47
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 48-49
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 50-51
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 52-53
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 54-55
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 56-57
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 58-59
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 60-61
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 62-63
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// ---- Rounds 64-79: no further schedule update ----
	// Once a W vector has been added to K it is no longer needed, so it is
	// immediately reloaded from [x1] and byte-swapped for the next
	// iteration. On the last block these loads re-read the current block
	// because of the conditional rewind at the top of the loop.

	// rounds 64-65
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v16.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v16.16b,v16.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 66-67
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v17.2d
	ld1	{v17.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v17.16b,v17.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 68-69
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v18.2d
	ld1	{v18.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v18.16b,v18.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// rounds 70-71
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v19.2d
	ld1	{v19.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	rev64	v19.16b,v19.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// rounds 72-73
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v20.2d
	ld1	{v20.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	rev64	v20.16b,v20.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// rounds 74-75
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v21.2d
	ld1	{v21.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v21.16b,v21.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// rounds 76-77
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v22.2d
	ld1	{v22.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v22.16b,v22.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// rounds 78-79; all 80 constants have been consumed, so x3 is moved
	// back to the start of .LK512 for the next block.
	sub	x3,x3,#80*8	// rewind
	add	v25.2d,v25.2d,v23.2d
	ld1	{v23.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v23.16b,v23.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// Add the state saved at the top of the loop to the new working state.
	add	v0.2d,v0.2d,v26.2d			// accumulate
	add	v1.2d,v1.2d,v27.2d
	add	v2.2d,v2.2d,v28.2d
	add	v3.2d,v3.2d,v29.2d

	cbnz	x2,.Loop_hw			// more blocks remaining

	st1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// store context

	ldr	x29,[sp],#16			// restore x29 only and pop the 16-byte frame
	ret
.size	sha512_block_data_order_hw,.-sha512_block_data_order_hw
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)