/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Janne Grunau
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Common support header for the ARM (32-bit) and AArch64 assembly sources.
 * It is run through the C preprocessor before assembly and provides:
 *   - assembler setup (.arch / .arch_extension / .syntax / .fpu),
 *   - the AArch64 PAC/BTI helper macros and the matching ELF property note,
 *   - symbol naming helpers (EXTERN, X(), L()),
 *   - the function/endfunc, const/endconst and jumptable/endjumptable macros.
 */

#ifndef DAV1D_SRC_ARM_ASM_S
#define DAV1D_SRC_ARM_ASM_S

#include "config.h"

#if ARCH_AARCH64
/* Rename x18/w18 to undefined identifiers so that any use of the register
 * fails to assemble (x18 is reserved by the platform on some targets). */
#define x18 do_not_use_x18
#define w18 do_not_use_w18

#if HAVE_AS_ARCH_DIRECTIVE
        .arch AS_ARCH_LEVEL
#endif

/* ENABLE_<ext> / DISABLE_<ext> expand to the matching .arch_extension
 * directive when the assembler supports it, and to nothing otherwise. */
#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
#define ENABLE_DOTPROD  .arch_extension dotprod
#define DISABLE_DOTPROD .arch_extension nodotprod
#else
#define ENABLE_DOTPROD
#define DISABLE_DOTPROD
#endif
#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
#define ENABLE_I8MM  .arch_extension i8mm
#define DISABLE_I8MM .arch_extension noi8mm
#else
#define ENABLE_I8MM
#define DISABLE_I8MM
#endif
#if HAVE_AS_ARCHEXT_SVE_DIRECTIVE
#define ENABLE_SVE  .arch_extension sve
#define DISABLE_SVE .arch_extension nosve
#else
#define ENABLE_SVE
#define DISABLE_SVE
#endif
#if HAVE_AS_ARCHEXT_SVE2_DIRECTIVE
#define ENABLE_SVE2  .arch_extension sve2
#define DISABLE_SVE2 .arch_extension nosve2
#else
#define ENABLE_SVE2
#define DISABLE_SVE2
#endif

/* If we do support the .arch_extension directives, disable support for all
 * the extensions that we may use, in case they were implicitly enabled by
 * the .arch level. This makes it clear if we try to assemble an instruction
 * from an unintended extension set; we only allow assembling such instructions
 * within regions where we explicitly enable those extensions. */
DISABLE_DOTPROD
DISABLE_I8MM
DISABLE_SVE
DISABLE_SVE2


/* Support macros for
 *   - Armv8.3-A Pointer Authentication and
 *   - Armv8.5-A Branch Target Identification
 * features which require emitting a .note.gnu.property section with the
 * appropriate architecture-dependent feature bits set.
 *
 * |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to
 * PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be
 * used immediately before saving the LR register (x30) to the stack.
 * |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring
 * it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone
 * with |AARCH64_VALIDATE_LINK_REGISTER| before RET. The SP register must also
 * have the same value at the two points. For example:
 *
 *   .global f
 *   f:
 *     AARCH64_SIGN_LINK_REGISTER
 *     stp x29, x30, [sp, #-96]!
 *     mov x29, sp
 *     ...
 *     ldp x29, x30, [sp], #96
 *     AARCH64_VALIDATE_LINK_REGISTER
 *     ret
 *
 * |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or
 * |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an
 * indirect call target. In particular, all symbols exported from a file must
 * begin with one of these macros. For example, a leaf function that does not
 * save LR can instead use |AARCH64_VALID_CALL_TARGET|:
 *
 *   .globl return_zero
 *   return_zero:
 *     AARCH64_VALID_CALL_TARGET
 *     mov x0, #0
 *     ret
 *
 * A non-leaf function which does not immediately save LR may need both macros
 * because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function
 * may jump to an alternate implementation before setting up the stack:
 *
 *   .globl with_early_jump
 *   with_early_jump:
 *     AARCH64_VALID_CALL_TARGET
 *     cmp x0, #128
 *     b.lt .Lwith_early_jump_128
 *     AARCH64_SIGN_LINK_REGISTER
 *     stp x29, x30, [sp, #-96]!
 *     mov x29, sp
 *     ...
 *     ldp x29, x30, [sp], #96
 *     AARCH64_VALIDATE_LINK_REGISTER
 *     ret
 *
 *   .Lwith_early_jump_128:
 *     ...
 *     ret
 *
 * These annotations are only required with indirect calls. Private symbols that
 * are only the target of direct calls do not require annotations. Also note
 * that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not
 * indirect jumps (BR). Indirect jumps in assembly are supported through
 * |AARCH64_VALID_JUMP_TARGET|. Landing Pads which shall serve for jumps and
 * calls can be created using |AARCH64_VALID_JUMP_CALL_TARGET|.
 *
 * Although not necessary, it is safe to use these macros in 32-bit ARM
 * assembly. This may be used to simplify dual 32-bit and 64-bit files.
 * (Note: in this header the macros are only defined when ARCH_AARCH64 is
 * set, so 32-bit sources including it do not get definitions from here.)
 *
 * References:
 * - "ELF for the Arm® 64-bit Architecture"
 *   https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst
 * - "Providing protection for complex software"
 *   https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software
 */
#if defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT == 1)
#define GNU_PROPERTY_AARCH64_BTI (1 << 0)   // Has Branch Target Identification
#define AARCH64_VALID_JUMP_CALL_TARGET hint #38  // BTI 'jc'
#define AARCH64_VALID_CALL_TARGET      hint #34  // BTI 'c'
#define AARCH64_VALID_JUMP_TARGET      hint #36  // BTI 'j'
#else
#define GNU_PROPERTY_AARCH64_BTI 0          // No Branch Target Identification
#define AARCH64_VALID_JUMP_CALL_TARGET
#define AARCH64_VALID_CALL_TARGET
#define AARCH64_VALID_JUMP_TARGET
#endif

#if defined(__ARM_FEATURE_PAC_DEFAULT)

#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 0)) != 0) // authentication using key A
#define AARCH64_SIGN_LINK_REGISTER      paciasp
#define AARCH64_VALIDATE_LINK_REGISTER  autiasp
#elif ((__ARM_FEATURE_PAC_DEFAULT & (1 << 1)) != 0) // authentication using key B
#define AARCH64_SIGN_LINK_REGISTER      pacibsp
#define AARCH64_VALIDATE_LINK_REGISTER  autibsp
#else
#error Pointer authentication defines no valid key!
#endif
#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 2)) != 0) // authentication of leaf functions
#error Authentication of leaf functions is enabled but not supported in dav1d!
#endif
#define GNU_PROPERTY_AARCH64_PAC (1 << 1)

#elif defined(__APPLE__) && defined(__arm64e__)

/* Apple arm64e: sign/validate LR with key B, but do not advertise PAC in a
 * GNU property note (the note below is only emitted for ELF). */
#define GNU_PROPERTY_AARCH64_PAC 0
#define AARCH64_SIGN_LINK_REGISTER      pacibsp
#define AARCH64_VALIDATE_LINK_REGISTER  autibsp

#else /* __ARM_FEATURE_PAC_DEFAULT */

#define GNU_PROPERTY_AARCH64_PAC 0
#define AARCH64_SIGN_LINK_REGISTER
#define AARCH64_VALIDATE_LINK_REGISTER

#endif /* !__ARM_FEATURE_PAC_DEFAULT */


#if (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) && defined(__ELF__)
        .pushsection .note.gnu.property, "a"
        .balign 8
        .long 4                 // note name size ("GNU" plus NUL)
        .long 0x10              // note descriptor size (16 bytes follow the name)
        .long 0x5               // note type (NT_GNU_PROPERTY_TYPE_0)
        .asciz "GNU"
        .long 0xc0000000 /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
        .long 4                 // property data size
        .long (GNU_PROPERTY_AARCH64_BTI | GNU_PROPERTY_AARCH64_PAC)
        .long 0                 // padding up to the 8 byte alignment
        .popsection
#endif /* (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) && defined(__ELF__) */
#endif /* ARCH_AARCH64 */

#if ARCH_ARM
        .syntax unified
#ifdef __ELF__
        .arch armv7-a
        .fpu neon
        .eabi_attribute 10, 0           // suppress Tag_FP_arch
        .eabi_attribute 12, 0           // suppress Tag_Advanced_SIMD_arch
        .section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif /* __ELF__ */

/* Thumb mode is selected on Windows only. */
#ifdef _WIN32
#define CONFIG_THUMB 1
#else
#define CONFIG_THUMB 0
#endif

/* A and T are line prefixes for ARM-mode-only and Thumb-mode-only
 * instructions. The prefix of the inactive mode expands to '@', which starts
 * a comment in 32-bit ARM GNU syntax, so that line is dropped; the prefix of
 * the active mode expands to nothing. */
#if CONFIG_THUMB
        .thumb
#define A @
#define T
#else
#define A
#define T @
#endif /* CONFIG_THUMB */
#endif /* ARCH_ARM */

/* Derive PIC from the compiler-provided __PIC__ / __pic__ unless the build
 * already defined it. */
#if !defined(PIC)
#if defined(__PIC__)
#define PIC __PIC__
#elif defined(__pic__)
#define PIC __pic__
#endif
#endif

#ifndef PRIVATE_PREFIX
#define PRIVATE_PREFIX dav1d_
#endif

/* Two-level token pasting so that macro arguments are expanded before being
 * concatenated. */
#define PASTE(a,b) a ## b
#define CONCAT(a,b) PASTE(a,b)

/* EXTERN is the prefix put in front of exported symbol names:
 * PRIVATE_PREFIX, with an additional leading underscore when PREFIX is
 * defined. */
#ifdef PREFIX
#define EXTERN CONCAT(_,PRIVATE_PREFIX)
#else
#define EXTERN PRIVATE_PREFIX
#endif

/*
 * function name, export=0, align=2
 *
 * Start a function in the .text section.
 *   name   - label to define; always defined as a plain (file-local) label.
 *   export - if non-zero, additionally define the global label EXTERN<name>
 *            at the same address (hidden on ELF, private_extern on Mach-O)
 *            and, on AArch64, emit AARCH64_VALID_CALL_TARGET right after the
 *            labels.
 *   align  - operand passed unchanged to .align.
 *
 * The macro also defines a one-shot `endfunc` macro which must be used to
 * close the function: on ELF it records the symbol size (for <name>, and for
 * EXTERN<name> too when exported), emits .endfunc when HAVE_AS_FUNC is set,
 * and then removes itself so the next `function` can define it again.
 */
.macro  function name, export=0, align=2
    .macro endfunc
#ifdef __ELF__
        .size   \name, . - \name
    // The exported alias gets .type in the prologue; give it a size as well so
    // that both ELF symbols carry complete information.
    .if \export
        .size   EXTERN\name, . - EXTERN\name
    .endif
#endif
#if HAVE_AS_FUNC
        .endfunc
#endif
        .purgem endfunc
    .endm
        .text
        .align \align
    .if \export
        .global EXTERN\name
#ifdef __ELF__
        .type   EXTERN\name, %function
        .hidden EXTERN\name
#elif defined(__MACH__)
        .private_extern EXTERN\name
#endif
#if HAVE_AS_FUNC
        .func   EXTERN\name
#endif
EXTERN\name:
    .else
#ifdef __ELF__
        .type \name, %function
#endif
#if HAVE_AS_FUNC
        .func   \name
#endif
    .endif
\name:
#if ARCH_AARCH64
    .if \export
        AARCH64_VALID_CALL_TARGET
    .endif
#endif
.endm

/*
 * const name, export=0, align=2
 *
 * Start a read-only data object. The section is .rdata on Windows,
 * .const_data on Mach-O and .rodata otherwise.
 *   name   - label to define; always defined as a plain (file-local) label.
 *   export - if non-zero, additionally define the global label EXTERN<name>
 *            at the same address (hidden on ELF, private_extern on Mach-O).
 *   align  - operand passed unchanged to .align.
 *
 * The macro also defines a one-shot `endconst` macro which must be used to
 * close the object: on ELF it records the symbol size (for <name>, and for
 * EXTERN<name> too when exported) and then removes itself.
 */
.macro  const   name, export=0, align=2
    .macro endconst
#ifdef __ELF__
        .size   \name, . - \name
    // Size the exported alias as well, matching the function macro.
    .if \export
        .size   EXTERN\name, . - EXTERN\name
    .endif
#endif
        .purgem endconst
    .endm
#if defined(_WIN32)
        .section        .rdata
#elif !defined(__MACH__)
        .section        .rodata
#else
        .const_data
#endif
        .align          \align
    .if \export
        .global EXTERN\name
#ifdef __ELF__
        .hidden EXTERN\name
#elif defined(__MACH__)
        .private_extern EXTERN\name
#endif
EXTERN\name:
    .endif
\name:
.endm

/*
 * jumptable name
 *
 * Start a (non-exported) jump table called <name>; it must be closed with
 * `endjumptable`. The table is opened with `function` on Windows and with
 * `const` everywhere else, for the reasons given below.
 */
.macro jumptable name
#ifdef _WIN32
// MS armasm64 doesn't seem to be able to create relocations for subtraction
// of labels in different sections; for armasm64 (and all of Windows for
// simplicity), write the jump table in the text section, to allow calculating
// differences at assembly time. See
// https://developercommunity.visualstudio.com/t/armasm64-unable-to-create-cross-section/10722340
// for reference. (LLVM can create such relocations, but checking for _WIN32
// for simplicity, as execute-only memory isn't relevant on Windows at the
// moment.)
        function \name
#else
// For other platforms, write jump tables in a const data section, to allow
// working in environments where executable memory isn't readable.
        const \name
#endif
.endm

/*
 * endjumptable
 *
 * Close a table opened with `jumptable`, using the terminator that matches
 * the macro `jumptable` opened it with.
 */
.macro endjumptable
#ifdef _WIN32
        endfunc
#else
        endconst
#endif
.endm

/* L(x) builds a local label name from x: "Lx" on Apple targets, ".Lx"
 * elsewhere. */
#ifdef __APPLE__
#define L(x) L ## x
#else
#define L(x) .L ## x
#endif

/* X(x) is the exported name of symbol x, i.e. x with the EXTERN prefix. */
#define X(x) CONCAT(EXTERN, x)


#endif /* DAV1D_SRC_ARM_ASM_S */