xref: /aosp_15_r20/external/libdav1d/src/arm/asm.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1/*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Janne Grunau
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 *    list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 *    this list of conditions and the following disclaimer in the documentation
14 *    and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#ifndef DAV1D_SRC_ARM_ASM_S
29#define DAV1D_SRC_ARM_ASM_S
30
31#include "config.h"
32
33#if ARCH_AARCH64
/* x18 is reserved as the platform register on some AArch64 targets. Both of
 * its names are remapped to identifiers that are not registers, so an
 * accidental use shows up as an obviously wrong operand. */
#define w18 do_not_use_w18
#define x18 do_not_use_x18

/* Select the architecture level, provided the assembler understands the
 * .arch directive. AS_ARCH_LEVEL is not defined in this file; it is expected
 * to come from config.h. */
#if HAVE_AS_ARCH_DIRECTIVE
        .arch AS_ARCH_LEVEL
#endif
40
/* ENABLE_<ext> / DISABLE_<ext> turn one optional instruction-set extension
 * on or off for the code that follows. When the assembler does not support
 * the corresponding .arch_extension directive, both macros of the pair
 * expand to nothing. */

/* Dot product */
#if !HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
#define ENABLE_DOTPROD
#define DISABLE_DOTPROD
#else
#define ENABLE_DOTPROD  .arch_extension dotprod
#define DISABLE_DOTPROD .arch_extension nodotprod
#endif

/* Int8 matrix multiplication */
#if !HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
#define ENABLE_I8MM
#define DISABLE_I8MM
#else
#define ENABLE_I8MM  .arch_extension i8mm
#define DISABLE_I8MM .arch_extension noi8mm
#endif

/* SVE */
#if !HAVE_AS_ARCHEXT_SVE_DIRECTIVE
#define ENABLE_SVE
#define DISABLE_SVE
#else
#define ENABLE_SVE  .arch_extension sve
#define DISABLE_SVE .arch_extension nosve
#endif

/* SVE2 */
#if !HAVE_AS_ARCHEXT_SVE2_DIRECTIVE
#define ENABLE_SVE2
#define DISABLE_SVE2
#else
#define ENABLE_SVE2  .arch_extension sve2
#define DISABLE_SVE2 .arch_extension nosve2
#endif

/* Where the .arch_extension directives are available, start out with every
 * extension we may use switched off, in case the .arch level enabled some of
 * them implicitly. An instruction from an unintended extension set then
 * stands out immediately: assembling such instructions is only allowed
 * inside regions that explicitly enable the extension. */
DISABLE_DOTPROD
DISABLE_I8MM
DISABLE_SVE
DISABLE_SVE2
79
80
81/* Support macros for
82 *   - Armv8.3-A Pointer Authentication and
83 *   - Armv8.5-A Branch Target Identification
84 * features which require emitting a .note.gnu.property section with the
85 * appropriate architecture-dependent feature bits set.
86 *
87 * |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to
88 * PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be
89 * used immediately before saving the LR register (x30) to the stack.
90 * |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring
91 * it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone
92 * with |AARCH64_VALIDATE_LINK_REGISTER| before RET. The SP register must also
93 * have the same value at the two points. For example:
94 *
95 *   .global f
96 *   f:
97 *     AARCH64_SIGN_LINK_REGISTER
98 *     stp x29, x30, [sp, #-96]!
99 *     mov x29, sp
100 *     ...
101 *     ldp x29, x30, [sp], #96
102 *     AARCH64_VALIDATE_LINK_REGISTER
103 *     ret
104 *
105 * |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or
106 * |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an
107 * indirect call target. In particular, all symbols exported from a file must
108 * begin with one of these macros. For example, a leaf function that does not
109 * save LR can instead use |AARCH64_VALID_CALL_TARGET|:
110 *
111 *   .globl return_zero
112 *   return_zero:
113 *     AARCH64_VALID_CALL_TARGET
114 *     mov x0, #0
115 *     ret
116 *
117 * A non-leaf function which does not immediately save LR may need both macros
118 * because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function
119 * may jump to an alternate implementation before setting up the stack:
120 *
121 *   .globl with_early_jump
122 *   with_early_jump:
123 *     AARCH64_VALID_CALL_TARGET
124 *     cmp x0, #128
125 *     b.lt .Lwith_early_jump_128
126 *     AARCH64_SIGN_LINK_REGISTER
127 *     stp x29, x30, [sp, #-96]!
128 *     mov x29, sp
129 *     ...
130 *     ldp x29, x30, [sp], #96
131 *     AARCH64_VALIDATE_LINK_REGISTER
132 *     ret
133 *
134 *  .Lwith_early_jump_128:
135 *     ...
136 *     ret
137 *
138 * These annotations are only required with indirect calls. Private symbols that
139 * are only the target of direct calls do not require annotations. Also note
140 * that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not
141 * indirect jumps (BR). Indirect jumps in assembly are supported through
142 * |AARCH64_VALID_JUMP_TARGET|. Landing Pads which shall serve for jumps and
143 * calls can be created using |AARCH64_VALID_JUMP_CALL_TARGET|.
144 *
145 * Although not necessary, it is safe to use these macros in 32-bit ARM
146 * assembly. This may be used to simplify dual 32-bit and 64-bit files.
147 *
148 * References:
149 * - "ELF for the Arm® 64-bit Architecture"
 *   https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst
151 * - "Providing protection for complex software"
152 *   https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software
153 */
/* Branch Target Identification landing pads.
 * The hint encodings are emitted only when __ARM_FEATURE_BTI_DEFAULT is
 * defined to 1; otherwise all three landing-pad macros expand to nothing and
 * GNU_PROPERTY_AARCH64_BTI is 0. */
#if !defined(__ARM_FEATURE_BTI_DEFAULT) || (__ARM_FEATURE_BTI_DEFAULT != 1)
/* No Branch Target Identification */
#define GNU_PROPERTY_AARCH64_BTI 0
#define AARCH64_VALID_CALL_TARGET
#define AARCH64_VALID_JUMP_TARGET
#define AARCH64_VALID_JUMP_CALL_TARGET
#else
/* Has Branch Target Identification */
#define GNU_PROPERTY_AARCH64_BTI (1 << 0)
#define AARCH64_VALID_CALL_TARGET      hint #34  /* BTI 'c'  */
#define AARCH64_VALID_JUMP_TARGET      hint #36  /* BTI 'j'  */
#define AARCH64_VALID_JUMP_CALL_TARGET hint #38  /* BTI 'jc' */
#endif
165
/* Link-register signing (pointer authentication).
 *
 * __ARM_FEATURE_PAC_DEFAULT is tested as a bit mask here:
 *   bit 0 - authenticate with key A
 *   bit 1 - authenticate with key B
 *   bit 2 - also authenticate leaf functions (rejected below)
 *
 * Three configurations are handled:
 *   1. the compiler defines __ARM_FEATURE_PAC_DEFAULT: sign/validate with the
 *      selected key and set the PAC bit for the GNU property note;
 *   2. Apple arm64e without that define: sign/validate with key B, with the
 *      PAC property value left at 0;
 *   3. anything else: both macros expand to nothing.
 */
#if defined(__ARM_FEATURE_PAC_DEFAULT)

#if (__ARM_FEATURE_PAC_DEFAULT & 0x1)       /* key A */
#define AARCH64_SIGN_LINK_REGISTER      paciasp
#define AARCH64_VALIDATE_LINK_REGISTER  autiasp
#elif (__ARM_FEATURE_PAC_DEFAULT & 0x2)     /* key B */
#define AARCH64_SIGN_LINK_REGISTER      pacibsp
#define AARCH64_VALIDATE_LINK_REGISTER  autibsp
#else
#error Pointer authentication defines no valid key!
#endif

#if (__ARM_FEATURE_PAC_DEFAULT & 0x4)       /* leaf functions */
#error Authentication of leaf functions is enabled but not supported in dav1d!
#endif

#define GNU_PROPERTY_AARCH64_PAC (1 << 1)

#elif defined(__APPLE__) && defined(__arm64e__)

#define AARCH64_SIGN_LINK_REGISTER      pacibsp
#define AARCH64_VALIDATE_LINK_REGISTER  autibsp
#define GNU_PROPERTY_AARCH64_PAC 0

#else /* no pointer authentication */

#define AARCH64_SIGN_LINK_REGISTER
#define AARCH64_VALIDATE_LINK_REGISTER
#define GNU_PROPERTY_AARCH64_PAC 0

#endif /* pointer authentication selection */
195
196
/* On ELF targets, when BTI and/or PAC is in use, emit a .note.gnu.property
 * note whose single property carries the corresponding feature bits. */
#if defined(__ELF__) && (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0)
        .pushsection .note.gnu.property, "a"
        .p2align 3
        /* Note header */
        .long 4                 /* name size: "GNU" plus its NUL terminator */
        .long 0x10              /* descriptor size: the 16 bytes after the name */
        .long 0x5               /* note type (NT_GNU_PROPERTY_TYPE_0) */
        .asciz "GNU"            /* note name */
        /* Descriptor: one property */
        .long 0xc0000000        /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
        .long 4                 /* size of the property data */
        .long (GNU_PROPERTY_AARCH64_BTI | GNU_PROPERTY_AARCH64_PAC)
        .long 0                 /* pads the descriptor to 16 bytes */
        .popsection
#endif /* ELF with BTI and/or PAC */
210#endif /* ARCH_AARCH64 */
211
/* Setup for 32-bit ARM builds. */
#if ARCH_ARM
        .syntax unified

#if defined(__ELF__)
        .arch armv7-a
        .fpu neon
        /* suppress Tag_FP_arch */
        .eabi_attribute 10, 0
        /* suppress Tag_Advanced_SIMD_arch */
        .eabi_attribute 12, 0
        /* Mark stack as non-executable */
        .section .note.GNU-stack,"",%progbits
#endif /* __ELF__ */

/* Thumb mode is selected on Windows only. */
#ifndef _WIN32
#define CONFIG_THUMB 0
#else
#define CONFIG_THUMB 1
#endif

/* A and T are line prefixes for mode-specific code. The prefix of the active
 * mode expands to nothing; the other one expands to '@', the GNU ARM
 * assembler comment character, so the rest of that line is ignored. */
#if !CONFIG_THUMB
#define A
#define T @
#else
        .thumb
#define A @
#define T
#endif /* CONFIG_THUMB */

#endif /* ARCH_ARM */
237
/* If PIC was not set from outside, take it over from the compiler's
 * __PIC__ (preferred) or __pic__ macro. When neither exists, PIC stays
 * undefined. */
#if !defined(PIC) && defined(__PIC__)
#define PIC __PIC__
#elif !defined(PIC) && defined(__pic__)
#define PIC __pic__
#endif

/* Default prefix of all library-private symbols. */
#if !defined(PRIVATE_PREFIX)
#define PRIVATE_PREFIX dav1d_
#endif

/* Token pasting. CONCAT goes through PASTE so that its arguments are
 * macro-expanded before they are joined. */
#define PASTE(lhs,rhs) lhs ## rhs
#define CONCAT(lhs,rhs) PASTE(lhs,rhs)

/* EXTERN is the prefix used for exported symbol names: PRIVATE_PREFIX,
 * with an extra leading underscore when PREFIX is defined. */
#ifndef PREFIX
#define EXTERN PRIVATE_PREFIX
#else
#define EXTERN CONCAT(_,PRIVATE_PREFIX)
#endif
258
/* function name, export=0, align=2
 *
 * Opens a function in the text section and defines the one-shot macro
 * `endfunc` that closes it.
 *
 *   name   - label of the function; always defined as a plain label.
 *   export - when non-zero, the symbol EXTERN<name> is additionally defined
 *            at the same address. It is global, but hidden on ELF and
 *            private_extern on Mach-O. On AArch64 an exported function also
 *            starts with AARCH64_VALID_CALL_TARGET.
 *   align  - operand handed to the .align directive.
 *
 * `endfunc` records the ELF symbol size of <name> and, for exported
 * functions, of EXTERN<name> as well, so that the symbol carrying the
 * function type also carries a size. It then removes itself, so that every
 * `function` must be paired with exactly one `endfunc`.
 */
.macro function name, export=0, align=2
    .macro endfunc
#ifdef __ELF__
        .size   \name, . - \name
    .if \export
        .size   EXTERN\name, . - EXTERN\name
    .endif
#endif
#if HAVE_AS_FUNC
        .endfunc
#endif
        .purgem endfunc
    .endm
        .text
        .align \align
    .if \export
        .global EXTERN\name
#ifdef __ELF__
        .type   EXTERN\name, %function
        .hidden EXTERN\name
#elif defined(__MACH__)
        .private_extern EXTERN\name
#endif
#if HAVE_AS_FUNC
        .func   EXTERN\name
#endif
EXTERN\name:
    .else
#ifdef __ELF__
        .type \name, %function
#endif
#if HAVE_AS_FUNC
        .func \name
#endif
    .endif
\name:
#if ARCH_AARCH64
    /* Only exported functions get a landing pad here. */
    .if \export
         AARCH64_VALID_CALL_TARGET
    .endif
#endif
.endm
298
/* const name, export=0, align=2
 *
 * Opens a constant data object in the read-only data section of the target
 * (.rdata on Windows, .const_data on Mach-O, .rodata otherwise) and defines
 * the one-shot macro `endconst` that closes it.
 *
 *   name   - label of the object; always defined as a plain label.
 *   export - when non-zero, the symbol EXTERN<name> is additionally defined
 *            at the same address. It is global, but hidden on ELF and
 *            private_extern on Mach-O.
 *   align  - operand handed to the .align directive.
 *
 * `endconst` records the ELF symbol size of <name> and, for exported
 * objects, of EXTERN<name> as well, then removes itself.
 */
.macro  const   name, export=0, align=2
    .macro endconst
#ifdef __ELF__
        .size   \name, . - \name
    .if \export
        .size   EXTERN\name, . - EXTERN\name
    .endif
#endif
        .purgem endconst
    .endm
#if defined(_WIN32)
        .section        .rdata
#elif !defined(__MACH__)
        .section        .rodata
#else
        .const_data
#endif
        .align          \align
    .if \export
        .global EXTERN\name
#ifdef __ELF__
        .hidden EXTERN\name
#elif defined(__MACH__)
        .private_extern EXTERN\name
#endif
EXTERN\name:
    .endif
\name:
.endm
325
/* jumptable name
 *
 * Opens a jump table called <name>; close it with `endjumptable`.
 */
.macro jumptable name
#ifndef _WIN32
// Default placement: a const data section, so that jump tables keep working
// in environments where executable memory is not readable.
        const \name
#else
// Windows: keep the table in the text section, so that label differences can
// be computed at assembly time. MS armasm64 does not seem to be able to
// create relocations for the subtraction of labels in different sections; see
// https://developercommunity.visualstudio.com/t/armasm64-unable-to-create-cross-section/10722340
// for reference. LLVM can create such relocations, but for simplicity the
// check is on _WIN32 for all Windows assemblers, as execute-only memory is
// not relevant on Windows at the moment.
        function \name
#endif
.endm
343
/* endjumptable
 *
 * Closes a table opened with `jumptable`, using the terminator of whichever
 * macro opened it: `endconst` by default, `endfunc` on Windows.
 */
.macro endjumptable
#ifndef _WIN32
        endconst
#else
        endfunc
#endif
.endm
351
/* L(label): spelling of an assembler-local label. The name is prefixed with
 * "L" on Apple targets and with ".L" everywhere else. */
#ifndef __APPLE__
#define L(label) .L ## label
#else
#define L(label) L ## label
#endif

/* X(sym): name of an exported symbol, i.e. sym with the EXTERN prefix. */
#define X(sym) CONCAT(EXTERN, sym)
359
360
361#endif /* DAV1D_SRC_ARM_ASM_S */
362