// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_ARM_UTILS_H_
#define COMPONENTS_ZUCCHINI_ARM_UTILS_H_

#include <stddef.h>
#include <stdint.h>

#include "base/check_op.h"
#include "components/zucchini/address_translator.h"
#include "components/zucchini/buffer_view.h"

namespace zucchini {

// References:
// * AArch32 (32-bit ARM, AKA ARM32):
//     https://static.docs.arm.com/ddi0406/c/DDI0406C_C_arm_architecture_reference_manual.pdf
// * AArch64 (64-bit ARM):
//     https://static.docs.arm.com/ddi0487/da/DDI0487D_a_armv8_arm.pdf

// Definitions (used in Zucchini):
// * |instr_rva|: Instruction RVA: The RVA where an instruction is located. In
//   ARM mode and for AArch64 this is 4-byte aligned; in THUMB2 mode this is
//   2-byte aligned.
// * |code|: Instruction code: ARM instruction code as seen in manual. In ARM
//   mode and for AArch64, this is a 32-bit int. In THUMB2 mode, this may be a
//   16-bit or 32-bit int.
// * |disp|: Displacement: For branch instructions (e.g.: B, BL, BLX, and
//   conditional varieties) this is the value encoded in instruction bytes.
// * PC: Program Counter: In ARM mode this is |instr_rva + 8|; in THUMB2 mode
//   this is |instr_rva + 4|; for AArch64 this is |instr_rva|.
// * |target_rva|: Target RVA: The RVA targeted by a branch instruction.
//
// These are related by:
//   |code| = Fetch(image data at offset(|instr_rva|)).
//   |disp| = Decode(|code|).
//   PC = |instr_rva| + {8 in ARM mode, 4 in THUMB2 mode, 0 for AArch64}.
//   |target_rva| = PC + |disp|  (see "BLX complication" below).
//
// Example 1 (ARM mode):
//   00103050: 00 01 02 EA    B     00183458
//   |instr_rva| = 0x00103050  (4-byte aligned).
//   |code| = 0xEA020100  (little endian fetched from data).
//   |disp| = 0x00080400  (decoded from |code| with A24 -> B encoding A1).
//   PC = |instr_rva| + 8 = 0x00103058  (ARM mode).
//   |target_rva| = PC + |disp| = 0x00183458.
//
// Example 2 (THUMB2 mode):
//   001030A2: 00 F0 01 FA    BL    001034A8
//   |instr_rva| = 0x001030A2  (2-byte aligned).
//   |code| = 0xF000FA01  (special THUMB2 mode data fetch).
//   |disp| = 0x00000402  (decoded from |code| with T24 -> BL encoding T1).
//   PC = |instr_rva| + 4 = 0x001030A6  (THUMB2 mode).
//   |target_rva| = PC + |disp| = 0x001034A8.
//
// Example 3 (AArch64):
//   0000000000305070: 03 02 01 14    B     000000000034587C
//   |instr_rva| = 0x00305070  (4-byte aligned, assumed to fit in 32-bit).
//   |code| = 0x14010203  (little endian fetched from data).
//   |disp| = 0x0004080C  (decoded from |code| with Immd -> B).
//   PC = |instr_rva| = 0x00305070  (AArch64).
//   |target_rva| = PC + |disp| = 0x0034587C.

// BLX complication: BLX transits between ARM mode and THUMB2 mode, and branches
// to an address. Therefore |instr_rva| must align by the "old" mode, and
// |target_rva| must align by the "new" mode. In particular:
// * BLX encoding A2 (ARM -> THUMB2): |instr_rva| is 4-byte aligned with
//   PC = |instr_rva| + 8; |target_rva| is 2-byte aligned, and so |disp| is
//   2-byte aligned.
// * BLX encoding T2 (THUMB2 -> ARM): |instr_rva| is 2-byte aligned with
//   PC = |instr_rva| + 4; |target_rva| is 4-byte aligned. Complication: BLX
//   encoding T2 stores a bit |H| that corresponds to "2" in binary, but |H|
//   must be set to 0. Thus the encoded value is effectively 4-byte aligned. So
//   when computing |target_rva| by adding PC (2-byte aligned) to the stored
//   value (4-byte aligned), the result must be rounded down to the nearest
//   4-byte aligned address.
// The last situation creates ambiguity in how |disp| is defined! Alternatives:
// (1) |disp| := |target_rva| - PC: So |code| <-> |disp| for BLX encoding T2
//     requires |instr_rva| % 4 to be determined, and adjustments made.
// (2) |disp| := Value stored in |code|: So |disp| <-> |target_rva| for BLX
//     encoding T2 requires adjustment: |disp| -> |target_rva| needs to round
//     down, whereas |target_rva| -> |disp| needs to round up.
// We adopt (2) to simplify |code| <-> |disp|, since that gets used.
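// Illustrative example with hypothetical values: a BLX encoding T2 at
// |instr_rva| = 0x001030A2 stores |disp| = 0x00000404 (4-byte aligned); with
// PC = 0x001030A6, PC + |disp| = 0x001034AA, which rounds down to
// |target_rva| = 0x001034A8. Conversely, |target_rva| - PC = 0x00000402, which
// rounds up to |disp| = 0x00000404.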

using arm_disp_t = int32_t;

// Alignment requirement for |target_rva|, useful for |disp| <-> |target_rva|
// (also requires |instr_rva|). Alignment is determined by parsing |code| in
// *Decode() functions. kArmAlignFail is also defined to indicate parse failure.
// Alignments can be 2 or 4. These values are also used in the enum, so
// |x % align| can be computed as |x & (align - 1)|.
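// For example, with |align| = kArmAlign4, |x % 4| == |x & 3|.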
enum ArmAlign : uint32_t {
  kArmAlignFail = 0U,
  kArmAlign2 = 2U,
  kArmAlign4 = 4U,
};

// Traits for rel32 address types (technically rel64 for AArch64 -- but we
// assume values are small enough), which form collections of strategies to
// process each rel32 address type.
template <typename ENUM_ADDR_TYPE,
          ENUM_ADDR_TYPE ADDR_TYPE,
          typename CODE_T,
          CODE_T (*FETCH)(ConstBufferView, offset_t),
          void (*STORE)(MutableBufferView, offset_t, CODE_T),
          ArmAlign (*DECODE)(CODE_T, arm_disp_t*),
          bool (*ENCODE)(arm_disp_t, CODE_T*),
          bool (*READ)(rva_t, CODE_T, rva_t*),
          bool (*WRITE)(rva_t, rva_t, CODE_T*)>
class ArmAddrTraits {
 public:
  static constexpr ENUM_ADDR_TYPE addr_type = ADDR_TYPE;
  using code_t = CODE_T;
  static constexpr CODE_T (*Fetch)(ConstBufferView, offset_t) = FETCH;
  static constexpr void (*Store)(MutableBufferView, offset_t, CODE_T) = STORE;
  static constexpr ArmAlign (*Decode)(CODE_T, arm_disp_t*) = DECODE;
  static constexpr bool (*Encode)(arm_disp_t, CODE_T*) = ENCODE;
  static constexpr bool (*Read)(rva_t, CODE_T, rva_t*) = READ;
  static constexpr bool (*Write)(rva_t, rva_t, CODE_T*) = WRITE;
};
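
// Usage sketch (hypothetical caller, not part of this header): generic code
// can process any rel32 address type through its traits bundle, e.g.:
//   template <class TRAITS>
//   bool ExtractTargetRva(ConstBufferView view, offset_t offset,
//                         rva_t instr_rva, rva_t* target_rva) {
//     typename TRAITS::code_t code = TRAITS::Fetch(view, offset);
//     return TRAITS::Read(instr_rva, code, target_rva);
//   }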

// Given THUMB2 instruction |code16|, returns 2 if it's from a 16-bit THUMB2
// instruction, or 4 if it's from a 32-bit THUMB2 instruction.
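// For example, the leading halfword 0xF000 of the BL in Example 2 yields 4,
// while a 16-bit instruction such as 0x4770 (BX LR) yields 2.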
inline int GetThumb2InstructionSize(uint16_t code16) {
  return ((code16 & 0xF000) == 0xF000 || (code16 & 0xF800) == 0xE800) ? 4 : 2;
}

// A translator for ARM mode and THUMB2 mode with static functions that
// translate among |code|, |disp|, and |target_rva|.
class AArch32Rel32Translator {
 public:
  // Rel32 address types enumeration.
  enum AddrType : uint8_t {
    ADDR_NONE = 0xFF,
    // Naming: Here "A24" represents ARM mode instructions where |code|
    // dedicates 24 bits (including sign bit) to specify |disp|. Similarly, "T8"
    // represents THUMB2 mode instructions with 8 bits for |disp|. Currently
    // only {A24, T8, T11, T20, T24} are defined. These are not to be confused
    // with "B encoding A1", "B encoding T3", etc., which are specific encoding
    // schemes given by the manual for the "B" (or other) instructions (only
    // {A1, A2, T1, T2, T3, T4} are seen).
    ADDR_A24 = 0,
    ADDR_T8,
    ADDR_T11,
    ADDR_T20,
    ADDR_T24,
    NUM_ADDR_TYPE
  };

  AArch32Rel32Translator();
  AArch32Rel32Translator(const AArch32Rel32Translator&) = delete;
  const AArch32Rel32Translator& operator=(const AArch32Rel32Translator&) =
      delete;

  // Fetches the 32-bit ARM instruction |code| at |view[idx]|.
  static inline uint32_t FetchArmCode32(ConstBufferView view, offset_t idx) {
    return view.read<uint32_t>(idx);
  }

  // Fetches the 16-bit THUMB2 instruction |code| at |view[idx]|.
  static inline uint16_t FetchThumb2Code16(ConstBufferView view, offset_t idx) {
    return view.read<uint16_t>(idx);
  }

  // Fetches the 32-bit THUMB2 instruction |code| at |view[idx]|.
  static inline uint32_t FetchThumb2Code32(ConstBufferView view, offset_t idx) {
    // By convention, 32-bit THUMB2 instructions are written (as seen later) as:
    //   [byte3, byte2, byte1, byte0].
    // However (assuming little-endian ARM) the in-memory representation is
    //   [byte2, byte3, byte0, byte1].
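    // For example, the in-memory bytes 00 F0 01 FA from Example 2 are fetched
    // as |code| = 0xF000FA01.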
    return (static_cast<uint32_t>(view.read<uint16_t>(idx)) << 16) |
           view.read<uint16_t>(idx + 2);
  }

  // Stores the 32-bit ARM instruction |code| to |mutable_view[idx]|.
  static inline void StoreArmCode32(MutableBufferView mutable_view,
                                    offset_t idx,
                                    uint32_t code) {
    mutable_view.write<uint32_t>(idx, code);
  }

  // Stores the 16-bit THUMB2 instruction |code| to |mutable_view[idx]|.
  static inline void StoreThumb2Code16(MutableBufferView mutable_view,
                                       offset_t idx,
                                       uint16_t code) {
    mutable_view.write<uint16_t>(idx, code);
  }

  // Stores the 32-bit THUMB2 instruction |code| to |mutable_view[idx]|.
  static inline void StoreThumb2Code32(MutableBufferView mutable_view,
                                       offset_t idx,
                                       uint32_t code) {
    mutable_view.write<uint16_t>(idx, static_cast<uint16_t>(code >> 16));
    mutable_view.write<uint16_t>(idx + 2, static_cast<uint16_t>(code & 0xFFFF));
  }

  // The following functions convert |code| (16-bit or 32-bit) from/to |disp|
  // or |target_rva|, for specific branch instruction types. Decode*() and
  // Encode*() convert between |code| and |disp|; Read*() and Write*() convert
  // between |code| and |target_rva|.
  // * Decode*() determines whether |code16/code32| is a branch instruction
  //   of a specific type. If so, then extracts |*disp| and returns the required
  //   ArmAlign. Otherwise returns kArmAlignFail.
  // * Encode*() determines whether |*code16/*code32| is a branch instruction of
  //   a specific type, and whether it can accommodate |disp|. If so, then
  //   re-encodes |*code16/*code32| using |disp|, and returns true. Otherwise
  //   returns false.
  // * Read*() is similar to Decode*(), but on success, extracts |*target_rva|
  //   using |instr_rva| as aid, performs the proper alignment, and returns
  //   true. Otherwise returns false.
  // * Write*() is similar to Encode*(), takes |target_rva| instead, and uses
  //   |instr_rva| as aid.
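  // For example, using the values from Example 1 above,
  // ReadA24(0x00103050, 0xEA020100, &target_rva) is expected to succeed and
  // set |target_rva| = 0x00183458.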
  static ArmAlign DecodeA24(uint32_t code32, arm_disp_t* disp);
  static bool EncodeA24(arm_disp_t disp, uint32_t* code32);
  // TODO(huangs): Refactor the Read*() functions: These are identical
  // except for Decode*() and Get*TargetRvaFromDisp().
  static bool ReadA24(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
  static bool WriteA24(rva_t instr_rva, rva_t target_rva, uint32_t* code32);

  static ArmAlign DecodeT8(uint16_t code16, arm_disp_t* disp);
  static bool EncodeT8(arm_disp_t disp, uint16_t* code16);
  static bool ReadT8(rva_t instr_rva, uint16_t code16, rva_t* target_rva);
  static bool WriteT8(rva_t instr_rva, rva_t target_rva, uint16_t* code16);

  static ArmAlign DecodeT11(uint16_t code16, arm_disp_t* disp);
  static bool EncodeT11(arm_disp_t disp, uint16_t* code16);
  static bool ReadT11(rva_t instr_rva, uint16_t code16, rva_t* target_rva);
  static bool WriteT11(rva_t instr_rva, rva_t target_rva, uint16_t* code16);

  static ArmAlign DecodeT20(uint32_t code32, arm_disp_t* disp);
  static bool EncodeT20(arm_disp_t disp, uint32_t* code32);
  static bool ReadT20(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
  static bool WriteT20(rva_t instr_rva, rva_t target_rva, uint32_t* code32);

  static ArmAlign DecodeT24(uint32_t code32, arm_disp_t* disp);
  static bool EncodeT24(arm_disp_t disp, uint32_t* code32);
  static bool ReadT24(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
  static bool WriteT24(rva_t instr_rva, rva_t target_rva, uint32_t* code32);

  // Computes |target_rva| from |instr_rva| and |disp| in ARM mode.
  static inline rva_t GetArmTargetRvaFromDisp(rva_t instr_rva,
                                              arm_disp_t disp,
                                              ArmAlign align) {
    rva_t ret = static_cast<rva_t>(instr_rva + 8 + disp);
    // Align down.
    DCHECK_NE(align, kArmAlignFail);
    return ret - (ret & static_cast<rva_t>(align - 1));
  }

  // Computes |target_rva| from |instr_rva| and |disp| in THUMB2 mode.
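  // For example, with the values from Example 2 above (and assuming the BL
  // decodes with alignment kArmAlign2),
  // GetThumb2TargetRvaFromDisp(0x001030A2, 0x00000402, kArmAlign2) returns
  // 0x001030A6 + 0x402 = 0x001034A8, with no rounding needed.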
  static inline rva_t GetThumb2TargetRvaFromDisp(rva_t instr_rva,
                                                 arm_disp_t disp,
                                                 ArmAlign align) {
    rva_t ret = static_cast<rva_t>(instr_rva + 4 + disp);
    // Align down.
    DCHECK_NE(align, kArmAlignFail);
    return ret - (ret & static_cast<rva_t>(align - 1));
  }

  // Computes |disp| from |instr_rva| and |target_rva| in ARM mode.
  static inline arm_disp_t GetArmDispFromTargetRva(rva_t instr_rva,
                                                   rva_t target_rva,
                                                   ArmAlign align) {
    // Assumes that |instr_rva + 8| does not overflow.
    arm_disp_t ret = static_cast<arm_disp_t>(target_rva) -
                     static_cast<arm_disp_t>(instr_rva + 8);
    // Align up.
    DCHECK_NE(align, kArmAlignFail);
    return ret + ((-ret) & static_cast<arm_disp_t>(align - 1));
  }

  // Computes |disp| from |instr_rva| and |target_rva| in THUMB2 mode.
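  // For example, for a hypothetical BLX encoding T2 at |instr_rva| = 0x001030A2
  // targeting 0x001034A8,
  // GetThumb2DispFromTargetRva(0x001030A2, 0x001034A8, kArmAlign4) computes
  // 0x402 and rounds it up to 0x404, consistent with convention (2) above.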
  static inline arm_disp_t GetThumb2DispFromTargetRva(rva_t instr_rva,
                                                      rva_t target_rva,
                                                      ArmAlign align) {
    // Assumes that |instr_rva + 4| does not overflow.
    arm_disp_t ret = static_cast<arm_disp_t>(target_rva) -
                     static_cast<arm_disp_t>(instr_rva + 4);
    // Align up.
    DCHECK_NE(align, kArmAlignFail);
    return ret + ((-ret) & static_cast<arm_disp_t>(align - 1));
  }

  // Strategies to process each rel32 address type.
  using AddrTraits_A24 = ArmAddrTraits<AddrType,
                                       ADDR_A24,
                                       uint32_t,
                                       FetchArmCode32,
                                       StoreArmCode32,
                                       DecodeA24,
                                       EncodeA24,
                                       ReadA24,
                                       WriteA24>;
  using AddrTraits_T8 = ArmAddrTraits<AddrType,
                                      ADDR_T8,
                                      uint16_t,
                                      FetchThumb2Code16,
                                      StoreThumb2Code16,
                                      DecodeT8,
                                      EncodeT8,
                                      ReadT8,
                                      WriteT8>;
  using AddrTraits_T11 = ArmAddrTraits<AddrType,
                                       ADDR_T11,
                                       uint16_t,
                                       FetchThumb2Code16,
                                       StoreThumb2Code16,
                                       DecodeT11,
                                       EncodeT11,
                                       ReadT11,
                                       WriteT11>;
  using AddrTraits_T20 = ArmAddrTraits<AddrType,
                                       ADDR_T20,
                                       uint32_t,
                                       FetchThumb2Code32,
                                       StoreThumb2Code32,
                                       DecodeT20,
                                       EncodeT20,
                                       ReadT20,
                                       WriteT20>;
  using AddrTraits_T24 = ArmAddrTraits<AddrType,
                                       ADDR_T24,
                                       uint32_t,
                                       FetchThumb2Code32,
                                       StoreThumb2Code32,
                                       DecodeT24,
                                       EncodeT24,
                                       ReadT24,
                                       WriteT24>;
};

// Translator for AArch64, which is simpler than 32-bit ARM. Although pointers
// are 64-bit, displacements are within 32-bit.
class AArch64Rel32Translator {
 public:
  // Rel64 address types enumeration.
  enum AddrType : uint8_t {
    ADDR_NONE = 0xFF,
    ADDR_IMMD14 = 0,
    ADDR_IMMD19,
    ADDR_IMMD26,
    NUM_ADDR_TYPE
  };

  // Although RVA for 64-bit architecture can be 64-bit in length, we make the
  // bold assumption that for ELF images the RVA will stay nicely in 32-bit!
  AArch64Rel32Translator();
  AArch64Rel32Translator(const AArch64Rel32Translator&) = delete;
  const AArch64Rel32Translator& operator=(const AArch64Rel32Translator&) =
      delete;

  static inline uint32_t FetchCode32(ConstBufferView view, offset_t idx) {
    return view.read<uint32_t>(idx);
  }

  static inline void StoreCode32(MutableBufferView mutable_view,
                                 offset_t idx,
                                 uint32_t code) {
    mutable_view.write<uint32_t>(idx, code);
  }

  // Conversion functions for |code32| from/to |disp| or |target_rva|, similar
  // to the counterparts in AArch32Rel32Translator.
  static ArmAlign DecodeImmd14(uint32_t code32, arm_disp_t* disp);
  static bool EncodeImmd14(arm_disp_t disp, uint32_t* code32);
  // TODO(huangs): Refactor the Read*() functions: These are identical
  // except for Decode*().
  static bool ReadImmd14(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
  static bool WriteImmd14(rva_t instr_rva, rva_t target_rva, uint32_t* code32);

  static ArmAlign DecodeImmd19(uint32_t code32, arm_disp_t* disp);
  static bool EncodeImmd19(arm_disp_t disp, uint32_t* code32);
  static bool ReadImmd19(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
  static bool WriteImmd19(rva_t instr_rva, rva_t target_rva, uint32_t* code32);

  static ArmAlign DecodeImmd26(uint32_t code32, arm_disp_t* disp);
  static bool EncodeImmd26(arm_disp_t disp, uint32_t* code32);
  static bool ReadImmd26(rva_t instr_rva, uint32_t code32, rva_t* target_rva);
  static bool WriteImmd26(rva_t instr_rva, rva_t target_rva, uint32_t* code32);

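  // Computes |target_rva| from |instr_rva| and |disp|. For example, with the
  // values from Example 3 above, GetTargetRvaFromDisp(0x00305070, 0x0004080C)
  // returns 0x0034587C.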
  static inline rva_t GetTargetRvaFromDisp(rva_t instr_rva, arm_disp_t disp) {
    return static_cast<rva_t>(instr_rva + disp);
  }

  static inline arm_disp_t GetDispFromTargetRva(rva_t instr_rva,
                                                rva_t target_rva) {
    return static_cast<arm_disp_t>(target_rva - instr_rva);
  }

  // Strategies to process each rel32 address type.
  using AddrTraits_Immd14 = ArmAddrTraits<AddrType,
                                          ADDR_IMMD14,
                                          uint32_t,
                                          FetchCode32,
                                          StoreCode32,
                                          DecodeImmd14,
                                          EncodeImmd14,
                                          ReadImmd14,
                                          WriteImmd14>;
  using AddrTraits_Immd19 = ArmAddrTraits<AddrType,
                                          ADDR_IMMD19,
                                          uint32_t,
                                          FetchCode32,
                                          StoreCode32,
                                          DecodeImmd19,
                                          EncodeImmd19,
                                          ReadImmd19,
                                          WriteImmd19>;
  using AddrTraits_Immd26 = ArmAddrTraits<AddrType,
                                          ADDR_IMMD26,
                                          uint32_t,
                                          FetchCode32,
                                          StoreCode32,
                                          DecodeImmd26,
                                          EncodeImmd26,
                                          ReadImmd26,
                                          WriteImmd26>;
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_ARM_UTILS_H_