/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
#define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_

#include <array>
#include <cstdint>
#include <type_traits>

#include "berberis/assembler/x86_64.h"
#include "berberis/base/bit_util.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/runtime_primitives/platform.h"

namespace berberis::call_intrinsic {

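// Caller-saved general-purpose registers of the x86-64 SysV ABI (all GP registers except rsp and
// the callee-saved rbx, rbp, r12-r15). Their order here defines their slot order in the save area.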
constexpr x86_64::Assembler::Register kCallerSavedRegs[] = {
    x86_64::Assembler::rax,
    x86_64::Assembler::rcx,
    x86_64::Assembler::rdx,
    x86_64::Assembler::rdi,
    x86_64::Assembler::rsi,
    x86_64::Assembler::r8,
    x86_64::Assembler::r9,
    x86_64::Assembler::r10,
    x86_64::Assembler::r11,
};

constexpr int8_t kRegIsNotOnStack = -1;

// Map from register number to offset in the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr auto kRegOffsetsOnStack = []() {
  std::array<int8_t, 16> regs_on_stack = {};
  // regs_on_stack.fill(kRegIsNotOnStack); would be simpler, but std::array::fill is only
  // constexpr in C++20.
  for (auto& num : regs_on_stack) {
    num = kRegIsNotOnStack;
  }

  int8_t stack_allocation_size = 0;
  for (auto reg : kCallerSavedRegs) {
    regs_on_stack[reg.GetPhysicalIndex()] = stack_allocation_size;
    ++stack_allocation_size;
  }
  return regs_on_stack;
}();
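// For the list above this yields: rax -> slot 0, rcx -> 1, rdx -> 2, rdi -> 3, rsi -> 4,
// r8 -> 5, r9 -> 6, r10 -> 7, r11 -> 8; every other physical index stays kRegIsNotOnStack.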

constexpr x86_64::Assembler::XMMRegister kCallerSavedXMMRegs[] = {
    x86_64::Assembler::xmm0,
    x86_64::Assembler::xmm1,
    x86_64::Assembler::xmm2,
    x86_64::Assembler::xmm3,
    x86_64::Assembler::xmm4,
    x86_64::Assembler::xmm5,
    x86_64::Assembler::xmm6,
    x86_64::Assembler::xmm7,
    x86_64::Assembler::xmm8,
    x86_64::Assembler::xmm9,
    x86_64::Assembler::xmm10,
    x86_64::Assembler::xmm11,
    x86_64::Assembler::xmm12,
    x86_64::Assembler::xmm13,
    x86_64::Assembler::xmm14,
    x86_64::Assembler::xmm15,
};

// Map from register number to offset in the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr auto kSimdRegOffsetsOnStack = []() {
  std::array<int8_t, 16> simd_regs_on_stack = {};
  // simd_regs_on_stack.fill(kRegIsNotOnStack); would be simpler, but std::array::fill is only
  // constexpr in C++20.
  for (auto& num : simd_regs_on_stack) {
    num = kRegIsNotOnStack;
  }

  // SIMD slots follow the GP slots; the base offset is rounded up to an even slot so that the
  // 16-byte XMM slots start at 16-byte-aligned offsets within the save area (Movdqa is an
  // aligned access).
  int8_t stack_allocation_size = AlignUp(std::size(kCallerSavedRegs), 2);
  for (auto reg : kCallerSavedXMMRegs) {
    simd_regs_on_stack[reg.GetPhysicalIndex()] = stack_allocation_size;
    stack_allocation_size += 2;
  }
  return simd_regs_on_stack;
}();

// Save area size for the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr int8_t kSaveAreaSize =
    AlignUp(std::size(kCallerSavedRegs), 2) + std::size(kCallerSavedXMMRegs) * 2;
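// Worked out for the arrays above: AlignUp(9, 2) = 10 GP slots (slot 9 is padding), plus
// 16 XMM registers * 2 slots = 32, so kSaveAreaSize = 42 slots = 336 bytes. xmm0 lands at
// slot 10 (byte offset 80), xmm1 at slot 12, and so on up to xmm15 at slot 40.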

struct StoredRegsInfo {
  std::decay_t<decltype(kRegOffsetsOnStack)> regs_on_stack;
  std::decay_t<decltype(kSimdRegOffsetsOnStack)> simd_regs_on_stack;
};

inline void PushCallerSaved(MacroAssembler<x86_64::Assembler>& as) {
  as.Subq(as.rsp, kSaveAreaSize * 8);

  for (auto reg : kCallerSavedRegs) {
    as.Movq({.base = as.rsp, .disp = kRegOffsetsOnStack[reg.GetPhysicalIndex()] * 8}, reg);
  }

  for (auto reg : kCallerSavedXMMRegs) {
    as.Movdqa({.base = as.rsp, .disp = kSimdRegOffsetsOnStack[reg.GetPhysicalIndex()] * 8}, reg);
  }
}

// Note: regs_on_stack is usually a copy of kRegOffsetsOnStack with some registers marked off as
// kRegIsNotOnStack, and simd_regs_on_stack is kSimdRegOffsetsOnStack with some registers marked as
// kRegIsNotOnStack. These registers are skipped during the restoration process.
inline void PopCallerSaved(MacroAssembler<x86_64::Assembler>& as, const StoredRegsInfo regs_info) {
  for (auto reg : kCallerSavedRegs) {
    if (regs_info.regs_on_stack[reg.GetPhysicalIndex()] != kRegIsNotOnStack) {
      as.Movq(reg, {.base = as.rsp, .disp = regs_info.regs_on_stack[reg.GetPhysicalIndex()] * 8});
    }
  }
  for (auto reg : kCallerSavedXMMRegs) {
    if (regs_info.simd_regs_on_stack[reg.GetPhysicalIndex()] != kRegIsNotOnStack) {
      as.Movdqa(reg,
                {.base = as.rsp, .disp = regs_info.simd_regs_on_stack[reg.GetPhysicalIndex()] * 8});
    }
  }

  as.Addq(as.rsp, kSaveAreaSize * 8);
}
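
// A typical pairing, and exactly what CallIntrinsic below emits:
//
//   PushCallerSaved(as);
//   ... emit the call ...
//   auto regs_info = ForwardResults<IntrinsicResType>(as, result);
//   PopCallerSaved(as, regs_info);  // Restores everything except the result register(s).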

// Nonfunctional assembler used by a static_assert expression. It doesn't do anything, but it
// allows us to call InitArgs at compile time with the same argument types that would occur during
// execution.
//
// This turns a runtime check into a compile-time check and thus allows us to catch weird corner
// cases faster.
class ConstExprCheckAssembler {
 public:
  using Operand = MacroAssembler<x86_64::Assembler>::Operand;
  using Register = MacroAssembler<x86_64::Assembler>::Register;
  using XMMRegister = MacroAssembler<x86_64::Assembler>::XMMRegister;
  static constexpr auto rsp = MacroAssembler<x86_64::Assembler>::rsp;

  constexpr ConstExprCheckAssembler() = default;

  template <typename U, typename V>
  constexpr void Expand(Register, Operand) const {}
  template <typename U, typename V>
  constexpr void Expand(Register, Register) const {}

  template <typename U>
  constexpr void Mov(Operand, Register) const {}
  template <typename U>
  constexpr void Mov(Register, Operand) const {}
  template <typename U>
  constexpr void Mov(Register, Register) const {}

  constexpr void Movl(Register, int32_t) const {}

  template <typename U>
  constexpr void Movs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, XMMRegister) const {}

  template <typename U>
  constexpr void Vmovs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, XMMRegister, XMMRegister) const {}
};
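
// ConstExprCheckAssembler is consumed by InitArgsVerify below:
//
//   static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
//       ConstExprCheckAssembler(), true, dummy_args...));
//
// If a combination of argument types cannot be marshalled, InitArgs returns false and the
// static_assert fires at build time instead of miscompiling at run time.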

// Helper wrapper to pass the intrinsic type down the generic lambda.
template <typename T, typename U>
struct ArgWrap {
  using AssemblerType = T;
  using IntrinsicType = U;
  AssemblerType value;
};

static constexpr x86_64::Assembler::Register kAbiArgs[] = {
    x86_64::Assembler::rdi,
    x86_64::Assembler::rsi,
    x86_64::Assembler::rdx,
    x86_64::Assembler::rcx,
    x86_64::Assembler::r8,
    x86_64::Assembler::r9,
};

static constexpr x86_64::Assembler::XMMRegister kAbiSimdArgs[] = {
    x86_64::Assembler::xmm0,
    x86_64::Assembler::xmm1,
    x86_64::Assembler::xmm2,
    x86_64::Assembler::xmm3,
    x86_64::Assembler::xmm4,
    x86_64::Assembler::xmm5,
    x86_64::Assembler::xmm6,
    x86_64::Assembler::xmm7,
};
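
// These are the System V AMD64 argument registers: the first six integer arguments travel in
// rdi, rsi, rdx, rcx, r8, r9 and the first eight floating-point arguments in xmm0-xmm7. For
// `uint64_t f(uint64_t a, float b)`, InitArgs below would place `a` in rdi and `b` in xmm0.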

// Assumes RSP points to the preallocated stack args area.
template <typename IntrinsicResType,
          typename... IntrinsicArgType,
          typename MacroAssembler,
          typename... AssemblerArgType>
constexpr bool InitArgs(MacroAssembler&& as, bool has_avx, AssemblerArgType... args) {
  using Assembler = std::decay_t<MacroAssembler>;
  using Register = typename Assembler::Register;
  using XMMRegister = typename Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  // All ABI argument registers are among the saved caller-saved registers, so we can safely
  // initialize them now. When an intrinsic receives its argument from such a register, we read it
  // from the stack save area, so there is no early-clobbering problem. Callee-saved registers are
  // never ABI argument registers, so they can be moved into an ABI register directly.
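  // For example, an argument living in rsi (itself the second ABI argument register) is loaded
  // from its save-area slot, so initializing rdi first cannot corrupt it.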

  size_t gp_index = 0;
  size_t simd_index = 0;
  bool success = ([&as, &gp_index, &simd_index, has_avx](auto arg) -> bool {
    using AssemblerType = typename decltype(arg)::AssemblerType;
    using IntrinsicType = typename decltype(arg)::IntrinsicType;

    if constexpr (std::is_integral_v<IntrinsicType>) {
      if (gp_index == std::size(kAbiArgs)) {
        return false;
      }
    } else if constexpr (std::is_same_v<IntrinsicType, Float32> ||
                         std::is_same_v<IntrinsicType, Float64>) {
      if (simd_index == std::size(kAbiSimdArgs)) {
        return false;
      }
    } else {
      return false;
    }

    // Note: the ABI mandates extension up to 32 bits and zero-filling of the upper half.
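    // On x86-64 any write to a 32-bit register implicitly clears bits 63:32, so the 32-bit moves
    // and Expand<int32_t, ...> calls below satisfy that requirement without extra instructions.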
    if constexpr (std::is_integral_v<IntrinsicType> && sizeof(IntrinsicType) <= sizeof(int32_t) &&
                  std::is_integral_v<AssemblerType> && sizeof(AssemblerType) <= sizeof(int32_t)) {
      as.Movl(kAbiArgs[gp_index++], static_cast<int32_t>(arg.value));
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) == sizeof(int64_t) &&
                         std::is_integral_v<AssemblerType> &&
                         sizeof(AssemblerType) == sizeof(int64_t)) {
      as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++],
                                                 static_cast<int64_t>(arg.value));
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) <= sizeof(int32_t) &&
                         std::is_same_v<AssemblerType, Register>) {
      if (kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] == kRegIsNotOnStack) {
        as.template Expand<int32_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
      } else {
        as.template Expand<int32_t, IntrinsicType>(
            kAbiArgs[gp_index++],
            {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
      }
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) == sizeof(int64_t) &&
                         std::is_same_v<AssemblerType, Register>) {
      if (kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] == kRegIsNotOnStack) {
        as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
      } else {
        as.template Expand<int64_t, IntrinsicType>(
            kAbiArgs[gp_index++],
            {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
      }
    } else if constexpr ((std::is_same_v<IntrinsicType, Float32> ||
                          std::is_same_v<IntrinsicType, Float64>) &&
                         std::is_same_v<AssemblerType, XMMRegister>) {
      if (kSimdRegOffsetsOnStack[arg.value.GetPhysicalIndex()] == kRegIsNotOnStack) {
        if (has_avx) {
          as.template Vmovs<IntrinsicType>(
              kAbiSimdArgs[simd_index], kAbiSimdArgs[simd_index], arg.value);
          simd_index++;
        } else {
          as.template Movs<IntrinsicType>(kAbiSimdArgs[simd_index++], arg.value);
        }
      } else {
        if (has_avx) {
          as.template Vmovs<IntrinsicType>(
              kAbiSimdArgs[simd_index++],
              {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
        } else {
          as.template Movs<IntrinsicType>(
              kAbiSimdArgs[simd_index++],
              {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
        }
      }
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicType, AssemblerType>>,
                    "Unknown parameter type, please add support to CallIntrinsic");
    }
    return true;
  }(ArgWrap<AssemblerArgType, IntrinsicArgType>{.value = args}) && ...);
  return success;
}

// Forward results from ABI registers to the result-specified registers and mark those registers
// in the returned StoredRegsInfo with kRegIsNotOnStack to prevent their restoration from the
// stack.
template <typename IntrinsicResType, typename AssemblerResType>
StoredRegsInfo ForwardResults(MacroAssembler<x86_64::Assembler>& as, AssemblerResType result) {
  using Assembler = MacroAssembler<x86_64::Assembler>;
  using Register = Assembler::Register;
  using XMMRegister = Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  StoredRegsInfo regs_info = {.regs_on_stack = kRegOffsetsOnStack,
                              .simd_regs_on_stack = kSimdRegOffsetsOnStack};

  if constexpr (Assembler::kFormatIs<IntrinsicResType, std::tuple<int32_t>, std::tuple<uint32_t>> &&
                std::is_same_v<AssemblerResType, Register>) {
    // Note: even unsigned 32-bit results are sign-extended to the 64-bit register on RV64.
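    // E.g. a uint32_t result of 0x80000000 must appear as 0xffffffff80000000 in the guest's
    // 64-bit register, hence the sign-extending Expand rather than a zero-extending move.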
    regs_info.regs_on_stack[result.GetPhysicalIndex()] = kRegIsNotOnStack;
    as.Expand<int64_t, int32_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<int64_t>, std::tuple<uint64_t>> &&
                       std::is_same_v<AssemblerResType, Register>) {
    regs_info.regs_on_stack[result.GetPhysicalIndex()] = kRegIsNotOnStack;
    as.Mov<int64_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<Float32>, std::tuple<Float64>> &&
                       std::is_same_v<AssemblerResType, XMMRegister>) {
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    regs_info.simd_regs_on_stack[result.GetPhysicalIndex()] = kRegIsNotOnStack;
    if (host_platform::kHasAVX) {
      as.Vmovs<ResType0>(result, result, Assembler::xmm0);
    } else {
      as.Movs<ResType0>(result, Assembler::xmm0);
    }
  } else if constexpr (std::tuple_size_v<IntrinsicResType> == 2) {
    // A pair of integer results comes back in rax (first element) and rdx (second element).
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    using ResType1 = std::tuple_element_t<1, IntrinsicResType>;
    auto [result0, result1] = result;
    if constexpr (Assembler::kFormatIs<ResType0, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result0, Assembler::rax);
    } else if constexpr (Assembler::kFormatIs<ResType0, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Mov<int64_t>(result0, Assembler::rax);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
    if constexpr (Assembler::kFormatIs<ResType1, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result1, Assembler::rdx);
    } else if constexpr (Assembler::kFormatIs<ResType1, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Mov<int64_t>(result1, Assembler::rdx);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
  } else {
    static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                  "Unknown result type, please add support to CallIntrinsic");
  }
  return regs_info;
}

// Note: we can ignore the status of the actual InitArgs call because we know that InitArgs will
// succeed if the call in the static_assert succeeded.
//
// The AVX flag shouldn't change the outcome, but better safe than sorry: verify both settings.

template <typename IntrinsicResType, typename... IntrinsicArgType, typename... AssemblerArgType>
void InitArgsVerify(AssemblerArgType...) {
  constexpr auto MakeDummyAssemblerType = []<typename AssemblerType>() {
    if constexpr (std::is_same_v<AssemblerType, x86_64::Assembler::Register>) {
      // Note: we can't use no_register here, but any “real” register works.
      return x86_64::Assembler::rax;
    } else if constexpr (std::is_same_v<AssemblerType, x86_64::Assembler::XMMRegister>) {
      // Note: we can't use no_xmm_register here, but any “real” register works.
      return x86_64::Assembler::xmm0;
    } else {
      return AssemblerType{0};
    }
  };
  static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
      ConstExprCheckAssembler(),
      true,
      MakeDummyAssemblerType.template operator()<AssemblerArgType>()...));
  static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
      ConstExprCheckAssembler(),
      false,
      MakeDummyAssemblerType.template operator()<AssemblerArgType>()...));
}

template <typename AssemblerResType,
          typename IntrinsicResType,
          typename... IntrinsicArgType,
          typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
                   IntrinsicResType (*function)(IntrinsicArgType...),
                   AssemblerResType result,
                   AssemblerArgType... args) {
  PushCallerSaved(as);

  InitArgsVerify<IntrinsicResType, IntrinsicArgType...>(args...);
  InitArgs<IntrinsicResType, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);

  as.Call(reinterpret_cast<void*>(function));

  auto regs_info = ForwardResults<IntrinsicResType>(as, result);

  PopCallerSaved(as, regs_info);
}
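
// A usage sketch, with a hypothetical intrinsic name for illustration:
//
//   std::tuple<uint64_t> SomeIntrinsic(uint64_t, uint32_t);
//   ...
//   CallIntrinsic(as, &SomeIntrinsic, res_reg, src_reg, uint32_t{42});
//
// This saves the caller-saved registers, loads src_reg's saved value into rdi, materializes 42
// in esi, calls SomeIntrinsic, copies rax into res_reg, and restores everything except res_reg.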

template <typename AssemblerResType, typename... IntrinsicArgType, typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
                   void (*function)(IntrinsicArgType...),
                   AssemblerArgType... args) {
  PushCallerSaved(as);

  InitArgsVerify<void, IntrinsicArgType...>(args...);
  InitArgs<void, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);

  as.Call(reinterpret_cast<void*>(function));

  // No result to forward: restore every saved register.
  PopCallerSaved(
      as, {.regs_on_stack = kRegOffsetsOnStack, .simd_regs_on_stack = kSimdRegOffsetsOnStack});
}

}  // namespace berberis::call_intrinsic

#endif  // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_