/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

static constexpr Register kMethodRegisterArgument = RDI;

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);
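// Illustrative sanity check: kX86_64PointerSize is PointerSize::k64, i.e. 8 bytes per word.
static_assert(kX86_64WordSize == 8u, "Expected an 8-byte word size on x86-64");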

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;

static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };

static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
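// Illustrative sanity checks: the managed (dex) calling convention passes up to five arguments
// in core registers (RDI is reserved for the ArtMethod*) and up to eight in XMM registers.
static_assert(kParameterCoreRegistersLength == 5u, "Expected 5 core argument registers");
static_assert(kParameterFloatRegistersLength == 8u, "Expected 8 FP argument registers");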

static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);
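// Illustrative sanity checks: runtime entry points take at most four core and two FP arguments
// in registers, matching the first System V AMD64 argument registers.
static_assert(kRuntimeParameterCoreRegistersLength == 4u, "Expected 4 runtime core registers");
static_assert(kRuntimeParameterFpuRegistersLength == 2u, "Expected 2 runtime FP registers");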

// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated.  It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };

#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(MathSignumFloat)                           \
  V(MathSignumDouble)                          \
  V(MathCopySignFloat)                         \
  V(MathCopySignDouble)                        \
  V(CRC32Update)                               \
  V(CRC32UpdateBytes)                          \
  V(CRC32UpdateByteBuffer)                     \
  V(FP16ToFloat)                               \
  V(FP16ToHalf)                                \
  V(FP16Floor)                                 \
  V(FP16Ceil)                                  \
  V(FP16Rint)                                  \
  V(FP16Greater)                               \
  V(FP16GreaterEquals)                         \
  V(FP16Less)                                  \
  V(FP16LessEquals)                            \
  V(FP16Compare)                               \
  V(FP16Min)                                   \
  V(FP16Max)                                   \
  V(IntegerRemainderUnsigned)                  \
  V(LongRemainderUnsigned)                     \
  V(StringStringIndexOf)                       \
  V(StringStringIndexOfAfter)                  \
  V(StringBufferAppend)                        \
  V(StringBufferLength)                        \
  V(StringBufferToString)                      \
  V(StringBuilderAppendObject)                 \
  V(StringBuilderAppendString)                 \
  V(StringBuilderAppendCharSequence)           \
  V(StringBuilderAppendCharArray)              \
  V(StringBuilderAppendBoolean)                \
  V(StringBuilderAppendChar)                   \
  V(StringBuilderAppendInt)                    \
  V(StringBuilderAppendLong)                   \
  V(StringBuilderAppendFloat)                  \
  V(StringBuilderAppendDouble)                 \
  V(StringBuilderLength)                       \
  V(StringBuilderToString)                     \
  V(UnsafeArrayBaseOffset)                     \
  /* 1.8 */                                    \
  V(JdkUnsafeArrayBaseOffset)                  \
  V(MethodHandleInvoke)                        \


class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};
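
// Typical use of InvokeRuntimeCallingConvention (illustrative): callers query argument
// locations for runtime calls, e.g.
//   InvokeRuntimeCallingConvention calling_convention;
//   Location arg0 = Location::RegisterLocation(calling_convention.GetRegisterAt(0));  // RDI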

class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::RegisterLocation(RAX);
  }
  Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
                               bool is_instance) const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

class CodeGeneratorX86_64;

class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};

class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
class JumpTableRIPFixup;

class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;
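  // A typical call site for InvokeRuntime() (illustrative), e.g. from a null-check slow path:
  //   codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this);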

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize() override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Emit a write barrier if:
  // A) emit_null_check is false, or
  // B) emit_null_check is true and value is not null.
  void MaybeMarkGCCard(CpuRegister temp,
                       CpuRegister card,
                       CpuRegister object,
                       CpuRegister value,
                       bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object);

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when the CC
  // GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(CpuRegister temp, CpuRegister card, CpuRegister object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordAppImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  void RecordAppImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);
  Label* NewJitRootMethodTypePatch(const DexFile& dex_file,
                                   dex::ProtoIndex proto_index,
                                   Handle<mirror::MethodType> method_type);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed.  The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`).  This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'force_mfence' parameter should be set to ensure ordering of non-temporal stores.
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }
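  // Illustration: MemoryFence() emits `lock addl $0, 0(%rsp)`, while
  // MemoryFence(/* force_mfence= */ true) emits a full MFENCE, e.g. when
  // non-temporal stores need to be ordered.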

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // We will fix this up in the linker later to have the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative MethodType patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;
  // Patches for method type in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_method_type_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_