/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

static constexpr Register kMethodRegisterArgument = RDI;

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;

static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };

static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
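// Note: the managed-ABI core argument registers above are, roughly, the native SysV AMD64
// integer argument registers (RDI, RSI, RDX, RCX, R8, R9) shifted by one position, because
// RDI is reserved for the current ArtMethod* (kMethodRegisterArgument). Floating-point
// arguments use XMM0-XMM7, matching the native ABI.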
static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated. It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };

#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(MathSignumFloat)                           \
  V(MathSignumDouble)                          \
  V(MathCopySignFloat)                         \
  V(MathCopySignDouble)                        \
  V(CRC32Update)                               \
  V(CRC32UpdateBytes)                          \
  V(CRC32UpdateByteBuffer)                     \
  V(FP16ToFloat)                               \
  V(FP16ToHalf)                                \
  V(FP16Floor)                                 \
  V(FP16Ceil)                                  \
  V(FP16Rint)                                  \
  V(FP16Greater)                               \
  V(FP16GreaterEquals)                         \
  V(FP16Less)                                  \
  V(FP16LessEquals)                            \
  V(FP16Compare)                               \
  V(FP16Min)                                   \
  V(FP16Max)                                   \
  V(IntegerRemainderUnsigned)                  \
  V(LongRemainderUnsigned)                     \
  V(StringStringIndexOf)                       \
  V(StringStringIndexOfAfter)                  \
  V(StringBufferAppend)                        \
  V(StringBufferLength)                        \
  V(StringBufferToString)                      \
  V(StringBuilderAppendObject)                 \
  V(StringBuilderAppendString)                 \
  V(StringBuilderAppendCharSequence)           \
  V(StringBuilderAppendCharArray)              \
  V(StringBuilderAppendBoolean)                \
  V(StringBuilderAppendChar)                   \
  V(StringBuilderAppendInt)                    \
  V(StringBuilderAppendLong)                   \
  V(StringBuilderAppendFloat)                  \
  V(StringBuilderAppendDouble)                 \
  V(StringBuilderLength)                       \
  V(StringBuilderToString)                     \
  V(UnsafeArrayBaseOffset)                     \
  /* 1.8 */                                    \
  V(JdkUnsafeArrayBaseOffset)                  \
  V(MethodHandleInvoke)                        \


class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
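// Illustrative sketch (not normative; the authoritative logic is
// InvokeDexCallingConventionVisitorX86_64::GetNextLocation() in the .cc file): for a static
// method `int f(int a, long b, float c, double d)`, the expected locations are roughly
//   ArtMethod* -> RDI, a -> RSI, b -> RDX, c -> XMM0, d -> XMM1,
// with core and floating-point registers consumed independently and any overflow arguments
// passed on the stack.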
class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::RegisterLocation(RAX);
  }
  Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
                               bool is_instance) const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

class CodeGeneratorX86_64;

class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};
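// The two visitors below implement the two passes of code generation: LocationsBuilderX86_64
// runs first and records, for each HInstruction, a LocationSummary describing where its inputs,
// outputs and temps may live; after register allocation, InstructionCodeGeneratorX86_64 visits
// the same instructions and emits x86-64 code using the locations that were chosen.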
class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
class JumpTableRIPFixup;

class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill.
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize() override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;
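  // Rough sketch of the card-marking write barrier emitted by MarkGCCard() below, shown only
  // to explain the roles of the `temp` and `card` registers (the authoritative code is in
  // code_generator_x86_64.cc):
  //   card = Thread::Current()->card_table_              (loaded via %gs)
  //   temp = object >> gc::accounting::CardTable::kCardShift
  //   card_table[temp] = <low byte of card>              (the table base's low byte serves as
  //                                                       the dirty-card value)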
  // Emit a write barrier if:
  // A) emit_null_check is false
  // B) emit_null_check is true, and value is not null.
  void MaybeMarkGCCard(CpuRegister temp,
                       CpuRegister card,
                       CpuRegister object,
                       CpuRegister value,
                       bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object);

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when the
  // CC GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(CpuRegister temp, CpuRegister card, CpuRegister object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordAppImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  void RecordAppImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);
  Label* NewJitRootMethodTypePatch(const DexFile& dex_file,
                                   dex::ProtoIndex proto_index,
                                   Handle<mirror::MethodType> method_type);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed. The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`). This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);
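  // Illustrative examples of the "most efficient manner" mentioned above (a sketch; the actual
  // instruction selection lives in code_generator_x86_64.cc):
  //   Load64BitValue(reg, 0)           -> xorl reg, reg     (clears the full 64-bit register)
  //   Load64BitValue(reg, 0x12345678)  -> movl $imm32, reg  (32-bit immediates zero-extend)
  //   Compare32BitValue(reg, 0)        -> testl reg, reg    (avoids encoding an immediate)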

  // Construct address for array access.
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // Pass `force_mfence = true` to emit an `mfence` instead, e.g. to order non-temporal stores.
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // We will fix this up in the linker later to have the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative MethodType patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;
  // Patches for method type in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_method_type_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_