xref: /aosp_15_r20/art/compiler/optimizing/code_generator_x86.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/var_handle.h"
39 #include "optimizing/nodes.h"
40 #include "profiling_info_builder.h"
41 #include "scoped_thread_state_change-inl.h"
42 #include "thread.h"
43 #include "trace.h"
44 #include "utils/assembler.h"
45 #include "utils/stack_checks.h"
46 #include "utils/x86/assembler_x86.h"
47 #include "utils/x86/constants_x86.h"
48 #include "utils/x86/managed_register_x86.h"
49 
50 namespace art HIDDEN {
51 
52 template<class MirrorType>
53 class GcRoot;
54 
55 namespace x86 {
56 
57 static constexpr int kCurrentMethodStackOffset = 0;
58 static constexpr Register kMethodRegisterArgument = EAX;
59 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
60 
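// 0x400 is bit 10 (the C2 flag) of the x87 FPU status word; it is presumably tested via
// fnstsw to detect an incomplete fprem reduction (or an unordered x87 compare).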
61 static constexpr int kC2ConditionMask = 0x400;
62 
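// Register(8) is one past EDI, the last general-purpose register (indices 0-7), so this
// is not a real register: it is OR-ed into the core spill mask (see the constructor
// below) so the return address pushed by `call` is accounted for in the frame layout,
// mimicking Quick.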
63 static constexpr int kFakeReturnRegister = Register(8);
64 
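// Canonical quiet-NaN bit patterns (exponent all ones, most significant fraction bit
// set); presumably materialized when an instruction must produce a canonical NaN result.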
65 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
66 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
67 
68 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
69   InvokeRuntimeCallingConvention calling_convention;
70   RegisterSet caller_saves = RegisterSet::Empty();
71   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
72   // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
73   // that the kPrimNot result register is the same as the first argument register.
74   return caller_saves;
75 }
76 
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
80 
81 class NullCheckSlowPathX86 : public SlowPathCode {
82  public:
83   explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
84 
85   void EmitNativeCode(CodeGenerator* codegen) override {
86     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
87     __ Bind(GetEntryLabel());
88     if (instruction_->CanThrowIntoCatchBlock()) {
89       // Live registers will be restored in the catch block if caught.
90       SaveLiveRegisters(codegen, instruction_->GetLocations());
91     }
92     x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
93                                instruction_,
94                                instruction_->GetDexPc(),
95                                this);
96     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97   }
98 
99   bool IsFatal() const override { return true; }
100 
101   const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
102 
103  private:
104   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
105 };
106 
107 class DivZeroCheckSlowPathX86 : public SlowPathCode {
108  public:
109   explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110 
111   void EmitNativeCode(CodeGenerator* codegen) override {
112     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
113     __ Bind(GetEntryLabel());
114     x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116   }
117 
118   bool IsFatal() const override { return true; }
119 
120   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
121 
122  private:
123   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
124 };
125 
126 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
127  public:
128   DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
129       : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
130 
131   void EmitNativeCode(CodeGenerator* codegen) override {
132     __ Bind(GetEntryLabel());
133     if (is_div_) {
134       __ negl(reg_);
135     } else {
136       __ movl(reg_, Immediate(0));
137     }
138     __ jmp(GetExitLabel());
139   }
140 
141   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
142 
143  private:
144   Register reg_;
145   bool is_div_;
146   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
147 };
148 
149 class BoundsCheckSlowPathX86 : public SlowPathCode {
150  public:
151   explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
152 
153   void EmitNativeCode(CodeGenerator* codegen) override {
154     LocationSummary* locations = instruction_->GetLocations();
155     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
156     __ Bind(GetEntryLabel());
157     if (instruction_->CanThrowIntoCatchBlock()) {
158       // Live registers will be restored in the catch block if caught.
159       SaveLiveRegisters(codegen, locations);
160     }
161 
162     Location index_loc = locations->InAt(0);
163     Location length_loc = locations->InAt(1);
164     InvokeRuntimeCallingConvention calling_convention;
165     Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
166     Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
167 
168     // Are we using an array length from memory?
169     if (!length_loc.IsValid()) {
170       DCHECK(instruction_->InputAt(1)->IsArrayLength());
171       HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
172       DCHECK(array_length->IsEmittedAtUseSite());
173       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
174       Location array_loc = array_length->GetLocations()->InAt(0);
175       if (!index_loc.Equals(length_arg)) {
176         // The index is not clobbered by loading the length directly to `length_arg`.
177         __ movl(length_arg.AsRegister<Register>(),
178                 Address(array_loc.AsRegister<Register>(), len_offset));
179         x86_codegen->Move32(index_arg, index_loc);
180       } else if (!array_loc.Equals(index_arg)) {
181         // The array reference is not clobbered by the index move.
182         x86_codegen->Move32(index_arg, index_loc);
183         __ movl(length_arg.AsRegister<Register>(),
184                 Address(array_loc.AsRegister<Register>(), len_offset));
185       } else {
186         // We do not have a temporary we could use, so swap the registers using the
187         // parallel move resolver and replace the array with the length afterwards.
188         codegen->EmitParallelMoves(
189             index_loc,
190             index_arg,
191             DataType::Type::kInt32,
192             array_loc,
193             length_arg,
194             DataType::Type::kReference);
195         __ movl(length_arg.AsRegister<Register>(),
196                 Address(length_arg.AsRegister<Register>(), len_offset));
197       }
198       if (mirror::kUseStringCompression && array_length->IsStringLength()) {
199         __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
200       }
201     } else {
202       // We're moving two locations to locations that could overlap,
203       // so we need a parallel move resolver.
204       codegen->EmitParallelMoves(
205           index_loc,
206           index_arg,
207           DataType::Type::kInt32,
208           length_loc,
209           length_arg,
210           DataType::Type::kInt32);
211     }
212 
213     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
214         ? kQuickThrowStringBounds
215         : kQuickThrowArrayBounds;
216     x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
217     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
218     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
219   }
220 
221   bool IsFatal() const override { return true; }
222 
223   const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
224 
225  private:
226   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
227 };
228 
229 class SuspendCheckSlowPathX86 : public SlowPathCode {
230  public:
231   SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
232       : SlowPathCode(instruction), successor_(successor) {}
233 
234   void EmitNativeCode(CodeGenerator* codegen) override {
235     LocationSummary* locations = instruction_->GetLocations();
236     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
237     __ Bind(GetEntryLabel());
238     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
239     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
240     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
241     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
242     if (successor_ == nullptr) {
243       __ jmp(GetReturnLabel());
244     } else {
245       __ jmp(x86_codegen->GetLabelOf(successor_));
246     }
247   }
248 
249   Label* GetReturnLabel() {
250     DCHECK(successor_ == nullptr);
251     return &return_label_;
252   }
253 
254   HBasicBlock* GetSuccessor() const {
255     return successor_;
256   }
257 
258   const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
259 
260  private:
261   HBasicBlock* const successor_;
262   Label return_label_;
263 
264   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
265 };
266 
267 class LoadStringSlowPathX86 : public SlowPathCode {
268  public:
269   explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
270 
271   void EmitNativeCode(CodeGenerator* codegen) override {
272     LocationSummary* locations = instruction_->GetLocations();
273     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
274 
275     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
276     __ Bind(GetEntryLabel());
277     SaveLiveRegisters(codegen, locations);
278 
279     InvokeRuntimeCallingConvention calling_convention;
280     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
281     __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
282     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
283     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
284     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
285     RestoreLiveRegisters(codegen, locations);
286 
287     __ jmp(GetExitLabel());
288   }
289 
290   const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
291 
292  private:
293   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
294 };
295 
296 class LoadClassSlowPathX86 : public SlowPathCode {
297  public:
298   LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
299       : SlowPathCode(at), cls_(cls) {
300     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
301     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
302   }
303 
304   void EmitNativeCode(CodeGenerator* codegen) override {
305     LocationSummary* locations = instruction_->GetLocations();
306     Location out = locations->Out();
307     const uint32_t dex_pc = instruction_->GetDexPc();
308     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
309     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
310 
311     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
312     __ Bind(GetEntryLabel());
313     SaveLiveRegisters(codegen, locations);
314 
315     InvokeRuntimeCallingConvention calling_convention;
316     if (must_resolve_type) {
317       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
318              x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
319              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
320                              &cls_->GetDexFile()));
321       dex::TypeIndex type_index = cls_->GetTypeIndex();
322       __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
323       if (cls_->NeedsAccessCheck()) {
324         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
325         x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
326       } else {
327         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
328         x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
329       }
330       // If we also must_do_clinit, the resolved type is now in the correct register.
331     } else {
332       DCHECK(must_do_clinit);
333       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
334       x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
335     }
336     if (must_do_clinit) {
337       x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
338       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
339     }
340 
341     // Move the class to the desired location.
342     if (out.IsValid()) {
343       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
344       x86_codegen->Move32(out, Location::RegisterLocation(EAX));
345     }
346     RestoreLiveRegisters(codegen, locations);
347     __ jmp(GetExitLabel());
348   }
349 
350   const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
351 
352  private:
353   // The class this slow path will load.
354   HLoadClass* const cls_;
355 
356   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
357 };
358 
359 class TypeCheckSlowPathX86 : public SlowPathCode {
360  public:
361   TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
362       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
363 
364   void EmitNativeCode(CodeGenerator* codegen) override {
365     LocationSummary* locations = instruction_->GetLocations();
366     DCHECK(instruction_->IsCheckCast()
367            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
368 
369     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
370     __ Bind(GetEntryLabel());
371 
372     if (kPoisonHeapReferences &&
373         instruction_->IsCheckCast() &&
374         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
375       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
376       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
377     }
378 
379     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
380       SaveLiveRegisters(codegen, locations);
381     }
382 
383     // We're moving two locations to locations that could overlap, so we need a parallel
384     // move resolver.
385     InvokeRuntimeCallingConvention calling_convention;
386     x86_codegen->EmitParallelMoves(locations->InAt(0),
387                                    Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
388                                    DataType::Type::kReference,
389                                    locations->InAt(1),
390                                    Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
391                                    DataType::Type::kReference);
392     if (instruction_->IsInstanceOf()) {
393       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
394                                  instruction_,
395                                  instruction_->GetDexPc(),
396                                  this);
397       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
398     } else {
399       DCHECK(instruction_->IsCheckCast());
400       x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
401                                  instruction_,
402                                  instruction_->GetDexPc(),
403                                  this);
404       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
405     }
406 
407     if (!is_fatal_) {
408       if (instruction_->IsInstanceOf()) {
409         x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
410       }
411       RestoreLiveRegisters(codegen, locations);
412 
413       __ jmp(GetExitLabel());
414     }
415   }
416 
417   const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
418   bool IsFatal() const override { return is_fatal_; }
419 
420  private:
421   const bool is_fatal_;
422 
423   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
424 };
425 
426 class DeoptimizationSlowPathX86 : public SlowPathCode {
427  public:
428   explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
429     : SlowPathCode(instruction) {}
430 
431   void EmitNativeCode(CodeGenerator* codegen) override {
432     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
433     __ Bind(GetEntryLabel());
434     LocationSummary* locations = instruction_->GetLocations();
435     SaveLiveRegisters(codegen, locations);
436     InvokeRuntimeCallingConvention calling_convention;
437     x86_codegen->Load32BitValue(
438         calling_convention.GetRegisterAt(0),
439         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
440     x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
441     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
442   }
443 
444   const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
445 
446  private:
447   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
448 };
449 
450 class ArraySetSlowPathX86 : public SlowPathCode {
451  public:
452   explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
453 
454   void EmitNativeCode(CodeGenerator* codegen) override {
455     LocationSummary* locations = instruction_->GetLocations();
456     __ Bind(GetEntryLabel());
457     SaveLiveRegisters(codegen, locations);
458 
459     InvokeRuntimeCallingConvention calling_convention;
460     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
461     parallel_move.AddMove(
462         locations->InAt(0),
463         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
464         DataType::Type::kReference,
465         nullptr);
466     parallel_move.AddMove(
467         locations->InAt(1),
468         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
469         DataType::Type::kInt32,
470         nullptr);
471     parallel_move.AddMove(
472         locations->InAt(2),
473         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
474         DataType::Type::kReference,
475         nullptr);
476     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
477 
478     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
479     x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
480     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
481     RestoreLiveRegisters(codegen, locations);
482     __ jmp(GetExitLabel());
483   }
484 
485   const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
486 
487  private:
488   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
489 };
490 
491 // Slow path marking an object reference `ref` during a read
492 // barrier. The field `obj.field` in the object `obj` holding this
493 // reference does not get updated by this slow path after marking (see
494 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
495 //
496 // This means that after the execution of this slow path, `ref` will
497 // always be up-to-date, but `obj.field` may not; i.e., after the
498 // flip, `ref` will be a to-space reference, but `obj.field` will
499 // probably still be a from-space reference (unless it gets updated by
500 // another thread, or if another thread installed another object
501 // reference (different from `ref`) in `obj.field`).
502 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
503  public:
504   ReadBarrierMarkSlowPathX86(HInstruction* instruction,
505                              Location ref,
506                              bool unpoison_ref_before_marking)
507       : SlowPathCode(instruction),
508         ref_(ref),
509         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
510   }
511 
512   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
513 
514   void EmitNativeCode(CodeGenerator* codegen) override {
515     DCHECK(codegen->EmitReadBarrier());
516     LocationSummary* locations = instruction_->GetLocations();
517     Register ref_reg = ref_.AsRegister<Register>();
518     DCHECK(locations->CanCall());
519     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
520     DCHECK(instruction_->IsInstanceFieldGet() ||
521            instruction_->IsStaticFieldGet() ||
522            instruction_->IsArrayGet() ||
523            instruction_->IsArraySet() ||
524            instruction_->IsLoadClass() ||
525            instruction_->IsLoadString() ||
526            instruction_->IsInstanceOf() ||
527            instruction_->IsCheckCast() ||
528            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
529         << "Unexpected instruction in read barrier marking slow path: "
530         << instruction_->DebugName();
531 
532     __ Bind(GetEntryLabel());
533     if (unpoison_ref_before_marking_) {
534       // Object* ref = ref_addr->AsMirrorPtr()
535       __ MaybeUnpoisonHeapReference(ref_reg);
536     }
537     // No need to save live registers; it's taken care of by the
538     // entrypoint. Also, there is no need to update the stack mask,
539     // as this runtime call will not trigger a garbage collection.
540     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
541     DCHECK_NE(ref_reg, ESP);
542     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
543     // "Compact" slow path, saving two moves.
544     //
545     // Instead of using the standard runtime calling convention (input
546     // and output in EAX):
547     //
548     //   EAX <- ref
549     //   EAX <- ReadBarrierMark(EAX)
550     //   ref <- EAX
551     //
552     // we just use rX (the register containing `ref`) as input and output
553     // of a dedicated entrypoint:
554     //
555     //   rX <- ReadBarrierMarkRegX(rX)
556     //
557     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
558     // This runtime call does not require a stack map.
559     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
560     __ jmp(GetExitLabel());
561   }
562 
563  private:
564   // The location (register) of the marked object reference.
565   const Location ref_;
566   // Should the reference in `ref_` be unpoisoned prior to marking it?
567   const bool unpoison_ref_before_marking_;
568 
569   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
570 };
571 
572 // Slow path marking an object reference `ref` during a read barrier,
573 // and if needed, atomically updating the field `obj.field` in the
574 // object `obj` holding this reference after marking (contrary to
575 // ReadBarrierMarkSlowPathX86 above, which never tries to update
576 // `obj.field`).
577 //
578 // This means that after the execution of this slow path, both `ref`
579 // and `obj.field` will be up-to-date; i.e., after the flip, both will
580 // hold the same to-space reference (unless another thread installed
581 // another object reference (different from `ref`) in `obj.field`).
582 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
583  public:
584   ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
585                                            Location ref,
586                                            Register obj,
587                                            const Address& field_addr,
588                                            bool unpoison_ref_before_marking,
589                                            Register temp)
590       : SlowPathCode(instruction),
591         ref_(ref),
592         obj_(obj),
593         field_addr_(field_addr),
594         unpoison_ref_before_marking_(unpoison_ref_before_marking),
595         temp_(temp) {
596   }
597 
598   const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
599 
600   void EmitNativeCode(CodeGenerator* codegen) override {
601     DCHECK(codegen->EmitReadBarrier());
602     LocationSummary* locations = instruction_->GetLocations();
603     Register ref_reg = ref_.AsRegister<Register>();
604     DCHECK(locations->CanCall());
605     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
606     DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
607         << "Unexpected instruction in read barrier marking and field updating slow path: "
608         << instruction_->DebugName();
609     HInvoke* invoke = instruction_->AsInvoke();
610     DCHECK(IsUnsafeCASReference(invoke) ||
611            IsUnsafeGetAndSetReference(invoke) ||
612            IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
613 
614     __ Bind(GetEntryLabel());
615     if (unpoison_ref_before_marking_) {
616       // Object* ref = ref_addr->AsMirrorPtr()
617       __ MaybeUnpoisonHeapReference(ref_reg);
618     }
619 
620     // Save the old (unpoisoned) reference.
621     __ movl(temp_, ref_reg);
622 
623     // No need to save live registers; it's taken care of by the
624     // entrypoint. Also, there is no need to update the stack mask,
625     // as this runtime call will not trigger a garbage collection.
626     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
627     DCHECK_NE(ref_reg, ESP);
628     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
629     // "Compact" slow path, saving two moves.
630     //
631     // Instead of using the standard runtime calling convention (input
632     // and output in EAX):
633     //
634     //   EAX <- ref
635     //   EAX <- ReadBarrierMark(EAX)
636     //   ref <- EAX
637     //
638     // we just use rX (the register containing `ref`) as input and output
639     // of a dedicated entrypoint:
640     //
641     //   rX <- ReadBarrierMarkRegX(rX)
642     //
643     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
644     // This runtime call does not require a stack map.
645     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
646 
647     // If the new reference is different from the old reference,
648     // update the field in the holder (`*field_addr`).
649     //
650     // Note that this field could also hold a different object, if
651     // another thread had concurrently changed it. In that case, the
652     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
653     // operation below would abort the CAS, leaving the field as-is.
654     NearLabel done;
655     __ cmpl(temp_, ref_reg);
656     __ j(kEqual, &done);
657 
658     // Update the holder's field atomically.  This may fail if
659     // the mutator updates the field before us, but that is OK.  This is achieved
660     // using a strong compare-and-set (CAS) operation with relaxed
661     // memory synchronization ordering, where the expected value is
662     // the old reference and the desired value is the new reference.
663     // This operation is implemented with a 32-bit LOCK CMPXCHG
664     // instruction, which requires the expected value (the old
665     // reference) to be in EAX.  Save EAX beforehand, and move the
666     // expected value (stored in `temp_`) into EAX.
667     __ pushl(EAX);
668     __ movl(EAX, temp_);
669 
670     // Convenience aliases.
671     Register base = obj_;
672     Register expected = EAX;
673     Register value = ref_reg;
674 
675     bool base_equals_value = (base == value);
676     if (kPoisonHeapReferences) {
677       if (base_equals_value) {
678         // If `base` and `value` are the same register location, move
679         // `value` to a temporary register.  This way, poisoning
680         // `value` won't invalidate `base`.
681         value = temp_;
682         __ movl(value, base);
683       }
684 
685       // Check that the register allocator did not assign the location
686       // of `expected` (EAX) to `value` nor to `base`, so that heap
687       // poisoning (when enabled) works as intended below.
688       // - If `value` were equal to `expected`, both references would
689       //   be poisoned twice, meaning they would not be poisoned at
690       //   all, as heap poisoning uses address negation.
691       // - If `base` were equal to `expected`, poisoning `expected`
692       //   would invalidate `base`.
693       DCHECK_NE(value, expected);
694       DCHECK_NE(base, expected);
695 
696       __ PoisonHeapReference(expected);
697       __ PoisonHeapReference(value);
698     }
699 
700     __ LockCmpxchgl(field_addr_, value);
701 
702     // If heap poisoning is enabled, we need to unpoison the values
703     // that were poisoned earlier.
704     if (kPoisonHeapReferences) {
705       if (base_equals_value) {
706         // `value` has been moved to a temporary register, no need
707         // to unpoison it.
708       } else {
709         __ UnpoisonHeapReference(value);
710       }
711       // No need to unpoison `expected` (EAX), as it is overwritten below.
712     }
713 
714     // Restore EAX.
715     __ popl(EAX);
716 
717     __ Bind(&done);
718     __ jmp(GetExitLabel());
719   }
720 
721  private:
722   // The location (register) of the marked object reference.
723   const Location ref_;
724   // The register containing the object holding the marked object reference field.
725   const Register obj_;
726   // The address of the marked reference field.  The base of this address must be `obj_`.
727   const Address field_addr_;
728 
729   // Should the reference in `ref_` be unpoisoned prior to marking it?
730   const bool unpoison_ref_before_marking_;
731 
732   const Register temp_;
733 
734   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
735 };
736 
737 // Slow path generating a read barrier for a heap reference.
738 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
739  public:
740   ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
741                                          Location out,
742                                          Location ref,
743                                          Location obj,
744                                          uint32_t offset,
745                                          Location index)
746       : SlowPathCode(instruction),
747         out_(out),
748         ref_(ref),
749         obj_(obj),
750         offset_(offset),
751         index_(index) {
752     // If `obj` is equal to `out` or `ref`, it means the initial object
753     // has been overwritten by (or after) the heap object reference load
754     // to be instrumented, e.g.:
755     //
756     //   __ movl(out, Address(out, offset));
757     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
758     //
759     // In that case, we have lost the information about the original
760     // object, and the emitted read barrier cannot work properly.
761     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
762     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
763   }
764 
765   void EmitNativeCode(CodeGenerator* codegen) override {
766     DCHECK(codegen->EmitReadBarrier());
767     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
768     LocationSummary* locations = instruction_->GetLocations();
769     Register reg_out = out_.AsRegister<Register>();
770     DCHECK(locations->CanCall());
771     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
772     DCHECK(instruction_->IsInstanceFieldGet() ||
773            instruction_->IsStaticFieldGet() ||
774            instruction_->IsArrayGet() ||
775            instruction_->IsInstanceOf() ||
776            instruction_->IsCheckCast() ||
777            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
778         << "Unexpected instruction in read barrier for heap reference slow path: "
779         << instruction_->DebugName();
780 
781     __ Bind(GetEntryLabel());
782     SaveLiveRegisters(codegen, locations);
783 
784     // We may have to change the index's value, but as `index_` is a
785     // constant member (like other "inputs" of this slow path),
786     // introduce a copy of it, `index`.
787     Location index = index_;
788     if (index_.IsValid()) {
789       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
790       if (instruction_->IsArrayGet()) {
791         // Compute the actual memory offset and store it in `index`.
792         Register index_reg = index_.AsRegister<Register>();
793         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
794         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
795           // We are about to change the value of `index_reg` (see the
796           // calls to art::x86::X86Assembler::shll and
797           // art::x86::X86Assembler::AddImmediate below), but it has
798           // not been saved by the previous call to
799           // art::SlowPathCode::SaveLiveRegisters, as it is a
800           // callee-save register --
801           // art::SlowPathCode::SaveLiveRegisters does not consider
802           // callee-save registers, as it has been designed with the
803           // assumption that callee-save registers are supposed to be
804           // handled by the called function.  So, as a callee-save
805           // register, `index_reg` _would_ eventually be saved onto
806           // the stack, but it would be too late: we would have
807           // changed its value earlier.  Therefore, we manually save
808           // it here into another freely available register,
809           // `free_reg`, chosen of course among the caller-save
810           // registers (as a callee-save `free_reg` register would
811           // exhibit the same problem).
812           //
813           // Note we could have requested a temporary register from
814           // the register allocator instead; but we prefer not to, as
815           // this is a slow path, and we know we can find a
816           // caller-save register that is available.
817           Register free_reg = FindAvailableCallerSaveRegister(codegen);
818           __ movl(free_reg, index_reg);
819           index_reg = free_reg;
820           index = Location::RegisterLocation(index_reg);
821         } else {
822           // The initial register stored in `index_` has already been
823           // saved in the call to art::SlowPathCode::SaveLiveRegisters
824           // (as it is not a callee-save register), so we can freely
825           // use it.
826         }
827         // Shifting the index value contained in `index_reg` by the scale
828         // factor (2) cannot overflow in practice, as the runtime is
829         // unable to allocate object arrays with a size larger than
830         // 2^26 - 1 (that is, 2^28 - 4 bytes).
831         __ shll(index_reg, Immediate(TIMES_4));
832         static_assert(
833             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
834             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
835         __ AddImmediate(index_reg, Immediate(offset_));
836       } else {
837         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
838         // intrinsics, `index_` is not shifted by a scale factor of 2
839         // (as in the case of ArrayGet), as it is actually an offset
840         // to an object field within an object.
841         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
842         DCHECK(instruction_->GetLocations()->Intrinsified());
843         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
844                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
845                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
846                (instruction_->AsInvoke()->GetIntrinsic() ==
847                     Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
848                (instruction_->AsInvoke()->GetIntrinsic() ==
849                     Intrinsics::kJdkUnsafeGetReferenceAcquire))
850             << instruction_->AsInvoke()->GetIntrinsic();
851         DCHECK_EQ(offset_, 0U);
852         DCHECK(index_.IsRegisterPair());
853         // UnsafeGet's offset location is a register pair, the low
854         // part contains the correct offset.
855         index = index_.ToLow();
856       }
857     }
858 
859     // We're moving two or three locations to locations that could
860     // overlap, so we need a parallel move resolver.
861     InvokeRuntimeCallingConvention calling_convention;
862     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
863     parallel_move.AddMove(ref_,
864                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
865                           DataType::Type::kReference,
866                           nullptr);
867     parallel_move.AddMove(obj_,
868                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
869                           DataType::Type::kReference,
870                           nullptr);
871     if (index.IsValid()) {
872       parallel_move.AddMove(index,
873                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
874                             DataType::Type::kInt32,
875                             nullptr);
876       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
877     } else {
878       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
879       __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
880     }
881     x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
882     CheckEntrypointTypes<
883         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
884     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
885 
886     RestoreLiveRegisters(codegen, locations);
887     __ jmp(GetExitLabel());
888   }
889 
890   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
891 
892  private:
893   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
894     size_t ref = static_cast<int>(ref_.AsRegister<Register>());
895     size_t obj = static_cast<int>(obj_.AsRegister<Register>());
896     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
897       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
898         return static_cast<Register>(i);
899       }
900     }
901     // We shall never fail to find a free caller-save register, as
902     // there are more than two core caller-save registers on x86
903     // (meaning it is possible to find one which is different from
904     // `ref` and `obj`).
905     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
906     LOG(FATAL) << "Could not find a free caller-save register";
907     UNREACHABLE();
908   }
909 
910   const Location out_;
911   const Location ref_;
912   const Location obj_;
913   const uint32_t offset_;
914   // An additional location containing an index to an array.
915   // Only used for HArrayGet and the UnsafeGetObject &
916   // UnsafeGetObjectVolatile intrinsics.
917   const Location index_;
918 
919   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
920 };
921 
922 // Slow path generating a read barrier for a GC root.
923 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
924  public:
925   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
926       : SlowPathCode(instruction), out_(out), root_(root) {
927   }
928 
929   void EmitNativeCode(CodeGenerator* codegen) override {
930     DCHECK(codegen->EmitReadBarrier());
931     LocationSummary* locations = instruction_->GetLocations();
932     Register reg_out = out_.AsRegister<Register>();
933     DCHECK(locations->CanCall());
934     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
935     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
936         << "Unexpected instruction in read barrier for GC root slow path: "
937         << instruction_->DebugName();
938 
939     __ Bind(GetEntryLabel());
940     SaveLiveRegisters(codegen, locations);
941 
942     InvokeRuntimeCallingConvention calling_convention;
943     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
944     x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
945     x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
946                                instruction_,
947                                instruction_->GetDexPc(),
948                                this);
949     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
950     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
951 
952     RestoreLiveRegisters(codegen, locations);
953     __ jmp(GetExitLabel());
954   }
955 
956   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
957 
958  private:
959   const Location out_;
960   const Location root_;
961 
962   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
963 };
964 
965 class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
966  public:
967   explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
968 
969   void EmitNativeCode(CodeGenerator* codegen) override {
970     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
971     LocationSummary* locations = instruction_->GetLocations();
972     QuickEntrypointEnum entry_point =
973         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
974     __ Bind(GetEntryLabel());
975     SaveLiveRegisters(codegen, locations);
976     if (instruction_->IsMethodExitHook()) {
977       __ movl(EBX, Immediate(codegen->GetFrameSize()));
978     }
979     x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
980     RestoreLiveRegisters(codegen, locations);
981     __ jmp(GetExitLabel());
982   }
983 
984   const char* GetDescription() const override {
985     return "MethodEntryExitHooksSlowPath";
986   }
987 
988  private:
989   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
990 };
991 
992 class CompileOptimizedSlowPathX86 : public SlowPathCode {
993  public:
994   CompileOptimizedSlowPathX86(HSuspendCheck* suspend_check, uint32_t counter_address)
995       : SlowPathCode(suspend_check),
996         counter_address_(counter_address) {}
997 
998   void EmitNativeCode(CodeGenerator* codegen) override {
999     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
1000     __ Bind(GetEntryLabel());
1001     __ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1002     if (instruction_ != nullptr) {
1003       // Only saves full width XMM for SIMD.
1004       SaveLiveRegisters(codegen, instruction_->GetLocations());
1005     }
1006     x86_codegen->GenerateInvokeRuntime(
1007         GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1008     if (instruction_ != nullptr) {
1009       // Only restores full width XMM for SIMD.
1010       RestoreLiveRegisters(codegen, instruction_->GetLocations());
1011     }
1012     __ jmp(GetExitLabel());
1013   }
1014 
1015   const char* GetDescription() const override {
1016     return "CompileOptimizedSlowPath";
1017   }
1018 
1019  private:
1020   uint32_t counter_address_;
1021 
1022   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
1023 };
1024 
1025 #undef __
1026 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1027 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
1028 
1029 inline Condition X86Condition(IfCondition cond) {
1030   switch (cond) {
1031     case kCondEQ: return kEqual;
1032     case kCondNE: return kNotEqual;
1033     case kCondLT: return kLess;
1034     case kCondLE: return kLessEqual;
1035     case kCondGT: return kGreater;
1036     case kCondGE: return kGreaterEqual;
1037     case kCondB:  return kBelow;
1038     case kCondBE: return kBelowEqual;
1039     case kCondA:  return kAbove;
1040     case kCondAE: return kAboveEqual;
1041   }
1042   LOG(FATAL) << "Unreachable";
1043   UNREACHABLE();
1044 }
1045 
1046 // Maps signed condition to unsigned condition and FP condition to x86 name.
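// The mapping is needed for floating-point compares because ucomiss/ucomisd set CF/ZF/PF
// the way an unsigned integer compare does (CF is set when the first operand is less than
// the second or when the operands are unordered), so signed conditions must be emitted as
// their below/above counterparts.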
1047 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
1048   switch (cond) {
1049     case kCondEQ: return kEqual;
1050     case kCondNE: return kNotEqual;
1051     // Signed to unsigned, and FP to x86 name.
1052     case kCondLT: return kBelow;
1053     case kCondLE: return kBelowEqual;
1054     case kCondGT: return kAbove;
1055     case kCondGE: return kAboveEqual;
1056     // Unsigned conditions remain unchanged.
1057     case kCondB:  return kBelow;
1058     case kCondBE: return kBelowEqual;
1059     case kCondA:  return kAbove;
1060     case kCondAE: return kAboveEqual;
1061   }
1062   LOG(FATAL) << "Unreachable";
1063   UNREACHABLE();
1064 }
1065 
1066 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
1067   stream << Register(reg);
1068 }
1069 
1070 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1071   stream << XmmRegister(reg);
1072 }
1073 
1074 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
1075   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
1076 }
1077 
1078 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1079   __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
1080   return kX86WordSize;
1081 }
1082 
1083 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1084   __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
1085   return kX86WordSize;
1086 }
1087 
1088 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1089   if (GetGraph()->HasSIMD()) {
1090     __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
1091   } else {
1092     __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
1093   }
1094   return GetSlowPathFPWidth();
1095 }
1096 
1097 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1098   if (GetGraph()->HasSIMD()) {
1099     __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
1100   } else {
1101     __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1102   }
1103   return GetSlowPathFPWidth();
1104 }
1105 
1106 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1107                                      HInstruction* instruction,
1108                                      uint32_t dex_pc,
1109                                      SlowPathCode* slow_path) {
1110   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1111   GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1112   if (EntrypointRequiresStackMap(entrypoint)) {
1113     RecordPcInfo(instruction, dex_pc, slow_path);
1114   }
1115 }
1116 
1117 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1118                                                            HInstruction* instruction,
1119                                                            SlowPathCode* slow_path) {
1120   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1121   GenerateInvokeRuntime(entry_point_offset);
1122 }
1123 
1124 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
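  // On 32-bit x86 the Thread* is reachable through the %fs segment register, so a quick
  // entrypoint is invoked as an indirect call through fs:[entry_point_offset]; no general
  // purpose register needs to hold the thread pointer.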
1125   __ fs()->call(Address::Absolute(entry_point_offset));
1126 }
1127 
1128 namespace detail {
1129 
1130 // Mark which intrinsics we don't have handcrafted code for.
1131 template <Intrinsics T>
1132 struct IsUnimplemented {
1133   bool is_unimplemented = false;
1134 };
1135 
1136 #define TRUE_OVERRIDE(Name)                     \
1137   template <>                                   \
1138   struct IsUnimplemented<Intrinsics::k##Name> { \
1139     bool is_unimplemented = true;               \
1140   };
1141 UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
1142 #undef TRUE_OVERRIDE
1143 
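// kIsIntrinsicUnimplemented is indexed by the Intrinsics enum (kNone first): each entry
// reads the is_unimplemented flag of the matching IsUnimplemented specialization, so the
// table records which intrinsics have no handcrafted x86 implementation.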
1144 static constexpr bool kIsIntrinsicUnimplemented[] = {
1145     false,  // kNone
1146 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1147     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1148     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1149 #undef IS_UNIMPLEMENTED
1150 };
1151 
1152 }  // namespace detail
1153 
1154 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1155                                    const CompilerOptions& compiler_options,
1156                                    OptimizingCompilerStats* stats)
1157     : CodeGenerator(graph,
1158                     kNumberOfCpuRegisters,
1159                     kNumberOfXmmRegisters,
1160                     kNumberOfRegisterPairs,
1161                     ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1162                         | (1 << kFakeReturnRegister),
1163                     0,
1164                     compiler_options,
1165                     stats,
1166                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1167       block_labels_(nullptr),
1168       location_builder_(graph, this),
1169       instruction_visitor_(graph, this),
1170       move_resolver_(graph->GetAllocator(), this),
1171       assembler_(graph->GetAllocator(),
1172                  compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
1173       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1174       app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1175       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1176       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1177       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1178       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1179       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1180       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1181       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1182       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1183       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1184       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1185       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1186       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1187       constant_area_start_(-1),
1188       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1189       method_address_offset_(std::less<uint32_t>(),
1190                              graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1191   // Use a fake return address register to mimic Quick.
1192   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1193 }
1194 
SetupBlockedRegisters() const1195 void CodeGeneratorX86::SetupBlockedRegisters() const {
1196   // Stack register is always reserved.
1197   blocked_core_registers_[ESP] = true;
1198 }
1199 
InstructionCodeGeneratorX86(HGraph * graph,CodeGeneratorX86 * codegen)1200 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1201       : InstructionCodeGenerator(graph, codegen),
1202         assembler_(codegen->GetAssembler()),
1203         codegen_(codegen) {}
1204 
DWARFReg(Register reg)1205 static dwarf::Reg DWARFReg(Register reg) {
1206   return dwarf::Reg::X86Core(static_cast<int>(reg));
1207 }
1208 
SetInForReturnValue(HInstruction * ret,LocationSummary * locations)1209 void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
1210   switch (ret->InputAt(0)->GetType()) {
1211     case DataType::Type::kReference:
1212     case DataType::Type::kBool:
1213     case DataType::Type::kUint8:
1214     case DataType::Type::kInt8:
1215     case DataType::Type::kUint16:
1216     case DataType::Type::kInt16:
1217     case DataType::Type::kInt32:
1218       locations->SetInAt(0, Location::RegisterLocation(EAX));
1219       break;
1220 
1221     case DataType::Type::kInt64:
1222       locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
1223       break;
1224 
1225     case DataType::Type::kFloat32:
1226     case DataType::Type::kFloat64:
1227       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1228       break;
1229 
1230     case DataType::Type::kVoid:
1231       locations->SetInAt(0, Location::NoLocation());
1232       break;
1233 
1234     default:
1235       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
1236   }
1237 }
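// To summarize the managed x86 return convention implemented above: 32-bit integral and
// reference values come back in EAX, 64-bit values in the EAX:EDX pair, floating-point
// values in XMM0, and void returns have no location.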
1238 
VisitMethodExitHook(HMethodExitHook * method_hook)1239 void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
1240   LocationSummary* locations = new (GetGraph()->GetAllocator())
1241       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1242   SetInForReturnValue(method_hook, locations);
1243   // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
1244   locations->AddTemp(Location::RegisterLocation(EAX));
1245   locations->AddTemp(Location::RegisterLocation(EDX));
1246   // An additional temporary register to hold the address where the timestamp gets stored.
1247   locations->AddTemp(Location::RequiresRegister());
1248 }
1249 
GenerateMethodEntryExitHook(HInstruction * instruction)1250 void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
1251   SlowPathCode* slow_path =
1252       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
1253   codegen_->AddSlowPath(slow_path);
1254   LocationSummary* locations = instruction->GetLocations();
1255 
1256   if (instruction->IsMethodExitHook()) {
1257     // Check whether we need to test if the caller requires a deoptimization. Strictly speaking,
1258     // checking the CheckCallerForDeopt bit would be sufficient, but it is faster to check for any
1259     // non-zero value. The kCHA bit isn't used in debuggable runtimes because CHA optimization is
1260     // disabled there, and the other bit is set when this method itself requires a deoptimization
1261     // due to redefinition. So it is safe to just check for a non-zero value here.
1262     __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1263     __ j(kNotEqual, slow_path->GetEntryLabel());
1264   }
1265 
1266   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1267   MemberOffset offset = instruction->IsMethodExitHook() ?
1268       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
1269       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1270   __ cmpb(Address::Absolute(address + offset.Int32Value()),
1271           Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1272   // Check if there are any trace method entry / exit listeners. If not, continue.
1273   __ j(kLess, slow_path->GetExitLabel());
1274   // Check if there are any slow (JVMTI / trace with thread CPU time) method entry / exit
1275   // listeners. If so, just take the slow path.
1276   __ j(kGreater, slow_path->GetEntryLabel());
1277 
1278   // For curr_entry, use the temp register that isn't EAX or EDX: it has to stay live across
1279   // the rdtsc below, which clobbers EAX and EDX.
1280   Register curr_entry = locations->GetTemp(2).AsRegister<Register>();
1281   Register init_entry = locations->GetTemp(1).AsRegister<Register>();
1282 
1283   // Check if there is room in the buffer for a new entry; if not, take the slow path.
1284   uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value();
1285   uint64_t trace_buffer_curr_entry_offset =
1286       Thread::TraceBufferCurrPtrOffset<kX86PointerSize>().Int32Value();
1287 
1288   __ fs()->movl(curr_entry, Address::Absolute(trace_buffer_curr_entry_offset));
1289   __ subl(curr_entry, Immediate(kNumEntriesForWallClock * sizeof(void*)));
1290   __ fs()->movl(init_entry, Address::Absolute(trace_buffer_ptr));
1291   __ cmpl(curr_entry, init_entry);
1292   __ j(kLess, slow_path->GetEntryLabel());
1293 
1294   // Update the pointer to the current entry in the `Thread`.
1295   __ fs()->movl(Address::Absolute(trace_buffer_curr_entry_offset), curr_entry);
1296 
1297   // Record method pointer and trace action.
1298   Register method = init_entry;
1299   __ movl(method, Address(ESP, kCurrentMethodStackOffset));
1300   // Use the last two bits to encode the trace method action. For MethodEntry the action is 0,
1301   // so there is no need to set the bits; they are 0 already.
1302   if (instruction->IsMethodExitHook()) {
1303     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1304     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1305     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1306     __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1307   }
1308   __ movl(Address(curr_entry, kMethodOffsetInBytes), method);
1309   // Get the timestamp. rdtsc returns the timestamp in EAX (low) and EDX (high).
1310   __ rdtsc();
1311   __ movl(Address(curr_entry, kTimestampOffsetInBytes), EAX);
1312   __ movl(Address(curr_entry, kHighTimestampOffsetInBytes), EDX);
1313   __ Bind(slow_path->GetExitLabel());
1314 }
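// A minimal sketch of the trace buffer entry written above, using the offset constants from
// the code (the authoritative layout is defined with those constants elsewhere):
//   [curr_entry + kMethodOffsetInBytes]        ArtMethod* with the trace action in the low bits
//   [curr_entry + kTimestampOffsetInBytes]     low 32 bits of the rdtsc timestamp
//   [curr_entry + kHighTimestampOffsetInBytes] high 32 bits of the rdtsc timestamp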
1315 
VisitMethodExitHook(HMethodExitHook * instruction)1316 void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
1317   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1318   DCHECK(codegen_->RequiresCurrentMethod());
1319   GenerateMethodEntryExitHook(instruction);
1320 }
1321 
VisitMethodEntryHook(HMethodEntryHook * method_hook)1322 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1323   LocationSummary* locations = new (GetGraph()->GetAllocator())
1324       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1325   // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
1326   locations->AddTemp(Location::RegisterLocation(EAX));
1327   locations->AddTemp(Location::RegisterLocation(EDX));
1328   // An additional temporary register to hold the address where the timestamp gets stored.
1329   locations->AddTemp(Location::RequiresRegister());
1330 }
1331 
VisitMethodEntryHook(HMethodEntryHook * instruction)1332 void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1333   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1334   DCHECK(codegen_->RequiresCurrentMethod());
1335   GenerateMethodEntryExitHook(instruction);
1336 }
1337 
MaybeIncrementHotness(HSuspendCheck * suspend_check,bool is_frame_entry)1338 void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1339   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1340     Register reg = EAX;
1341     if (is_frame_entry) {
1342       reg = kMethodRegisterArgument;
1343     } else {
1344       __ pushl(EAX);
1345       __ cfi().AdjustCFAOffset(4);
1346       __ movl(EAX, Address(ESP, kX86WordSize));
1347     }
1348     NearLabel overflow;
1349     __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1350             Immediate(interpreter::kNterpHotnessValue));
1351     __ j(kEqual, &overflow);
1352     __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1353     __ Bind(&overflow);
1354     if (!is_frame_entry) {
1355       __ popl(EAX);
1356       __ cfi().AdjustCFAOffset(-4);
1357     }
1358   }
1359 
1360   if (GetGraph()->IsCompilingBaseline() &&
1361       GetGraph()->IsUsefulOptimizing() &&
1362       !Runtime::Current()->IsAotCompiler()) {
1363     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1364     DCHECK(info != nullptr);
1365     uint32_t address = reinterpret_cast32<uint32_t>(info) +
1366         ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1367     DCHECK(!HasEmptyFrame());
1368     SlowPathCode* slow_path =
1369         new (GetScopedAllocator()) CompileOptimizedSlowPathX86(suspend_check, address);
1370     AddSlowPath(slow_path);
1371     // With multiple threads, this can overflow. This is OK: we will eventually see it
1372     // reach 0. Also, at this point we have no register available to look at the
1373     // counter directly.
1374     __ addw(Address::Absolute(address), Immediate(-1));
1375     __ j(kEqual, slow_path->GetEntryLabel());
1376     __ Bind(slow_path->GetExitLabel());
1377   }
1378 }
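// Roughly, the baseline hotness update emitted above is (illustrative, not exact syntax):
//   addw [profiling_info + BaselineHotnessCount], -1
//   je   CompileOptimizedSlowPathX86   ; counter reached zero -> request optimized compilation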
1379 
GenerateFrameEntry()1380 void CodeGeneratorX86::GenerateFrameEntry() {
1381   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
1382 
1383   // Check if we need to generate the clinit check. We will jump to the
1384   // resolution stub if the class is not initialized and the executing thread is
1385   // not the thread initializing it.
1386   // We do this before constructing the frame to get the correct stack trace if
1387   // an exception is thrown.
1388   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1389     NearLabel continue_execution, resolution;
1390     // We'll use EBP as a temporary.
1391     __ pushl(EBP);
1392     __ cfi().AdjustCFAOffset(4);
1393     // Check if we're visibly initialized.
1394 
1395     // We don't emit a read barrier here to save on code size. We rely on the
1396     // resolution trampoline to do a suspend check before re-entering this code.
1397     __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
1398     __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
1399     __ j(kAboveEqual, &continue_execution);
1400 
1401     // Check if the class is being initialized and the initializing thread is the one
1402     // executing this code.
1403     __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedInitializingValue));
1404     __ j(kBelow, &resolution);
1405 
1406     __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1407     __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
1408     __ j(kEqual, &continue_execution);
1409     __ Bind(&resolution);
1410 
1411     __ popl(EBP);
1412     __ cfi().AdjustCFAOffset(-4);
1413     // Jump to the resolution stub.
1414     ThreadOffset32 entrypoint_offset =
1415         GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
1416     __ fs()->jmp(Address::Absolute(entrypoint_offset));
1417 
1418     __ Bind(&continue_execution);
1419     __ cfi().AdjustCFAOffset(4);  // Undo the `-4` adjustment above. We get here with EBP pushed.
1420     __ popl(EBP);
1421     __ cfi().AdjustCFAOffset(-4);
1422   }
1423 
1424   __ Bind(&frame_entry_label_);
1425   bool skip_overflow_check =
1426       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1427   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1428 
1429   if (!skip_overflow_check) {
1430     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1431     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1432     RecordPcInfo(nullptr, 0);
1433   }
1434 
1435   if (!HasEmptyFrame()) {
1436     // Make sure the frame size isn't unreasonably large.
1437     DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1438 
1439     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1440       Register reg = kCoreCalleeSaves[i];
1441       if (allocated_registers_.ContainsCoreRegister(reg)) {
1442         __ pushl(reg);
1443         __ cfi().AdjustCFAOffset(kX86WordSize);
1444         __ cfi().RelOffset(DWARFReg(reg), 0);
1445       }
1446     }
1447 
1448     int adjust = GetFrameSize() - FrameEntrySpillSize();
1449     IncreaseFrame(adjust);
1450     // Save the current method if we need it. Note that we do not
1451     // do this in HCurrentMethod, as the instruction might have been removed
1452     // in the SSA graph.
1453     if (RequiresCurrentMethod()) {
1454       __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1455     }
1456 
1457     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1458       // Initialize should_deoptimize flag to 0.
1459       __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1460     }
1461   }
1462 
1463   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1464 }
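// For a typical non-leaf method, the prologue generated above looks roughly like:
//   pushl <each allocated callee-save>   ; at most EDI, ESI, EBP
//   subl  ESP, <frame size - spill area>
//   movl  [ESP], EAX                     ; store the current ArtMethod* if required
// preceded, when applicable, by the clinit and stack-overflow checks shown above.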
1465 
GenerateFrameExit()1466 void CodeGeneratorX86::GenerateFrameExit() {
1467   __ cfi().RememberState();
1468   if (!HasEmptyFrame()) {
1469     int adjust = GetFrameSize() - FrameEntrySpillSize();
1470     DecreaseFrame(adjust);
1471 
1472     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1473       Register reg = kCoreCalleeSaves[i];
1474       if (allocated_registers_.ContainsCoreRegister(reg)) {
1475         __ popl(reg);
1476         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1477         __ cfi().Restore(DWARFReg(reg));
1478       }
1479     }
1480   }
1481   __ ret();
1482   __ cfi().RestoreState();
1483   __ cfi().DefCFAOffset(GetFrameSize());
1484 }
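// The matching epilogue is roughly: addl ESP, <frame size - spill area>; popl the allocated
// callee-saves in reverse push order; ret.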
1485 
Bind(HBasicBlock * block)1486 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1487   __ Bind(GetLabelOf(block));
1488 }
1489 
GetReturnLocation(DataType::Type type) const1490 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1491   switch (type) {
1492     case DataType::Type::kReference:
1493     case DataType::Type::kBool:
1494     case DataType::Type::kUint8:
1495     case DataType::Type::kInt8:
1496     case DataType::Type::kUint16:
1497     case DataType::Type::kInt16:
1498     case DataType::Type::kUint32:
1499     case DataType::Type::kInt32:
1500       return Location::RegisterLocation(EAX);
1501 
1502     case DataType::Type::kUint64:
1503     case DataType::Type::kInt64:
1504       return Location::RegisterPairLocation(EAX, EDX);
1505 
1506     case DataType::Type::kVoid:
1507       return Location::NoLocation();
1508 
1509     case DataType::Type::kFloat64:
1510     case DataType::Type::kFloat32:
1511       return Location::FpuRegisterLocation(XMM0);
1512   }
1513 }
1514 
GetMethodLocation() const1515 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1516   return Location::RegisterLocation(kMethodRegisterArgument);
1517 }
1518 
GetNextLocation(DataType::Type type)1519 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1520   switch (type) {
1521     case DataType::Type::kReference:
1522     case DataType::Type::kBool:
1523     case DataType::Type::kUint8:
1524     case DataType::Type::kInt8:
1525     case DataType::Type::kUint16:
1526     case DataType::Type::kInt16:
1527     case DataType::Type::kInt32: {
1528       uint32_t index = gp_index_++;
1529       stack_index_++;
1530       if (index < calling_convention.GetNumberOfRegisters()) {
1531         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1532       } else {
1533         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1534       }
1535     }
1536 
1537     case DataType::Type::kInt64: {
1538       uint32_t index = gp_index_;
1539       gp_index_ += 2;
1540       stack_index_ += 2;
1541       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1542         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1543             calling_convention.GetRegisterPairAt(index));
1544         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1545       } else {
1546         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1547       }
1548     }
1549 
1550     case DataType::Type::kFloat32: {
1551       uint32_t index = float_index_++;
1552       stack_index_++;
1553       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1554         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1555       } else {
1556         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1557       }
1558     }
1559 
1560     case DataType::Type::kFloat64: {
1561       uint32_t index = float_index_++;
1562       stack_index_ += 2;
1563       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1564         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1565       } else {
1566         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1567       }
1568     }
1569 
1570     case DataType::Type::kUint32:
1571     case DataType::Type::kUint64:
1572     case DataType::Type::kVoid:
1573       LOG(FATAL) << "Unexpected parameter type " << type;
1574       UNREACHABLE();
1575   }
1576   return Location::NoLocation();
1577 }
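// Worked example for the managed convention above: for a (int, long, float) signature, the int
// consumes one gp_index and one stack_index; the long consumes two of each and is passed in a
// register pair only if a full pair is still available, otherwise in a double stack slot; the
// float consumes one float_index and one stack_index. Note that stack_index_ advances by one
// 32-bit slot per word even for register arguments, which keeps the stack fallback offsets
// consistent with the register assignments.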
1578 
GetNextLocation(DataType::Type type)1579 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1580   DCHECK_NE(type, DataType::Type::kReference);
1581 
1582   Location location;
1583   if (DataType::Is64BitType(type)) {
1584     location = Location::DoubleStackSlot(stack_offset_);
1585     stack_offset_ += 2 * kFramePointerSize;
1586   } else {
1587     location = Location::StackSlot(stack_offset_);
1588     stack_offset_ += kFramePointerSize;
1589   }
1590   if (for_register_allocation_) {
1591     location = Location::Any();
1592   }
1593   return location;
1594 }
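// Note: in this @CriticalNative convention every argument is assigned a stack slot; with
// for_register_allocation_ set, the location is relaxed to Any() for the benefit of the
// register allocator.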
1595 
GetReturnLocation(DataType::Type type) const1596 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1597   // We perform conversion to the managed ABI return register after the call if needed.
1598   InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1599   return dex_calling_convention.GetReturnLocation(type);
1600 }
1601 
GetMethodLocation() const1602 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1603   // Pass the method in the hidden argument EAX.
1604   return Location::RegisterLocation(EAX);
1605 }
1606 
Move32(Location destination,Location source)1607 void CodeGeneratorX86::Move32(Location destination, Location source) {
1608   if (source.Equals(destination)) {
1609     return;
1610   }
1611   if (destination.IsRegister()) {
1612     if (source.IsRegister()) {
1613       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1614     } else if (source.IsFpuRegister()) {
1615       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1616     } else if (source.IsConstant()) {
1617       int32_t value = GetInt32ValueOf(source.GetConstant());
1618       __ movl(destination.AsRegister<Register>(), Immediate(value));
1619     } else {
1620       DCHECK(source.IsStackSlot());
1621       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1622     }
1623   } else if (destination.IsFpuRegister()) {
1624     if (source.IsRegister()) {
1625       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1626     } else if (source.IsFpuRegister()) {
1627       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1628     } else {
1629       DCHECK(source.IsStackSlot());
1630       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1631     }
1632   } else {
1633     DCHECK(destination.IsStackSlot()) << destination;
1634     if (source.IsRegister()) {
1635       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1636     } else if (source.IsFpuRegister()) {
1637       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1638     } else if (source.IsConstant()) {
1639       HConstant* constant = source.GetConstant();
1640       int32_t value = GetInt32ValueOf(constant);
1641       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1642     } else {
1643       DCHECK(source.IsStackSlot());
1644       __ pushl(Address(ESP, source.GetStackIndex()));
1645       __ popl(Address(ESP, destination.GetStackIndex()));
1646     }
1647   }
1648 }
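// Note: the stack-slot to stack-slot case above uses a pushl/popl pair, which performs a
// memory-to-memory move without needing a scratch register.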
1649 
Move64(Location destination,Location source)1650 void CodeGeneratorX86::Move64(Location destination, Location source) {
1651   if (source.Equals(destination)) {
1652     return;
1653   }
1654   if (destination.IsRegisterPair()) {
1655     if (source.IsRegisterPair()) {
1656       EmitParallelMoves(
1657           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1658           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1659           DataType::Type::kInt32,
1660           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1661           Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1662           DataType::Type::kInt32);
1663     } else if (source.IsFpuRegister()) {
1664       XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1665       __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1666       __ psrlq(src_reg, Immediate(32));
1667       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1668     } else {
1669       // No conflict possible, so just do the moves.
1670       DCHECK(source.IsDoubleStackSlot());
1671       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1672       __ movl(destination.AsRegisterPairHigh<Register>(),
1673               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1674     }
1675   } else if (destination.IsFpuRegister()) {
1676     if (source.IsFpuRegister()) {
1677       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1678     } else if (source.IsDoubleStackSlot()) {
1679       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1680     } else if (source.IsRegisterPair()) {
1681       size_t elem_size = DataType::Size(DataType::Type::kInt32);
1682       // Push the 2 source registers to the stack.
1683       __ pushl(source.AsRegisterPairHigh<Register>());
1684       __ cfi().AdjustCFAOffset(elem_size);
1685       __ pushl(source.AsRegisterPairLow<Register>());
1686       __ cfi().AdjustCFAOffset(elem_size);
1687       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1688       // And remove the temporary stack space we allocated.
1689       DecreaseFrame(2 * elem_size);
1690     } else {
1691       LOG(FATAL) << "Unimplemented";
1692     }
1693   } else {
1694     DCHECK(destination.IsDoubleStackSlot()) << destination;
1695     if (source.IsRegisterPair()) {
1696       // No conflict possible, so just do the moves.
1697       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1698       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1699               source.AsRegisterPairHigh<Register>());
1700     } else if (source.IsFpuRegister()) {
1701       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1702     } else if (source.IsConstant()) {
1703       HConstant* constant = source.GetConstant();
1704       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1705       int64_t value = GetInt64ValueOf(constant);
1706       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1707       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1708               Immediate(High32Bits(value)));
1709     } else {
1710       DCHECK(source.IsDoubleStackSlot()) << source;
1711       EmitParallelMoves(
1712           Location::StackSlot(source.GetStackIndex()),
1713           Location::StackSlot(destination.GetStackIndex()),
1714           DataType::Type::kInt32,
1715           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1716           Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1717           DataType::Type::kInt32);
1718     }
1719   }
1720 }
1721 
CreateAddress(Register base,Register index=Register::kNoRegister,ScaleFactor scale=TIMES_1,int32_t disp=0)1722 static Address CreateAddress(Register base,
1723                              Register index = Register::kNoRegister,
1724                              ScaleFactor scale = TIMES_1,
1725                              int32_t disp = 0) {
1726   if (index == Register::kNoRegister) {
1727     return Address(base, disp);
1728   }
1729 
1730   return Address(base, index, scale, disp);
1731 }
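// For example, CreateAddress(ESI) yields Address(ESI, 0), while
// CreateAddress(ESI, ECX, TIMES_4, 12) yields the memory operand [ESI + ECX*4 + 12].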
1732 
LoadFromMemoryNoBarrier(DataType::Type dst_type,Location dst,Address src,HInstruction * instr,XmmRegister temp,bool is_atomic_load)1733 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1734                                                Location dst,
1735                                                Address src,
1736                                                HInstruction* instr,
1737                                                XmmRegister temp,
1738                                                bool is_atomic_load) {
1739   switch (dst_type) {
1740     case DataType::Type::kBool:
1741     case DataType::Type::kUint8:
1742       __ movzxb(dst.AsRegister<Register>(), src);
1743       break;
1744     case DataType::Type::kInt8:
1745       __ movsxb(dst.AsRegister<Register>(), src);
1746       break;
1747     case DataType::Type::kInt16:
1748       __ movsxw(dst.AsRegister<Register>(), src);
1749       break;
1750     case DataType::Type::kUint16:
1751       __ movzxw(dst.AsRegister<Register>(), src);
1752       break;
1753     case DataType::Type::kInt32:
1754       __ movl(dst.AsRegister<Register>(), src);
1755       break;
1756     case DataType::Type::kInt64: {
1757       if (is_atomic_load) {
1758         __ movsd(temp, src);
1759         if (instr != nullptr) {
1760           MaybeRecordImplicitNullCheck(instr);
1761         }
1762         __ movd(dst.AsRegisterPairLow<Register>(), temp);
1763         __ psrlq(temp, Immediate(32));
1764         __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1765       } else {
1766         DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1767         Address src_high = Address::displace(src, kX86WordSize);
1768         __ movl(dst.AsRegisterPairLow<Register>(), src);
1769         if (instr != nullptr) {
1770           MaybeRecordImplicitNullCheck(instr);
1771         }
1772         __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1773       }
1774       break;
1775     }
1776     case DataType::Type::kFloat32:
1777       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1778       break;
1779     case DataType::Type::kFloat64:
1780       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1781       break;
1782     case DataType::Type::kReference:
1783       DCHECK(!EmitReadBarrier());
1784       __ movl(dst.AsRegister<Register>(), src);
1785       __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1786       break;
1787     default:
1788       LOG(FATAL) << "Unreachable type " << dst_type;
1789   }
1790   if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1791     // kInt64 needs special handling that is done in the above switch.
1792     MaybeRecordImplicitNullCheck(instr);
1793   }
1794 }
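// Note on the kInt64 paths above: the atomic variant loads all 8 bytes with a single movsd
// into an XMM temp and then splits the value into the register pair with movd + psrlq; the
// non-atomic variant uses two 32-bit loads, so the first load must not clobber the base
// register (hence the DCHECK on the register pair's low half).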
1795 
MoveToMemory(DataType::Type src_type,Location src,Register dst_base,Register dst_index,ScaleFactor dst_scale,int32_t dst_disp)1796 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1797                                     Location src,
1798                                     Register dst_base,
1799                                     Register dst_index,
1800                                     ScaleFactor dst_scale,
1801                                     int32_t dst_disp) {
1802   DCHECK(dst_base != Register::kNoRegister);
1803   Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1804 
1805   switch (src_type) {
1806     case DataType::Type::kBool:
1807     case DataType::Type::kUint8:
1808     case DataType::Type::kInt8: {
1809       if (src.IsConstant()) {
1810         __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1811       } else {
1812         __ movb(dst, src.AsRegister<ByteRegister>());
1813       }
1814       break;
1815     }
1816     case DataType::Type::kUint16:
1817     case DataType::Type::kInt16: {
1818       if (src.IsConstant()) {
1819         __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1820       } else {
1821         __ movw(dst, src.AsRegister<Register>());
1822       }
1823       break;
1824     }
1825     case DataType::Type::kUint32:
1826     case DataType::Type::kInt32: {
1827       if (src.IsConstant()) {
1828         int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1829         __ movl(dst, Immediate(v));
1830       } else {
1831         __ movl(dst, src.AsRegister<Register>());
1832       }
1833       break;
1834     }
1835     case DataType::Type::kUint64:
1836     case DataType::Type::kInt64: {
1837       Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1838       if (src.IsConstant()) {
1839         int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1840         __ movl(dst, Immediate(Low32Bits(v)));
1841         __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1842       } else {
1843         __ movl(dst, src.AsRegisterPairLow<Register>());
1844         __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1845       }
1846       break;
1847     }
1848     case DataType::Type::kFloat32: {
1849       if (src.IsConstant()) {
1850         int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1851         __ movl(dst, Immediate(v));
1852       } else {
1853         __ movss(dst, src.AsFpuRegister<XmmRegister>());
1854       }
1855       break;
1856     }
1857     case DataType::Type::kFloat64: {
1858       Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1859       if (src.IsConstant()) {
1860         int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1861         __ movl(dst, Immediate(Low32Bits(v)));
1862         __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1863       } else {
1864         __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1865       }
1866       break;
1867     }
1868     case DataType::Type::kVoid:
1869     case DataType::Type::kReference:
1870       LOG(FATAL) << "Unreachable type " << src_type;
1871   }
1872 }
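// Note: the 64-bit integer stores above are emitted as two independent 32-bit movl
// instructions, so this helper does not provide atomic 64-bit stores.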
1873 
MoveConstant(Location location,int32_t value)1874 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1875   DCHECK(location.IsRegister());
1876   __ movl(location.AsRegister<Register>(), Immediate(value));
1877 }
1878 
MoveLocation(Location dst,Location src,DataType::Type dst_type)1879 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1880   HParallelMove move(GetGraph()->GetAllocator());
1881   if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1882     move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1883     move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1884   } else {
1885     move.AddMove(src, dst, dst_type, nullptr);
1886   }
1887   GetMoveResolver()->EmitNativeCode(&move);
1888 }
1889 
AddLocationAsTemp(Location location,LocationSummary * locations)1890 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1891   if (location.IsRegister()) {
1892     locations->AddTemp(location);
1893   } else if (location.IsRegisterPair()) {
1894     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1895     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1896   } else {
1897     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1898   }
1899 }
1900 
HandleGoto(HInstruction * got,HBasicBlock * successor)1901 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1902   if (successor->IsExitBlock()) {
1903     DCHECK(got->GetPrevious()->AlwaysThrows());
1904     return;  // no code needed
1905   }
1906 
1907   HBasicBlock* block = got->GetBlock();
1908   HInstruction* previous = got->GetPrevious();
1909 
1910   HLoopInformation* info = block->GetLoopInformation();
1911   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1912     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
1913     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1914     return;
1915   }
1916 
1917   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1918     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1919   }
1920   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1921     __ jmp(codegen_->GetLabelOf(successor));
1922   }
1923 }
1924 
VisitGoto(HGoto * got)1925 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1926   got->SetLocations(nullptr);
1927 }
1928 
VisitGoto(HGoto * got)1929 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1930   HandleGoto(got, got->GetSuccessor());
1931 }
1932 
VisitTryBoundary(HTryBoundary * try_boundary)1933 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1934   try_boundary->SetLocations(nullptr);
1935 }
1936 
VisitTryBoundary(HTryBoundary * try_boundary)1937 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1938   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1939   if (!successor->IsExitBlock()) {
1940     HandleGoto(try_boundary, successor);
1941   }
1942 }
1943 
VisitExit(HExit * exit)1944 void LocationsBuilderX86::VisitExit(HExit* exit) {
1945   exit->SetLocations(nullptr);
1946 }
1947 
VisitExit(HExit * exit)1948 void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {}
1949 
1950 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1951 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1952                                                   LabelType* true_label,
1953                                                   LabelType* false_label) {
1954   if (cond->IsFPConditionTrueIfNaN()) {
1955     __ j(kUnordered, true_label);
1956   } else if (cond->IsFPConditionFalseIfNaN()) {
1957     __ j(kUnordered, false_label);
1958   }
1959   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1960 }
1961 
1962 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1963 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1964                                                                LabelType* true_label,
1965                                                                LabelType* false_label) {
1966   LocationSummary* locations = cond->GetLocations();
1967   Location left = locations->InAt(0);
1968   Location right = locations->InAt(1);
1969   IfCondition if_cond = cond->GetCondition();
1970 
1971   Register left_high = left.AsRegisterPairHigh<Register>();
1972   Register left_low = left.AsRegisterPairLow<Register>();
1973   IfCondition true_high_cond = if_cond;
1974   IfCondition false_high_cond = cond->GetOppositeCondition();
1975   Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part
1976 
1977   // Set the conditions for the test, remembering that == needs to be
1978   // decided using the low words.
1979   switch (if_cond) {
1980     case kCondEQ:
1981     case kCondNE:
1982       // Nothing to do.
1983       break;
1984     case kCondLT:
1985       false_high_cond = kCondGT;
1986       break;
1987     case kCondLE:
1988       true_high_cond = kCondLT;
1989       break;
1990     case kCondGT:
1991       false_high_cond = kCondLT;
1992       break;
1993     case kCondGE:
1994       true_high_cond = kCondGT;
1995       break;
1996     case kCondB:
1997       false_high_cond = kCondA;
1998       break;
1999     case kCondBE:
2000       true_high_cond = kCondB;
2001       break;
2002     case kCondA:
2003       false_high_cond = kCondB;
2004       break;
2005     case kCondAE:
2006       true_high_cond = kCondA;
2007       break;
2008   }
2009 
2010   if (right.IsConstant()) {
2011     int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
2012     int32_t val_high = High32Bits(value);
2013     int32_t val_low = Low32Bits(value);
2014 
2015     codegen_->Compare32BitValue(left_high, val_high);
2016     if (if_cond == kCondNE) {
2017       __ j(X86Condition(true_high_cond), true_label);
2018     } else if (if_cond == kCondEQ) {
2019       __ j(X86Condition(false_high_cond), false_label);
2020     } else {
2021       __ j(X86Condition(true_high_cond), true_label);
2022       __ j(X86Condition(false_high_cond), false_label);
2023     }
2024     // The high words are equal at this point, so compare the low words.
2025     codegen_->Compare32BitValue(left_low, val_low);
2026   } else if (right.IsRegisterPair()) {
2027     Register right_high = right.AsRegisterPairHigh<Register>();
2028     Register right_low = right.AsRegisterPairLow<Register>();
2029 
2030     __ cmpl(left_high, right_high);
2031     if (if_cond == kCondNE) {
2032       __ j(X86Condition(true_high_cond), true_label);
2033     } else if (if_cond == kCondEQ) {
2034       __ j(X86Condition(false_high_cond), false_label);
2035     } else {
2036       __ j(X86Condition(true_high_cond), true_label);
2037       __ j(X86Condition(false_high_cond), false_label);
2038     }
2039     // The high words are equal at this point, so compare the low words.
2040     __ cmpl(left_low, right_low);
2041   } else {
2042     DCHECK(right.IsDoubleStackSlot());
2043     __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
2044     if (if_cond == kCondNE) {
2045       __ j(X86Condition(true_high_cond), true_label);
2046     } else if (if_cond == kCondEQ) {
2047       __ j(X86Condition(false_high_cond), false_label);
2048     } else {
2049       __ j(X86Condition(true_high_cond), true_label);
2050       __ j(X86Condition(false_high_cond), false_label);
2051     }
2052     // The high words are equal at this point, so compare the low words.
2053     __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
2054   }
2055   // The last comparison might be unsigned.
2056   __ j(final_condition, true_label);
2057 }
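// Worked example: for a signed `<` between two int64 values, the code above compares the high
// words first (jump to true on signed less-than, to false on signed greater-than) and, only when
// the high words are equal, compares the low words with the unsigned counterpart (`below`),
// since the low words carry no sign information.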
2058 
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)2059 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
2060                                                     Location rhs,
2061                                                     HInstruction* insn,
2062                                                     bool is_double) {
2063   HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull();
2064   if (is_double) {
2065     if (rhs.IsFpuRegister()) {
2066       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2067     } else if (const_area != nullptr) {
2068       DCHECK(const_area->IsEmittedAtUseSite());
2069       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
2070                  codegen_->LiteralDoubleAddress(
2071                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
2072                      const_area->GetBaseMethodAddress(),
2073                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2074     } else {
2075       DCHECK(rhs.IsDoubleStackSlot());
2076       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2077     }
2078   } else {
2079     if (rhs.IsFpuRegister()) {
2080       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2081     } else if (const_area != nullptr) {
2082       DCHECK(const_area->IsEmittedAtUseSite());
2083       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
2084                  codegen_->LiteralFloatAddress(
2085                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
2086                      const_area->GetBaseMethodAddress(),
2087                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2088     } else {
2089       DCHECK(rhs.IsStackSlot());
2090       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2091     }
2092   }
2093 }
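// Note: ucomiss/ucomisd set the flags like an unsigned compare and report NaN operands as
// "unordered" (via the parity flag), which is why GenerateFPJumps above dispatches the
// kUnordered case before testing the ordinary condition.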
2094 
2095 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)2096 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
2097                                                                LabelType* true_target_in,
2098                                                                LabelType* false_target_in) {
2099   // Generated branching requires both targets to be explicit. If either of the
2100   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
2101   LabelType fallthrough_target;
2102   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2103   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2104 
2105   LocationSummary* locations = condition->GetLocations();
2106   Location left = locations->InAt(0);
2107   Location right = locations->InAt(1);
2108 
2109   DataType::Type type = condition->InputAt(0)->GetType();
2110   switch (type) {
2111     case DataType::Type::kInt64:
2112       GenerateLongComparesAndJumps(condition, true_target, false_target);
2113       break;
2114     case DataType::Type::kFloat32:
2115       GenerateFPCompare(left, right, condition, false);
2116       GenerateFPJumps(condition, true_target, false_target);
2117       break;
2118     case DataType::Type::kFloat64:
2119       GenerateFPCompare(left, right, condition, true);
2120       GenerateFPJumps(condition, true_target, false_target);
2121       break;
2122     default:
2123       LOG(FATAL) << "Unexpected compare type " << type;
2124   }
2125 
2126   if (false_target != &fallthrough_target) {
2127     __ jmp(false_target);
2128   }
2129 
2130   if (fallthrough_target.IsLinked()) {
2131     __ Bind(&fallthrough_target);
2132   }
2133 }
2134 
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch,const CompilerOptions & compiler_options)2135 static bool AreEflagsSetFrom(HInstruction* cond,
2136                              HInstruction* branch,
2137                              const CompilerOptions& compiler_options) {
2138   // Moves may affect the eflags register (moving zero uses xorl), so we can only rely on the
2139   // eflags being set if `cond` comes strictly right before `branch`. We also can't use the eflags
2140   // for long/FP conditions if they are materialized, due to the complex branching they require.
2141   return cond->IsCondition() &&
2142          cond->GetNext() == branch &&
2143          cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
2144          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2145          !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2146            compiler_options.ProfileBranches());
2147 }
2148 
2149 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)2150 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
2151                                                         size_t condition_input_index,
2152                                                         LabelType* true_target,
2153                                                         LabelType* false_target) {
2154   HInstruction* cond = instruction->InputAt(condition_input_index);
2155 
2156   if (true_target == nullptr && false_target == nullptr) {
2157     // Nothing to do. The code always falls through.
2158     return;
2159   } else if (cond->IsIntConstant()) {
2160     // Constant condition, statically compared against "true" (integer value 1).
2161     if (cond->AsIntConstant()->IsTrue()) {
2162       if (true_target != nullptr) {
2163         __ jmp(true_target);
2164       }
2165     } else {
2166       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2167       if (false_target != nullptr) {
2168         __ jmp(false_target);
2169       }
2170     }
2171     return;
2172   }
2173 
2174   // The following code generates these patterns:
2175   //  (1) true_target == nullptr && false_target != nullptr
2176   //        - opposite condition true => branch to false_target
2177   //  (2) true_target != nullptr && false_target == nullptr
2178   //        - condition true => branch to true_target
2179   //  (3) true_target != nullptr && false_target != nullptr
2180   //        - condition true => branch to true_target
2181   //        - branch to false_target
2182   if (IsBooleanValueOrMaterializedCondition(cond)) {
2183     if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2184       if (true_target == nullptr) {
2185         __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2186       } else {
2187         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2188       }
2189     } else {
2190       // Materialized condition, compare against 0.
2191       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2192       if (lhs.IsRegister()) {
2193         __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2194       } else {
2195         __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2196       }
2197       if (true_target == nullptr) {
2198         __ j(kEqual, false_target);
2199       } else {
2200         __ j(kNotEqual, true_target);
2201       }
2202     }
2203   } else {
2204     // Condition has not been materialized, use its inputs as the comparison and
2205     // its condition as the branch condition.
2206     HCondition* condition = cond->AsCondition();
2207 
2208     // If this is a long or FP comparison that has been folded into
2209     // the HCondition, generate the comparison directly.
2210     DataType::Type type = condition->InputAt(0)->GetType();
2211     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2212       GenerateCompareTestAndBranch(condition, true_target, false_target);
2213       return;
2214     }
2215 
2216     Location lhs = condition->GetLocations()->InAt(0);
2217     Location rhs = condition->GetLocations()->InAt(1);
2218     // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2219     codegen_->GenerateIntCompare(lhs, rhs);
2220     if (true_target == nullptr) {
2221       __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2222     } else {
2223       __ j(X86Condition(condition->GetCondition()), true_target);
2224     }
2225   }
2226 
2227   // If neither branch falls through (case 3), the conditional branch to `true_target`
2228   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2229   if (true_target != nullptr && false_target != nullptr) {
2230     __ jmp(false_target);
2231   }
2232 }
2233 
VisitIf(HIf * if_instr)2234 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2235   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2236   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2237     if (GetGraph()->IsCompilingBaseline() &&
2238         codegen_->GetCompilerOptions().ProfileBranches() &&
2239         !Runtime::Current()->IsAotCompiler()) {
2240       locations->SetInAt(0, Location::RequiresRegister());
2241       locations->AddRegisterTemps(2);
2242     } else {
2243       locations->SetInAt(0, Location::Any());
2244     }
2245   }
2246 }
2247 
VisitIf(HIf * if_instr)2248 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2249   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2250   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2251   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2252       nullptr : codegen_->GetLabelOf(true_successor);
2253   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2254       nullptr : codegen_->GetLabelOf(false_successor);
2255   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2256     if (GetGraph()->IsCompilingBaseline() &&
2257         codegen_->GetCompilerOptions().ProfileBranches() &&
2258         !Runtime::Current()->IsAotCompiler()) {
2259       DCHECK(if_instr->InputAt(0)->IsCondition());
2260       Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>();
2261       Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>();
2262       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2263       DCHECK(info != nullptr);
2264       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2265       // Currently, not all If branches are profiled.
2266       if (cache != nullptr) {
2267         uint64_t address =
2268             reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2269         static_assert(
2270             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2271             "Unexpected offsets for BranchCache");
2272         NearLabel done;
2273         Location lhs = if_instr->GetLocations()->InAt(0);
2274         __ movl(temp, Immediate(address));
2275         __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0));
2276         __ addw(counter, Immediate(1));
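        // If the 16-bit counter wrapped around to zero, skip the store below so the count
        // saturates at 0xFFFF instead of restarting from zero.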
2277         __ j(kEqual, &done);
2278         __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter);
2279         __ Bind(&done);
2280       }
2281     }
2282   }
2283   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2284 }
2285 
VisitDeoptimize(HDeoptimize * deoptimize)2286 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2287   LocationSummary* locations = new (GetGraph()->GetAllocator())
2288       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2289   InvokeRuntimeCallingConvention calling_convention;
2290   RegisterSet caller_saves = RegisterSet::Empty();
2291   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2292   locations->SetCustomSlowPathCallerSaves(caller_saves);
2293   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2294     locations->SetInAt(0, Location::Any());
2295   }
2296 }
2297 
VisitDeoptimize(HDeoptimize * deoptimize)2298 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2299   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2300   GenerateTestAndBranch<Label>(deoptimize,
2301                                /* condition_input_index= */ 0,
2302                                slow_path->GetEntryLabel(),
2303                                /* false_target= */ nullptr);
2304 }
2305 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2306 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2307   LocationSummary* locations = new (GetGraph()->GetAllocator())
2308       LocationSummary(flag, LocationSummary::kNoCall);
2309   locations->SetOut(Location::RequiresRegister());
2310 }
2311 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2312 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2313   __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2314           Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2315 }
2316 
SelectCanUseCMOV(HSelect * select)2317 static bool SelectCanUseCMOV(HSelect* select) {
2318   // There are no conditional move instructions for XMMs.
2319   if (DataType::IsFloatingPointType(select->GetType())) {
2320     return false;
2321   }
2322 
2323   // An FP condition doesn't generate the single condition code that we need.
2324   // In 32-bit mode, a long condition doesn't generate a single condition code either.
2325   HInstruction* condition = select->GetCondition();
2326   if (condition->IsCondition()) {
2327     DataType::Type compare_type = condition->InputAt(0)->GetType();
2328     if (compare_type == DataType::Type::kInt64 ||
2329         DataType::IsFloatingPointType(compare_type)) {
2330       return false;
2331     }
2332   }
2333 
2334   // We can generate a CMOV for this Select.
2335   return true;
2336 }
2337 
VisitSelect(HSelect * select)2338 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2339   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2340   if (DataType::IsFloatingPointType(select->GetType())) {
2341     locations->SetInAt(0, Location::RequiresFpuRegister());
2342     locations->SetInAt(1, Location::Any());
2343   } else {
2344     locations->SetInAt(0, Location::RequiresRegister());
2345     if (SelectCanUseCMOV(select)) {
2346       if (select->InputAt(1)->IsConstant()) {
2347         // Cmov can't handle a constant value.
2348         locations->SetInAt(1, Location::RequiresRegister());
2349       } else {
2350         locations->SetInAt(1, Location::Any());
2351       }
2352     } else {
2353       locations->SetInAt(1, Location::Any());
2354     }
2355   }
2356   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2357     locations->SetInAt(2, Location::RequiresRegister());
2358   }
2359   locations->SetOut(Location::SameAsFirstInput());
2360 }
2361 
VisitSelect(HSelect * select)2362 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2363   LocationSummary* locations = select->GetLocations();
2364   DCHECK(locations->InAt(0).Equals(locations->Out()));
2365   if (SelectCanUseCMOV(select)) {
2366     // If both the condition and the source types are integer, we can generate
2367     // a CMOV to implement Select.
2368 
2369     HInstruction* select_condition = select->GetCondition();
2370     Condition cond = kNotEqual;
2371 
2372     // Figure out how to test the 'condition'.
2373     if (select_condition->IsCondition()) {
2374       HCondition* condition = select_condition->AsCondition();
2375       if (!condition->IsEmittedAtUseSite()) {
2376         // This was a previously materialized condition.
2377         // Can we use the existing condition code?
2378         if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2379           // Materialization was the previous instruction. Condition codes are right.
2380           cond = X86Condition(condition->GetCondition());
2381         } else {
2382           // No, we have to recreate the condition code.
2383           Register cond_reg = locations->InAt(2).AsRegister<Register>();
2384           __ testl(cond_reg, cond_reg);
2385         }
2386       } else {
2387         // We can't handle FP or long here.
2388         DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2389         DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2390         LocationSummary* cond_locations = condition->GetLocations();
2391         codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2392         cond = X86Condition(condition->GetCondition());
2393       }
2394     } else {
2395       // Must be a Boolean condition, which needs to be compared to 0.
2396       Register cond_reg = locations->InAt(2).AsRegister<Register>();
2397       __ testl(cond_reg, cond_reg);
2398     }
2399 
2400     // If the condition is true, overwrite the output, which already contains false.
2401     Location false_loc = locations->InAt(0);
2402     Location true_loc = locations->InAt(1);
2403     if (select->GetType() == DataType::Type::kInt64) {
2404       // 64 bit conditional move.
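      // Note: cmovl does not modify EFLAGS, so the condition computed above can
      // drive both halves of the register pair; each 32-bit half of the long
      // value gets its own conditional move.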
2405       Register false_high = false_loc.AsRegisterPairHigh<Register>();
2406       Register false_low = false_loc.AsRegisterPairLow<Register>();
2407       if (true_loc.IsRegisterPair()) {
2408         __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2409         __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2410       } else {
2411         __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2412         __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2413       }
2414     } else {
2415       // 32 bit conditional move.
2416       Register false_reg = false_loc.AsRegister<Register>();
2417       if (true_loc.IsRegister()) {
2418         __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2419       } else {
2420         __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2421       }
2422     }
2423   } else {
2424     NearLabel false_target;
2425     GenerateTestAndBranch<NearLabel>(
2426         select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2427     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2428     __ Bind(&false_target);
2429   }
2430 }
2431 
2432 void LocationsBuilderX86::VisitNop(HNop* nop) {
2433   new (GetGraph()->GetAllocator()) LocationSummary(nop);
2434 }
2435 
2436 void InstructionCodeGeneratorX86::VisitNop(HNop*) {
2437   // The environment recording already happened in CodeGenerator::Compile.
2438 }
2439 
2440 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2441   __ subl(ESP, Immediate(adjustment));
2442   __ cfi().AdjustCFAOffset(adjustment);
2443 }
2444 
2445 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2446   __ addl(ESP, Immediate(adjustment));
2447   __ cfi().AdjustCFAOffset(-adjustment);
2448 }
2449 
2450 void CodeGeneratorX86::GenerateNop() {
2451   __ nop();
2452 }
2453 
2454 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2455   LocationSummary* locations =
2456       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2457   // Handle the long/FP comparisons made in instruction simplification.
2458   switch (cond->InputAt(0)->GetType()) {
2459     case DataType::Type::kInt64: {
2460       locations->SetInAt(0, Location::RequiresRegister());
2461       locations->SetInAt(1, Location::Any());
2462       if (!cond->IsEmittedAtUseSite()) {
2463         locations->SetOut(Location::RequiresRegister());
2464       }
2465       break;
2466     }
2467     case DataType::Type::kFloat32:
2468     case DataType::Type::kFloat64: {
2469       locations->SetInAt(0, Location::RequiresFpuRegister());
2470       if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2471         DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2472       } else if (cond->InputAt(1)->IsConstant()) {
2473         locations->SetInAt(1, Location::RequiresFpuRegister());
2474       } else {
2475         locations->SetInAt(1, Location::Any());
2476       }
2477       if (!cond->IsEmittedAtUseSite()) {
2478         locations->SetOut(Location::RequiresRegister());
2479       }
2480       break;
2481     }
2482     default:
2483       locations->SetInAt(0, Location::RequiresRegister());
2484       locations->SetInAt(1, Location::Any());
2485       if (!cond->IsEmittedAtUseSite()) {
2486         // We need a byte register.
2487         locations->SetOut(Location::RegisterLocation(ECX));
2488       }
2489       break;
2490   }
2491 }
2492 
2493 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2494   if (cond->IsEmittedAtUseSite()) {
2495     return;
2496   }
2497 
2498   LocationSummary* locations = cond->GetLocations();
2499   Location lhs = locations->InAt(0);
2500   Location rhs = locations->InAt(1);
2501   Register reg = locations->Out().AsRegister<Register>();
2502   NearLabel true_label, false_label;
2503 
2504   switch (cond->InputAt(0)->GetType()) {
2505     default: {
2506       // Integer case.
2507 
2508       // Clear output register: setb only sets the low byte.
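      // The clearing must happen before the compare: xorl writes EFLAGS and would
      // otherwise destroy the condition that setb consumes.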
2509       __ xorl(reg, reg);
2510       codegen_->GenerateIntCompare(lhs, rhs);
2511       __ setb(X86Condition(cond->GetCondition()), reg);
2512       return;
2513     }
2514     case DataType::Type::kInt64:
2515       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2516       break;
2517     case DataType::Type::kFloat32:
2518       GenerateFPCompare(lhs, rhs, cond, false);
2519       GenerateFPJumps(cond, &true_label, &false_label);
2520       break;
2521     case DataType::Type::kFloat64:
2522       GenerateFPCompare(lhs, rhs, cond, true);
2523       GenerateFPJumps(cond, &true_label, &false_label);
2524       break;
2525   }
2526 
2527   // Convert the jumps into the result.
2528   NearLabel done_label;
2529 
2530   // False case: result = 0.
2531   __ Bind(&false_label);
2532   __ xorl(reg, reg);
2533   __ jmp(&done_label);
2534 
2535   // True case: result = 1.
2536   __ Bind(&true_label);
2537   __ movl(reg, Immediate(1));
2538   __ Bind(&done_label);
2539 }
2540 
2541 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2542   HandleCondition(comp);
2543 }
2544 
2545 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2546   HandleCondition(comp);
2547 }
2548 
2549 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2550   HandleCondition(comp);
2551 }
2552 
2553 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2554   HandleCondition(comp);
2555 }
2556 
2557 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2558   HandleCondition(comp);
2559 }
2560 
2561 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2562   HandleCondition(comp);
2563 }
2564 
2565 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2566   HandleCondition(comp);
2567 }
2568 
2569 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2570   HandleCondition(comp);
2571 }
2572 
2573 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2574   HandleCondition(comp);
2575 }
2576 
2577 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2578   HandleCondition(comp);
2579 }
2580 
2581 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2582   HandleCondition(comp);
2583 }
2584 
2585 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2586   HandleCondition(comp);
2587 }
2588 
2589 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2590   HandleCondition(comp);
2591 }
2592 
2593 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2594   HandleCondition(comp);
2595 }
2596 
2597 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2598   HandleCondition(comp);
2599 }
2600 
2601 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2602   HandleCondition(comp);
2603 }
2604 
2605 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2606   HandleCondition(comp);
2607 }
2608 
2609 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2610   HandleCondition(comp);
2611 }
2612 
2613 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2614   HandleCondition(comp);
2615 }
2616 
2617 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2618   HandleCondition(comp);
2619 }
2620 
2621 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2622   LocationSummary* locations =
2623       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2624   locations->SetOut(Location::ConstantLocation(constant));
2625 }
2626 
2627 void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2628   // Will be generated at use site.
2629 }
2630 
2631 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2632   LocationSummary* locations =
2633       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2634   locations->SetOut(Location::ConstantLocation(constant));
2635 }
2636 
2637 void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2638   // Will be generated at use site.
2639 }
2640 
2641 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2642   LocationSummary* locations =
2643       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2644   locations->SetOut(Location::ConstantLocation(constant));
2645 }
2646 
2647 void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2648   // Will be generated at use site.
2649 }
2650 
2651 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2652   LocationSummary* locations =
2653       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2654   locations->SetOut(Location::ConstantLocation(constant));
2655 }
2656 
2657 void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2658   // Will be generated at use site.
2659 }
2660 
2661 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2662   LocationSummary* locations =
2663       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2664   locations->SetOut(Location::ConstantLocation(constant));
2665 }
2666 
2667 void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) {
2668   // Will be generated at use site.
2669 }
2670 
2671 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2672   constructor_fence->SetLocations(nullptr);
2673 }
2674 
2675 void InstructionCodeGeneratorX86::VisitConstructorFence(
2676     [[maybe_unused]] HConstructorFence* constructor_fence) {
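  // Note: on x86 a StoreStore barrier is expected to act only as a compiler
  // barrier (the x86 memory model already orders stores), so this typically
  // emits no fence instruction.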
2677   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2678 }
2679 
2680 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2681   memory_barrier->SetLocations(nullptr);
2682 }
2683 
2684 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2685   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2686 }
2687 
2688 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2689   ret->SetLocations(nullptr);
2690 }
2691 
2692 void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2693   codegen_->GenerateFrameExit();
2694 }
2695 
2696 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2697   LocationSummary* locations =
2698       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2699   SetInForReturnValue(ret, locations);
2700 }
2701 
2702 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2703   switch (ret->InputAt(0)->GetType()) {
2704     case DataType::Type::kReference:
2705     case DataType::Type::kBool:
2706     case DataType::Type::kUint8:
2707     case DataType::Type::kInt8:
2708     case DataType::Type::kUint16:
2709     case DataType::Type::kInt16:
2710     case DataType::Type::kInt32:
2711       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2712       break;
2713 
2714     case DataType::Type::kInt64:
2715       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2716       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2717       break;
2718 
2719     case DataType::Type::kFloat32:
2720       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2721       if (GetGraph()->IsCompilingOsr()) {
2722         // To simplify callers of an OSR method, we put the return value in both
2723         // floating point and core registers.
2724         __ movd(EAX, XMM0);
2725       }
2726       break;
2727 
2728     case DataType::Type::kFloat64:
2729       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2730       if (GetGraph()->IsCompilingOsr()) {
2731         // To simplify callers of an OSR method, we put the return value in both
2732         // floating point and core registers.
2733         __ movd(EAX, XMM0);
2734         // Use XMM1 as temporary register to not clobber XMM0.
2735         __ movaps(XMM1, XMM0);
2736         __ psrlq(XMM1, Immediate(32));
2737         __ movd(EDX, XMM1);
2738       }
2739       break;
2740 
2741     default:
2742       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2743   }
2744   codegen_->GenerateFrameExit();
2745 }
2746 
2747 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2748   // The trampoline uses the same calling convention as a regular dex call,
2749   // except that instead of loading arg0/r0 with the target Method*, arg0/r0
2750   // contains the method_idx.
2751   HandleInvoke(invoke);
2752 }
2753 
2754 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2755   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2756 }
2757 
2758 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2759   // Explicit clinit checks triggered by static invokes must have been pruned by
2760   // art::PrepareForRegisterAllocation.
2761   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2762 
2763   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2764   if (intrinsic.TryDispatch(invoke)) {
2765     if (invoke->GetLocations()->CanCall() &&
2766         invoke->HasPcRelativeMethodLoadKind() &&
2767         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2768       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2769     }
2770     return;
2771   }
2772 
2773   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2774     CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2775         /*for_register_allocation=*/ true);
2776     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2777   } else {
2778     HandleInvoke(invoke);
2779   }
2780 
2781   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2782   if (invoke->HasPcRelativeMethodLoadKind()) {
2783     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2784   }
2785 }
2786 
2787 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2788   if (invoke->GetLocations()->Intrinsified()) {
2789     IntrinsicCodeGeneratorX86 intrinsic(codegen);
2790     intrinsic.Dispatch(invoke);
2791     return true;
2792   }
2793   return false;
2794 }
2795 
2796 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2797   // Explicit clinit checks triggered by static invokes must have been pruned by
2798   // art::PrepareForRegisterAllocation.
2799   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2800 
2801   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2802     return;
2803   }
2804 
2805   LocationSummary* locations = invoke->GetLocations();
2806   codegen_->GenerateStaticOrDirectCall(
2807       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2808 }
2809 
2810 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2811   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2812   if (intrinsic.TryDispatch(invoke)) {
2813     return;
2814   }
2815 
2816   HandleInvoke(invoke);
2817 
2818   if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2819     // Add one temporary for inline cache update.
2820     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2821   }
2822 }
2823 
2824 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2825   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2826   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2827 }
2828 
2829 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2830   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2831     return;
2832   }
2833 
2834   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2835   DCHECK(!codegen_->IsLeafMethod());
2836 }
2837 
2838 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2839   // This call to HandleInvoke allocates a temporary (core) register
2840   // which is also used to transfer the hidden argument from FP to
2841   // core register.
2842   HandleInvoke(invoke);
2843   // Add the hidden argument.
2844   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2845 
2846   if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2847     // Add one temporary for inline cache update.
2848     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2849   }
2850 
2851   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2852   if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2853     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2854   }
2855 
2856   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2857     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2858                                     Location::RequiresRegister());
2859   }
2860 }
2861 
2862 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2863   DCHECK_EQ(EAX, klass);
2864   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
2865     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2866     DCHECK(info != nullptr);
2867     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
2868         info, GetCompilerOptions(), instruction->AsInvoke());
2869     if (cache != nullptr) {
2870       uint32_t address = reinterpret_cast32<uint32_t>(cache);
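      // The inline cache is only used when JIT compiling, so its address is stable
      // for the lifetime of the code and can be embedded directly as an immediate.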
2871       if (kIsDebugBuild) {
2872         uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2873         CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2874       }
2875       Register temp = EBP;
2876       NearLabel done;
2877       __ movl(temp, Immediate(address));
2878       // Fast path for a monomorphic cache.
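      // A hit on the first cache entry skips the runtime call; on a miss,
      // kQuickUpdateInlineCache records the observed class in the cache.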
2879       __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2880       __ j(kEqual, &done);
2881       GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2882       __ Bind(&done);
2883     } else {
2884       // This is unexpected, but we don't guarantee stable compilation across
2885       // JIT runs so just warn about it.
2886       ScopedObjectAccess soa(Thread::Current());
2887       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
2888     }
2889   }
2890 }
2891 
2892 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2893   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2894   LocationSummary* locations = invoke->GetLocations();
2895   Register temp = locations->GetTemp(0).AsRegister<Register>();
2896   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2897   Location receiver = locations->InAt(0);
2898   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2899 
2900   // Set the hidden argument. This is safe to do here, as XMM7
2901   // won't be modified thereafter, before the `call` instruction.
2902   DCHECK_EQ(XMM7, hidden_reg);
2903   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2904     __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2905   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2906     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2907     __ movd(hidden_reg, temp);
2908   }
2909 
2910   if (receiver.IsStackSlot()) {
2911     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2912     // /* HeapReference<Class> */ temp = temp->klass_
2913     __ movl(temp, Address(temp, class_offset));
2914   } else {
2915     // /* HeapReference<Class> */ temp = receiver->klass_
2916     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2917   }
2918   codegen_->MaybeRecordImplicitNullCheck(invoke);
2919   // Instead of simply (possibly) unpoisoning `temp` here, we should
2920   // emit a read barrier for the previous class reference load.
2921   // However this is not required in practice, as this is an
2922   // intermediate/temporary reference and because the current
2923   // concurrent copying collector keeps the from-space memory
2924   // intact/accessible until the end of the marking phase (the
2925   // concurrent copying collector may not in the future).
2926   __ MaybeUnpoisonHeapReference(temp);
2927 
2928   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2929 
2930   // temp = temp->GetAddressOfIMT()
2931   __ movl(temp,
2932       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2933   // temp = temp->GetImtEntryAt(method_offset);
2934   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2935       invoke->GetImtIndex(), kX86PointerSize));
2936   __ movl(temp, Address(temp, method_offset));
2937   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2938     // We pass the method from the IMT in case of a conflict. This will ensure
2939     // we go into the runtime to resolve the actual method.
2940     __ movd(hidden_reg, temp);
2941   }
2942   // call temp->GetEntryPoint();
2943   __ call(Address(temp,
2944                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2945 
2946   DCHECK(!codegen_->IsLeafMethod());
2947   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2948 }
2949 
2950 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2951   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2952   if (intrinsic.TryDispatch(invoke)) {
2953     return;
2954   }
2955   HandleInvoke(invoke);
2956 }
2957 
2958 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2959   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2960     return;
2961   }
2962   codegen_->GenerateInvokePolymorphicCall(invoke);
2963 }
2964 
2965 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2966   HandleInvoke(invoke);
2967 }
2968 
2969 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2970   codegen_->GenerateInvokeCustomCall(invoke);
2971 }
2972 
2973 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2974   LocationSummary* locations =
2975       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2976   switch (neg->GetResultType()) {
2977     case DataType::Type::kInt32:
2978     case DataType::Type::kInt64:
2979       locations->SetInAt(0, Location::RequiresRegister());
2980       locations->SetOut(Location::SameAsFirstInput());
2981       break;
2982 
2983     case DataType::Type::kFloat32:
2984       locations->SetInAt(0, Location::RequiresFpuRegister());
2985       locations->SetOut(Location::SameAsFirstInput());
2986       locations->AddTemp(Location::RequiresRegister());
2987       locations->AddTemp(Location::RequiresFpuRegister());
2988       break;
2989 
2990     case DataType::Type::kFloat64:
2991       locations->SetInAt(0, Location::RequiresFpuRegister());
2992       locations->SetOut(Location::SameAsFirstInput());
2993       locations->AddTemp(Location::RequiresFpuRegister());
2994       break;
2995 
2996     default:
2997       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2998   }
2999 }
3000 
3001 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
3002   LocationSummary* locations = neg->GetLocations();
3003   Location out = locations->Out();
3004   Location in = locations->InAt(0);
3005   switch (neg->GetResultType()) {
3006     case DataType::Type::kInt32:
3007       DCHECK(in.IsRegister());
3008       DCHECK(in.Equals(out));
3009       __ negl(out.AsRegister<Register>());
3010       break;
3011 
3012     case DataType::Type::kInt64:
3013       DCHECK(in.IsRegisterPair());
3014       DCHECK(in.Equals(out));
3015       __ negl(out.AsRegisterPairLow<Register>());
3016       // Negation is similar to subtraction from zero.  The least
3017       // significant 32 bits trigger a borrow when they are different from
3018       // zero; to take it into account, add 1 to the most significant
3019       // 32 bits if the carry flag (CF) is set to 1 after the first NEGL
3020       // operation, and then negate them as well.
3021       __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
3022       __ negl(out.AsRegisterPairHigh<Register>());
3023       break;
3024 
3025     case DataType::Type::kFloat32: {
3026       DCHECK(in.Equals(out));
3027       Register constant = locations->GetTemp(0).AsRegister<Register>();
3028       XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
3029       // Implement float negation with an exclusive or with value
3030       // 0x80000000 (mask for bit 31, representing the sign of a
3031       // single-precision floating-point number).
3032       __ movl(constant, Immediate(INT32_C(0x80000000)));
3033       __ movd(mask, constant);
3034       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3035       break;
3036     }
3037 
3038     case DataType::Type::kFloat64: {
3039       DCHECK(in.Equals(out));
3040       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3041       // Implement double negation with an exclusive or with value
3042       // 0x8000000000000000 (mask for bit 63, representing the sign of
3043       // a double-precision floating-point number).
3044       __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
3045       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3046       break;
3047     }
3048 
3049     default:
3050       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3051   }
3052 }
3053 
3054 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
3055   LocationSummary* locations =
3056       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3057   DCHECK(DataType::IsFloatingPointType(neg->GetType()));
3058   locations->SetInAt(0, Location::RequiresFpuRegister());
3059   locations->SetInAt(1, Location::RequiresRegister());
3060   locations->SetOut(Location::SameAsFirstInput());
3061   locations->AddTemp(Location::RequiresFpuRegister());
3062 }
3063 
3064 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
3065   LocationSummary* locations = neg->GetLocations();
3066   Location out = locations->Out();
3067   DCHECK(locations->InAt(0).Equals(out));
3068 
3069   Register constant_area = locations->InAt(1).AsRegister<Register>();
3070   XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3071   if (neg->GetType() == DataType::Type::kFloat32) {
3072     __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
3073                                                  neg->GetBaseMethodAddress(),
3074                                                  constant_area));
3075     __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3076   } else {
3077     __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
3078                                                  neg->GetBaseMethodAddress(),
3079                                                  constant_area));
3080     __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3081   }
3082 }
3083 
3084 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
3085   DataType::Type result_type = conversion->GetResultType();
3086   DataType::Type input_type = conversion->GetInputType();
3087   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3088       << input_type << " -> " << result_type;
3089 
3090   // The float-to-long and double-to-long type conversions rely on a
3091   // call to the runtime.
3092   LocationSummary::CallKind call_kind =
3093       ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3094        && result_type == DataType::Type::kInt64)
3095       ? LocationSummary::kCallOnMainOnly
3096       : LocationSummary::kNoCall;
3097   LocationSummary* locations =
3098       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3099 
3100   switch (result_type) {
3101     case DataType::Type::kUint8:
3102     case DataType::Type::kInt8:
3103       switch (input_type) {
3104         case DataType::Type::kUint8:
3105         case DataType::Type::kInt8:
3106         case DataType::Type::kUint16:
3107         case DataType::Type::kInt16:
3108         case DataType::Type::kInt32:
3109           locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
3110           // Make the output overlap to please the register allocator. This greatly simplifies
3111           // the validation of the linear scan implementation
3112           // the validation of the linear scan implementation.
3113           break;
3114         case DataType::Type::kInt64: {
3115           HInstruction* input = conversion->InputAt(0);
3116           Location input_location = input->IsConstant()
3117               ? Location::ConstantLocation(input)
3118               : Location::RegisterPairLocation(EAX, EDX);
3119           locations->SetInAt(0, input_location);
3120           // Make the output overlap to please the register allocator. This greatly simplifies
3121           // the validation of the linear scan implementation
3122           // the validation of the linear scan implementation.
3123           break;
3124         }
3125 
3126         default:
3127           LOG(FATAL) << "Unexpected type conversion from " << input_type
3128                      << " to " << result_type;
3129       }
3130       break;
3131 
3132     case DataType::Type::kUint16:
3133     case DataType::Type::kInt16:
3134       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3135       locations->SetInAt(0, Location::Any());
3136       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3137       break;
3138 
3139     case DataType::Type::kInt32:
3140       switch (input_type) {
3141         case DataType::Type::kInt64:
3142           locations->SetInAt(0, Location::Any());
3143           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3144           break;
3145 
3146         case DataType::Type::kFloat32:
3147           locations->SetInAt(0, Location::RequiresFpuRegister());
3148           locations->SetOut(Location::RequiresRegister());
3149           locations->AddTemp(Location::RequiresFpuRegister());
3150           break;
3151 
3152         case DataType::Type::kFloat64:
3153           locations->SetInAt(0, Location::RequiresFpuRegister());
3154           locations->SetOut(Location::RequiresRegister());
3155           locations->AddTemp(Location::RequiresFpuRegister());
3156           break;
3157 
3158         default:
3159           LOG(FATAL) << "Unexpected type conversion from " << input_type
3160                      << " to " << result_type;
3161       }
3162       break;
3163 
3164     case DataType::Type::kInt64:
3165       switch (input_type) {
3166         case DataType::Type::kBool:
3167         case DataType::Type::kUint8:
3168         case DataType::Type::kInt8:
3169         case DataType::Type::kUint16:
3170         case DataType::Type::kInt16:
3171         case DataType::Type::kInt32:
3172           locations->SetInAt(0, Location::RegisterLocation(EAX));
3173           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3174           break;
3175 
3176         case DataType::Type::kFloat32:
3177         case DataType::Type::kFloat64: {
3178           InvokeRuntimeCallingConvention calling_convention;
3179           XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
3180           locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
3181 
3182           // The runtime helper puts the result in EAX, EDX.
3183           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3184         }
3185         break;
3186 
3187         default:
3188           LOG(FATAL) << "Unexpected type conversion from " << input_type
3189                      << " to " << result_type;
3190       }
3191       break;
3192 
3193     case DataType::Type::kFloat32:
3194       switch (input_type) {
3195         case DataType::Type::kBool:
3196         case DataType::Type::kUint8:
3197         case DataType::Type::kInt8:
3198         case DataType::Type::kUint16:
3199         case DataType::Type::kInt16:
3200         case DataType::Type::kInt32:
3201           locations->SetInAt(0, Location::RequiresRegister());
3202           locations->SetOut(Location::RequiresFpuRegister());
3203           break;
3204 
3205         case DataType::Type::kInt64:
3206           locations->SetInAt(0, Location::Any());
3207           locations->SetOut(Location::Any());
3208           break;
3209 
3210         case DataType::Type::kFloat64:
3211           locations->SetInAt(0, Location::RequiresFpuRegister());
3212           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3213           break;
3214 
3215         default:
3216           LOG(FATAL) << "Unexpected type conversion from " << input_type
3217                      << " to " << result_type;
3218       }
3219       break;
3220 
3221     case DataType::Type::kFloat64:
3222       switch (input_type) {
3223         case DataType::Type::kBool:
3224         case DataType::Type::kUint8:
3225         case DataType::Type::kInt8:
3226         case DataType::Type::kUint16:
3227         case DataType::Type::kInt16:
3228         case DataType::Type::kInt32:
3229           locations->SetInAt(0, Location::RequiresRegister());
3230           locations->SetOut(Location::RequiresFpuRegister());
3231           break;
3232 
3233         case DataType::Type::kInt64:
3234           locations->SetInAt(0, Location::Any());
3235           locations->SetOut(Location::Any());
3236           break;
3237 
3238         case DataType::Type::kFloat32:
3239           locations->SetInAt(0, Location::RequiresFpuRegister());
3240           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3241           break;
3242 
3243         default:
3244           LOG(FATAL) << "Unexpected type conversion from " << input_type
3245                      << " to " << result_type;
3246       }
3247       break;
3248 
3249     default:
3250       LOG(FATAL) << "Unexpected type conversion from " << input_type
3251                  << " to " << result_type;
3252   }
3253 }
3254 
3255 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3256   LocationSummary* locations = conversion->GetLocations();
3257   Location out = locations->Out();
3258   Location in = locations->InAt(0);
3259   DataType::Type result_type = conversion->GetResultType();
3260   DataType::Type input_type = conversion->GetInputType();
3261   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3262       << input_type << " -> " << result_type;
3263   switch (result_type) {
3264     case DataType::Type::kUint8:
3265       switch (input_type) {
3266         case DataType::Type::kInt8:
3267         case DataType::Type::kUint16:
3268         case DataType::Type::kInt16:
3269         case DataType::Type::kInt32:
3270           if (in.IsRegister()) {
3271             __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3272           } else {
3273             DCHECK(in.GetConstant()->IsIntConstant());
3274             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3275             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3276           }
3277           break;
3278         case DataType::Type::kInt64:
3279           if (in.IsRegisterPair()) {
3280             __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3281           } else {
3282             DCHECK(in.GetConstant()->IsLongConstant());
3283             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3284             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3285           }
3286           break;
3287 
3288         default:
3289           LOG(FATAL) << "Unexpected type conversion from " << input_type
3290                      << " to " << result_type;
3291       }
3292       break;
3293 
3294     case DataType::Type::kInt8:
3295       switch (input_type) {
3296         case DataType::Type::kUint8:
3297         case DataType::Type::kUint16:
3298         case DataType::Type::kInt16:
3299         case DataType::Type::kInt32:
3300           if (in.IsRegister()) {
3301             __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3302           } else {
3303             DCHECK(in.GetConstant()->IsIntConstant());
3304             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3305             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3306           }
3307           break;
3308         case DataType::Type::kInt64:
3309           if (in.IsRegisterPair()) {
3310             __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3311           } else {
3312             DCHECK(in.GetConstant()->IsLongConstant());
3313             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3314             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3315           }
3316           break;
3317 
3318         default:
3319           LOG(FATAL) << "Unexpected type conversion from " << input_type
3320                      << " to " << result_type;
3321       }
3322       break;
3323 
3324     case DataType::Type::kUint16:
3325       switch (input_type) {
3326         case DataType::Type::kInt8:
3327         case DataType::Type::kInt16:
3328         case DataType::Type::kInt32:
3329           if (in.IsRegister()) {
3330             __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3331           } else if (in.IsStackSlot()) {
3332             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3333           } else {
3334             DCHECK(in.GetConstant()->IsIntConstant());
3335             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3336             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3337           }
3338           break;
3339         case DataType::Type::kInt64:
3340           if (in.IsRegisterPair()) {
3341             __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3342           } else if (in.IsDoubleStackSlot()) {
3343             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3344           } else {
3345             DCHECK(in.GetConstant()->IsLongConstant());
3346             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3347             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3348           }
3349           break;
3350 
3351         default:
3352           LOG(FATAL) << "Unexpected type conversion from " << input_type
3353                      << " to " << result_type;
3354       }
3355       break;
3356 
3357     case DataType::Type::kInt16:
3358       switch (input_type) {
3359         case DataType::Type::kUint16:
3360         case DataType::Type::kInt32:
3361           if (in.IsRegister()) {
3362             __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3363           } else if (in.IsStackSlot()) {
3364             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3365           } else {
3366             DCHECK(in.GetConstant()->IsIntConstant());
3367             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3368             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3369           }
3370           break;
3371         case DataType::Type::kInt64:
3372           if (in.IsRegisterPair()) {
3373             __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3374           } else if (in.IsDoubleStackSlot()) {
3375             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3376           } else {
3377             DCHECK(in.GetConstant()->IsLongConstant());
3378             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3379             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3380           }
3381           break;
3382 
3383         default:
3384           LOG(FATAL) << "Unexpected type conversion from " << input_type
3385                      << " to " << result_type;
3386       }
3387       break;
3388 
3389     case DataType::Type::kInt32:
3390       switch (input_type) {
3391         case DataType::Type::kInt64:
3392           if (in.IsRegisterPair()) {
3393             __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3394           } else if (in.IsDoubleStackSlot()) {
3395             __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3396           } else {
3397             DCHECK(in.IsConstant());
3398             DCHECK(in.GetConstant()->IsLongConstant());
3399             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3400             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3401           }
3402           break;
3403 
3404         case DataType::Type::kFloat32: {
3405           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3406           Register output = out.AsRegister<Register>();
3407           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3408           NearLabel done, nan;
3409 
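          // Java semantics for f2i: NaN converts to 0, values too large saturate
          // to Integer.MAX_VALUE, and values too small saturate to Integer.MIN_VALUE.
          // The negative side needs no explicit handling because cvttss2si already
          // produces 0x80000000 (== Integer.MIN_VALUE) on overflow.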
3410           __ movl(output, Immediate(kPrimIntMax));
3411           // temp = int-to-float(output)
3412           __ cvtsi2ss(temp, output);
3413           // if input >= temp goto done
3414           __ comiss(input, temp);
3415           __ j(kAboveEqual, &done);
3416           // if input == NaN goto nan
3417           __ j(kUnordered, &nan);
3418           // output = float-to-int-truncate(input)
3419           __ cvttss2si(output, input);
3420           __ jmp(&done);
3421           __ Bind(&nan);
3422           //  output = 0
3423           __ xorl(output, output);
3424           __ Bind(&done);
3425           break;
3426         }
3427 
3428         case DataType::Type::kFloat64: {
3429           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3430           Register output = out.AsRegister<Register>();
3431           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3432           NearLabel done, nan;
3433 
3434           __ movl(output, Immediate(kPrimIntMax));
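          // Same approach as the float-to-int case above; d2i has the same
          // NaN-to-zero and saturation semantics.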
3435           // temp = int-to-double(output)
3436           __ cvtsi2sd(temp, output);
3437           // if input >= temp goto done
3438           __ comisd(input, temp);
3439           __ j(kAboveEqual, &done);
3440           // if input == NaN goto nan
3441           __ j(kUnordered, &nan);
3442           // output = double-to-int-truncate(input)
3443           __ cvttsd2si(output, input);
3444           __ jmp(&done);
3445           __ Bind(&nan);
3446           //  output = 0
3447           __ xorl(output, output);
3448           __ Bind(&done);
3449           break;
3450         }
3451 
3452         default:
3453           LOG(FATAL) << "Unexpected type conversion from " << input_type
3454                      << " to " << result_type;
3455       }
3456       break;
3457 
3458     case DataType::Type::kInt64:
3459       switch (input_type) {
3460         case DataType::Type::kBool:
3461         case DataType::Type::kUint8:
3462         case DataType::Type::kInt8:
3463         case DataType::Type::kUint16:
3464         case DataType::Type::kInt16:
3465         case DataType::Type::kInt32:
3466           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3467           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3468           DCHECK_EQ(in.AsRegister<Register>(), EAX);
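          // cdq sign-extends EAX into EDX:EAX, which is why the locations pin the
          // input to EAX and the output pair to EAX/EDX.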
3469           __ cdq();
3470           break;
3471 
3472         case DataType::Type::kFloat32:
3473           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3474           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3475           break;
3476 
3477         case DataType::Type::kFloat64:
3478           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3479           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3480           break;
3481 
3482         default:
3483           LOG(FATAL) << "Unexpected type conversion from " << input_type
3484                      << " to " << result_type;
3485       }
3486       break;
3487 
3488     case DataType::Type::kFloat32:
3489       switch (input_type) {
3490         case DataType::Type::kBool:
3491         case DataType::Type::kUint8:
3492         case DataType::Type::kInt8:
3493         case DataType::Type::kUint16:
3494         case DataType::Type::kInt16:
3495         case DataType::Type::kInt32:
3496           __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3497           break;
3498 
3499         case DataType::Type::kInt64: {
3500           size_t adjustment = 0;
3501 
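          // There is no SSE instruction that converts a 64-bit integer to float in
          // 32-bit mode, so the value goes through the x87 FPU stack and is stored
          // back with fstps.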
3502           // Create stack space for the call to
3503           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3504           // TODO: enhance register allocator to ask for stack temporaries.
3505           if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3506             adjustment = DataType::Size(DataType::Type::kInt64);
3507             codegen_->IncreaseFrame(adjustment);
3508           }
3509 
3510           // Load the value to the FP stack, using temporaries if needed.
3511           PushOntoFPStack(in, 0, adjustment, false, true);
3512 
3513           if (out.IsStackSlot()) {
3514             __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3515           } else {
3516             __ fstps(Address(ESP, 0));
3517             Location stack_temp = Location::StackSlot(0);
3518             codegen_->Move32(out, stack_temp);
3519           }
3520 
3521           // Remove the temporary stack space we allocated.
3522           if (adjustment != 0) {
3523             codegen_->DecreaseFrame(adjustment);
3524           }
3525           break;
3526         }
3527 
3528         case DataType::Type::kFloat64:
3529           __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3530           break;
3531 
3532         default:
3533           LOG(FATAL) << "Unexpected type conversion from " << input_type
3534                      << " to " << result_type;
3535       }
3536       break;
3537 
3538     case DataType::Type::kFloat64:
3539       switch (input_type) {
3540         case DataType::Type::kBool:
3541         case DataType::Type::kUint8:
3542         case DataType::Type::kInt8:
3543         case DataType::Type::kUint16:
3544         case DataType::Type::kInt16:
3545         case DataType::Type::kInt32:
3546           __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3547           break;
3548 
3549         case DataType::Type::kInt64: {
3550           size_t adjustment = 0;
3551 
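          // Same x87 route as the long-to-float conversion above, storing the
          // result with fstpl instead of fstps.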
3552           // Create stack space for the call to
3553           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3554           // TODO: enhance register allocator to ask for stack temporaries.
3555           if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3556             adjustment = DataType::Size(DataType::Type::kInt64);
3557             codegen_->IncreaseFrame(adjustment);
3558           }
3559 
3560           // Load the value to the FP stack, using temporaries if needed.
3561           PushOntoFPStack(in, 0, adjustment, false, true);
3562 
3563           if (out.IsDoubleStackSlot()) {
3564             __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3565           } else {
3566             __ fstpl(Address(ESP, 0));
3567             Location stack_temp = Location::DoubleStackSlot(0);
3568             codegen_->Move64(out, stack_temp);
3569           }
3570 
3571           // Remove the temporary stack space we allocated.
3572           if (adjustment != 0) {
3573             codegen_->DecreaseFrame(adjustment);
3574           }
3575           break;
3576         }
3577 
3578         case DataType::Type::kFloat32:
3579           __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3580           break;
3581 
3582         default:
3583           LOG(FATAL) << "Unexpected type conversion from " << input_type
3584                      << " to " << result_type;
3585       }
3586       break;
3587 
3588     default:
3589       LOG(FATAL) << "Unexpected type conversion from " << input_type
3590                  << " to " << result_type;
3591   }
3592 }
3593 
3594 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3595   LocationSummary* locations =
3596       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3597   switch (add->GetResultType()) {
3598     case DataType::Type::kInt32: {
3599       locations->SetInAt(0, Location::RequiresRegister());
3600       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3601       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3602       break;
3603     }
3604 
3605     case DataType::Type::kInt64: {
3606       locations->SetInAt(0, Location::RequiresRegister());
3607       locations->SetInAt(1, Location::Any());
3608       locations->SetOut(Location::SameAsFirstInput());
3609       break;
3610     }
3611 
3612     case DataType::Type::kFloat32:
3613     case DataType::Type::kFloat64: {
3614       locations->SetInAt(0, Location::RequiresFpuRegister());
3615       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3616         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3617       } else if (add->InputAt(1)->IsConstant()) {
3618         locations->SetInAt(1, Location::RequiresFpuRegister());
3619       } else {
3620         locations->SetInAt(1, Location::Any());
3621       }
3622       locations->SetOut(Location::SameAsFirstInput());
3623       break;
3624     }
3625 
3626     default:
3627       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3628       UNREACHABLE();
3629   }
3630 }
3631 
3632 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3633   LocationSummary* locations = add->GetLocations();
3634   Location first = locations->InAt(0);
3635   Location second = locations->InAt(1);
3636   Location out = locations->Out();
3637 
3638   switch (add->GetResultType()) {
3639     case DataType::Type::kInt32: {
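      // When the output register differs from both inputs, leal computes the sum
      // into the output without clobbering either source register, avoiding an
      // extra move.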
3640       if (second.IsRegister()) {
3641         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3642           __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3643         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3644           __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3645         } else {
3646           __ leal(out.AsRegister<Register>(), Address(
3647               first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3648         }
3649       } else if (second.IsConstant()) {
3650         int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3651         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3652           __ addl(out.AsRegister<Register>(), Immediate(value));
3653         } else {
3654           __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3655         }
3656       } else {
3657         DCHECK(first.Equals(locations->Out()));
3658         __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3659       }
3660       break;
3661     }
3662 
3663     case DataType::Type::kInt64: {
3664       if (second.IsRegisterPair()) {
3665         __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3666         __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3667       } else if (second.IsDoubleStackSlot()) {
3668         __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3669         __ adcl(first.AsRegisterPairHigh<Register>(),
3670                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3671       } else {
3672         DCHECK(second.IsConstant()) << second;
3673         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3674         __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3675         __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3676       }
3677       break;
3678     }
3679 
3680     case DataType::Type::kFloat32: {
3681       if (second.IsFpuRegister()) {
3682         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3683       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3684         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3685         DCHECK(const_area->IsEmittedAtUseSite());
3686         __ addss(first.AsFpuRegister<XmmRegister>(),
3687                  codegen_->LiteralFloatAddress(
3688                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3689                      const_area->GetBaseMethodAddress(),
3690                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3691       } else {
3692         DCHECK(second.IsStackSlot());
3693         __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3694       }
3695       break;
3696     }
3697 
3698     case DataType::Type::kFloat64: {
3699       if (second.IsFpuRegister()) {
3700         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3701       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3702         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3703         DCHECK(const_area->IsEmittedAtUseSite());
3704         __ addsd(first.AsFpuRegister<XmmRegister>(),
3705                  codegen_->LiteralDoubleAddress(
3706                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3707                      const_area->GetBaseMethodAddress(),
3708                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3709       } else {
3710         DCHECK(second.IsDoubleStackSlot());
3711         __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3712       }
3713       break;
3714     }
3715 
3716     default:
3717       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3718   }
3719 }
3720 
3721 void LocationsBuilderX86::VisitSub(HSub* sub) {
3722   LocationSummary* locations =
3723       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3724   switch (sub->GetResultType()) {
3725     case DataType::Type::kInt32:
3726     case DataType::Type::kInt64: {
3727       locations->SetInAt(0, Location::RequiresRegister());
3728       locations->SetInAt(1, Location::Any());
3729       locations->SetOut(Location::SameAsFirstInput());
3730       break;
3731     }
3732     case DataType::Type::kFloat32:
3733     case DataType::Type::kFloat64: {
3734       locations->SetInAt(0, Location::RequiresFpuRegister());
3735       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3736         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3737       } else if (sub->InputAt(1)->IsConstant()) {
3738         locations->SetInAt(1, Location::RequiresFpuRegister());
3739       } else {
3740         locations->SetInAt(1, Location::Any());
3741       }
3742       locations->SetOut(Location::SameAsFirstInput());
3743       break;
3744     }
3745 
3746     default:
3747       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3748   }
3749 }
3750 
3751 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3752   LocationSummary* locations = sub->GetLocations();
3753   Location first = locations->InAt(0);
3754   Location second = locations->InAt(1);
3755   DCHECK(first.Equals(locations->Out()));
3756   switch (sub->GetResultType()) {
3757     case DataType::Type::kInt32: {
3758       if (second.IsRegister()) {
3759         __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3760       } else if (second.IsConstant()) {
3761         __ subl(first.AsRegister<Register>(),
3762                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3763       } else {
3764         __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3765       }
3766       break;
3767     }
3768 
3769     case DataType::Type::kInt64: {
3770       if (second.IsRegisterPair()) {
3771         __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3772         __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3773       } else if (second.IsDoubleStackSlot()) {
3774         __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3775         __ sbbl(first.AsRegisterPairHigh<Register>(),
3776                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3777       } else {
3778         DCHECK(second.IsConstant()) << second;
3779         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3780         __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3781         __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3782       }
3783       break;
3784     }
3785 
3786     case DataType::Type::kFloat32: {
3787       if (second.IsFpuRegister()) {
3788         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3789       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3790         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3791         DCHECK(const_area->IsEmittedAtUseSite());
3792         __ subss(first.AsFpuRegister<XmmRegister>(),
3793                  codegen_->LiteralFloatAddress(
3794                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3795                      const_area->GetBaseMethodAddress(),
3796                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3797       } else {
3798         DCHECK(second.IsStackSlot());
3799         __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3800       }
3801       break;
3802     }
3803 
3804     case DataType::Type::kFloat64: {
3805       if (second.IsFpuRegister()) {
3806         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3807       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3808         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3809         DCHECK(const_area->IsEmittedAtUseSite());
3810         __ subsd(first.AsFpuRegister<XmmRegister>(),
3811                  codegen_->LiteralDoubleAddress(
3812                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3813                      const_area->GetBaseMethodAddress(),
3814                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3815       } else {
3816         DCHECK(second.IsDoubleStackSlot());
3817         __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3818       }
3819       break;
3820     }
3821 
3822     default:
3823       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3824   }
3825 }
3826 
3827 void LocationsBuilderX86::VisitMul(HMul* mul) {
3828   LocationSummary* locations =
3829       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3830   switch (mul->GetResultType()) {
3831     case DataType::Type::kInt32:
3832       locations->SetInAt(0, Location::RequiresRegister());
3833       locations->SetInAt(1, Location::Any());
3834       if (mul->InputAt(1)->IsIntConstant()) {
3835         // Can use 3 operand multiply.
3836         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3837       } else {
3838         locations->SetOut(Location::SameAsFirstInput());
3839       }
3840       break;
3841     case DataType::Type::kInt64: {
3842       locations->SetInAt(0, Location::RequiresRegister());
3843       locations->SetInAt(1, Location::Any());
3844       locations->SetOut(Location::SameAsFirstInput());
3845       // Needed for imul producing a 64-bit result from 32-bit operands.
3846       locations->AddTemp(Location::RegisterLocation(EAX));
3847       locations->AddTemp(Location::RegisterLocation(EDX));
3848       break;
3849     }
3850     case DataType::Type::kFloat32:
3851     case DataType::Type::kFloat64: {
3852       locations->SetInAt(0, Location::RequiresFpuRegister());
3853       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3854         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3855       } else if (mul->InputAt(1)->IsConstant()) {
3856         locations->SetInAt(1, Location::RequiresFpuRegister());
3857       } else {
3858         locations->SetInAt(1, Location::Any());
3859       }
3860       locations->SetOut(Location::SameAsFirstInput());
3861       break;
3862     }
3863 
3864     default:
3865       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3866   }
3867 }
3868 
3869 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3870   LocationSummary* locations = mul->GetLocations();
3871   Location first = locations->InAt(0);
3872   Location second = locations->InAt(1);
3873   Location out = locations->Out();
3874 
3875   switch (mul->GetResultType()) {
3876     case DataType::Type::kInt32:
3877       // The constant may have ended up in a register, so test explicitly to avoid
3878       // problems where the output may not be the same as the first operand.
3879       if (mul->InputAt(1)->IsIntConstant()) {
3880         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3881         __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3882       } else if (second.IsRegister()) {
3883         DCHECK(first.Equals(out));
3884         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3885       } else {
3886         DCHECK(second.IsStackSlot());
3887         DCHECK(first.Equals(out));
3888         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3889       }
3890       break;
3891 
3892     case DataType::Type::kInt64: {
3893       Register in1_hi = first.AsRegisterPairHigh<Register>();
3894       Register in1_lo = first.AsRegisterPairLow<Register>();
3895       Register eax = locations->GetTemp(0).AsRegister<Register>();
3896       Register edx = locations->GetTemp(1).AsRegister<Register>();
3897 
3898       DCHECK_EQ(EAX, eax);
3899       DCHECK_EQ(EDX, edx);
3900 
3901       // input: in1 - 64 bits, in2 - 64 bits.
3902       // output: in1
3903       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3904       // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3905       // parts: in1.lo = (in1.lo * in2.lo)[31:0]
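      // Illustrative values (comment only): with in1 = 0x0000000100000002 and
      // in2 = 0x0000000300000004, in1.lo * in2.lo = 0x8 (no carry into the
      // high word) and in1.lo * in2.hi + in1.hi * in2.lo = 0x2 * 0x3 +
      // 0x1 * 0x4 = 0xA, so the truncated 64-bit product is 0x0000000A00000008.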
3906       if (second.IsConstant()) {
3907         DCHECK(second.GetConstant()->IsLongConstant());
3908 
3909         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3910         int32_t low_value = Low32Bits(value);
3911         int32_t high_value = High32Bits(value);
3912         Immediate low(low_value);
3913         Immediate high(high_value);
3914 
3915         __ movl(eax, high);
3916         // eax <- in1.lo * in2.hi
3917         __ imull(eax, in1_lo);
3918         // in1.hi <- in1.hi * in2.lo
3919         __ imull(in1_hi, low);
3920         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3921         __ addl(in1_hi, eax);
3922         // move in2_lo to eax to prepare for double precision
3923         __ movl(eax, low);
3924         // edx:eax <- in1.lo * in2.lo
3925         __ mull(in1_lo);
3926         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3927         __ addl(in1_hi, edx);
3928         // in1.lo <- (in1.lo * in2.lo)[31:0];
3929         __ movl(in1_lo, eax);
3930       } else if (second.IsRegisterPair()) {
3931         Register in2_hi = second.AsRegisterPairHigh<Register>();
3932         Register in2_lo = second.AsRegisterPairLow<Register>();
3933 
3934         __ movl(eax, in2_hi);
3935         // eax <- in1.lo * in2.hi
3936         __ imull(eax, in1_lo);
3937         // in1.hi <- in1.hi * in2.lo
3938         __ imull(in1_hi, in2_lo);
3939         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3940         __ addl(in1_hi, eax);
3941         // move in1_lo to eax to prepare for double precision
3942         __ movl(eax, in1_lo);
3943         // edx:eax <- in1.lo * in2.lo
3944         __ mull(in2_lo);
3945         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3946         __ addl(in1_hi, edx);
3947         // in1.lo <- (in1.lo * in2.lo)[31:0];
3948         __ movl(in1_lo, eax);
3949       } else {
3950         DCHECK(second.IsDoubleStackSlot()) << second;
3951         Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3952         Address in2_lo(ESP, second.GetStackIndex());
3953 
3954         __ movl(eax, in2_hi);
3955         // eax <- in1.lo * in2.hi
3956         __ imull(eax, in1_lo);
3957         // in1.hi <- in1.hi * in2.lo
3958         __ imull(in1_hi, in2_lo);
3959         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3960         __ addl(in1_hi, eax);
3961         // move in1_lo to eax to prepare for double precision
3962         __ movl(eax, in1_lo);
3963         // edx:eax <- in1.lo * in2.lo
3964         __ mull(in2_lo);
3965         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3966         __ addl(in1_hi, edx);
3967         // in1.lo <- (in1.lo * in2.lo)[31:0];
3968         __ movl(in1_lo, eax);
3969       }
3970 
3971       break;
3972     }
3973 
3974     case DataType::Type::kFloat32: {
3975       DCHECK(first.Equals(locations->Out()));
3976       if (second.IsFpuRegister()) {
3977         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3978       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3979         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3980         DCHECK(const_area->IsEmittedAtUseSite());
3981         __ mulss(first.AsFpuRegister<XmmRegister>(),
3982                  codegen_->LiteralFloatAddress(
3983                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3984                      const_area->GetBaseMethodAddress(),
3985                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3986       } else {
3987         DCHECK(second.IsStackSlot());
3988         __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3989       }
3990       break;
3991     }
3992 
3993     case DataType::Type::kFloat64: {
3994       DCHECK(first.Equals(locations->Out()));
3995       if (second.IsFpuRegister()) {
3996         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3997       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3998         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3999         DCHECK(const_area->IsEmittedAtUseSite());
4000         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4001                  codegen_->LiteralDoubleAddress(
4002                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4003                      const_area->GetBaseMethodAddress(),
4004                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4005       } else {
4006         DCHECK(second.IsDoubleStackSlot());
4007         __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4008       }
4009       break;
4010     }
4011 
4012     default:
4013       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4014   }
4015 }
4016 
4017 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
4018                                                   uint32_t temp_offset,
4019                                                   uint32_t stack_adjustment,
4020                                                   bool is_fp,
4021                                                   bool is_wide) {
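  // Note: as used below, flds/filds load a 32-bit float/integer and
  // fldl/fildl load a 64-bit double/long onto the x87 stack; is_wide selects
  // between the two variants.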
4022   if (source.IsStackSlot()) {
4023     DCHECK(!is_wide);
4024     if (is_fp) {
4025       __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4026     } else {
4027       __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4028     }
4029   } else if (source.IsDoubleStackSlot()) {
4030     DCHECK(is_wide);
4031     if (is_fp) {
4032       __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4033     } else {
4034       __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4035     }
4036   } else {
4037     // Write the value to the temporary location on the stack and load to FP stack.
4038     if (!is_wide) {
4039       Location stack_temp = Location::StackSlot(temp_offset);
4040       codegen_->Move32(stack_temp, source);
4041       if (is_fp) {
4042         __ flds(Address(ESP, temp_offset));
4043       } else {
4044         __ filds(Address(ESP, temp_offset));
4045       }
4046     } else {
4047       Location stack_temp = Location::DoubleStackSlot(temp_offset);
4048       codegen_->Move64(stack_temp, source);
4049       if (is_fp) {
4050         __ fldl(Address(ESP, temp_offset));
4051       } else {
4052         __ fildl(Address(ESP, temp_offset));
4053       }
4054     }
4055   }
4056 }
4057 
4058 void InstructionCodeGeneratorX86::GenerateRemFP(HRem* rem) {
4059   DataType::Type type = rem->GetResultType();
4060   bool is_float = type == DataType::Type::kFloat32;
4061   size_t elem_size = DataType::Size(type);
4062   LocationSummary* locations = rem->GetLocations();
4063   Location first = locations->InAt(0);
4064   Location second = locations->InAt(1);
4065   Location out = locations->Out();
4066 
4067   // Create stack space for 2 elements.
4068   // TODO: enhance register allocator to ask for stack temporaries.
4069   codegen_->IncreaseFrame(2 * elem_size);
4070 
4071   // Load the values to the FP stack in reverse order, using temporaries if needed.
4072   const bool is_wide = !is_float;
4073   PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
4074   PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
4075 
4076   // Loop doing FPREM until we stabilize.
4077   NearLabel retry;
4078   __ Bind(&retry);
4079   __ fprem();
4080 
4081   // Move FP status to AX.
4082   __ fstsw();
4083 
4084   // And see if the argument reduction is complete. This is signaled by the
4085   // C2 FPU flag bit set to 0.
4086   __ andl(EAX, Immediate(kC2ConditionMask));
4087   __ j(kNotEqual, &retry);
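  // fstsw stores the x87 status word in AX; kC2ConditionMask (0x400) selects
  // the C2 bit (bit 10), which fprem leaves set while the partial remainder
  // still needs further reduction.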
4088 
4089   // We have settled on the final value. Retrieve it into an XMM register.
4090   // Store FP top of stack to real stack.
4091   if (is_float) {
4092     __ fsts(Address(ESP, 0));
4093   } else {
4094     __ fstl(Address(ESP, 0));
4095   }
4096 
4097   // Pop the 2 items from the FP stack.
4098   __ fucompp();
4099 
4100   // Load the value from the stack into an XMM register.
4101   DCHECK(out.IsFpuRegister()) << out;
4102   if (is_float) {
4103     __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4104   } else {
4105     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4106   }
4107 
4108   // And remove the temporary stack space we allocated.
4109   codegen_->DecreaseFrame(2 * elem_size);
4110 }
4111 
4112 
4113 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4114   DCHECK(instruction->IsDiv() || instruction->IsRem());
4115 
4116   LocationSummary* locations = instruction->GetLocations();
4117   DCHECK(locations->InAt(1).IsConstant());
4118   DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
4119 
4120   Register out_register = locations->Out().AsRegister<Register>();
4121   Register input_register = locations->InAt(0).AsRegister<Register>();
4122   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4123 
4124   DCHECK(imm == 1 || imm == -1);
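  // x % 1 and x % -1 are always 0. x / 1 is x and x / -1 is -x; negl also
  // matches the Java rule that Integer.MIN_VALUE / -1 == Integer.MIN_VALUE.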
4125 
4126   if (instruction->IsRem()) {
4127     __ xorl(out_register, out_register);
4128   } else {
4129     __ movl(out_register, input_register);
4130     if (imm == -1) {
4131       __ negl(out_register);
4132     }
4133   }
4134 }
4135 
4136 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
4137   LocationSummary* locations = instruction->GetLocations();
4138   Location second = locations->InAt(1);
4139 
4140   Register out = locations->Out().AsRegister<Register>();
4141   Register numerator = locations->InAt(0).AsRegister<Register>();
4142 
4143   int32_t imm = Int64FromConstant(second.GetConstant());
4144   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4145   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
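  // The remainder of a truncated division keeps the sign of the numerator.
  // For example, with imm = 4 and numerator = -7: out = -7 & 3 = 1, and since
  // the numerator is negative and out != 0, the cmov below selects
  // out + ~(4 - 1) = 1 - 4 = -3, i.e. -7 % 4 == -3.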
4146 
4147   Register tmp = locations->GetTemp(0).AsRegister<Register>();
4148   NearLabel done;
4149   __ movl(out, numerator);
4150   __ andl(out, Immediate(abs_imm - 1));
4151   __ j(Condition::kZero, &done);
4152   __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
4153   __ testl(numerator, numerator);
4154   __ cmovl(Condition::kLess, out, tmp);
4155   __ Bind(&done);
4156 }
4157 
4158 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
4159   LocationSummary* locations = instruction->GetLocations();
4160 
4161   Register out_register = locations->Out().AsRegister<Register>();
4162   Register input_register = locations->InAt(0).AsRegister<Register>();
4163   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4164   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4165   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
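  // Bias negative numerators by abs_imm - 1 before the arithmetic shift so
  // the quotient rounds towards zero. For example, with imm = 4 and
  // input = -7: num = -7 + 3 = -4 (kept by the cmov since input < 0),
  // -4 >> 2 = -1, and the final negation is skipped since imm > 0,
  // giving -7 / 4 == -1.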
4166 
4167   Register num = locations->GetTemp(0).AsRegister<Register>();
4168 
4169   __ leal(num, Address(input_register, abs_imm - 1));
4170   __ testl(input_register, input_register);
4171   __ cmovl(kGreaterEqual, num, input_register);
4172   int shift = CTZ(imm);
4173   __ sarl(num, Immediate(shift));
4174 
4175   if (imm < 0) {
4176     __ negl(num);
4177   }
4178 
4179   __ movl(out_register, num);
4180 }
4181 
4182 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4183   DCHECK(instruction->IsDiv() || instruction->IsRem());
4184 
4185   LocationSummary* locations = instruction->GetLocations();
4186   int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4187 
4188   Register eax = locations->InAt(0).AsRegister<Register>();
4189   Register out = locations->Out().AsRegister<Register>();
4190   Register num;
4191   Register edx;
4192 
4193   if (instruction->IsDiv()) {
4194     edx = locations->GetTemp(0).AsRegister<Register>();
4195     num = locations->GetTemp(1).AsRegister<Register>();
4196   } else {
4197     edx = locations->Out().AsRegister<Register>();
4198     num = locations->GetTemp(0).AsRegister<Register>();
4199   }
4200 
4201   DCHECK_EQ(EAX, eax);
4202   DCHECK_EQ(EDX, edx);
4203   if (instruction->IsDiv()) {
4204     DCHECK_EQ(EAX, out);
4205   } else {
4206     DCHECK_EQ(EDX, out);
4207   }
4208 
4209   int64_t magic;
4210   int shift;
4211   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
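  // The sequence below computes, in essence:
  //   tmp      = (int64_t{numerator} * magic) >> 32, corrected by +/- numerator
  //              when the signs of imm and magic differ,
  //   quotient = (tmp >> shift) + ((tmp >> shift) < 0 ? 1 : 0),
  // and, for HRem, remainder = numerator - quotient * imm.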
4212 
4213   // Save the numerator.
4214   __ movl(num, eax);
4215 
4216   // EAX = magic
4217   __ movl(eax, Immediate(magic));
4218 
4219   // EDX:EAX = magic * numerator
4220   __ imull(num);
4221 
4222   if (imm > 0 && magic < 0) {
4223     // EDX += num
4224     __ addl(edx, num);
4225   } else if (imm < 0 && magic > 0) {
4226     __ subl(edx, num);
4227   }
4228 
4229   // Shift if needed.
4230   if (shift != 0) {
4231     __ sarl(edx, Immediate(shift));
4232   }
4233 
4234   // EDX += 1 if EDX < 0
4235   __ movl(eax, edx);
4236   __ shrl(edx, Immediate(31));
4237   __ addl(edx, eax);
4238 
4239   if (instruction->IsRem()) {
4240     __ movl(eax, num);
4241     __ imull(edx, Immediate(imm));
4242     __ subl(eax, edx);
4243     __ movl(edx, eax);
4244   } else {
4245     __ movl(eax, edx);
4246   }
4247 }
4248 
4249 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4250   DCHECK(instruction->IsDiv() || instruction->IsRem());
4251 
4252   LocationSummary* locations = instruction->GetLocations();
4253   Location out = locations->Out();
4254   Location first = locations->InAt(0);
4255   Location second = locations->InAt(1);
4256   bool is_div = instruction->IsDiv();
4257 
4258   switch (instruction->GetResultType()) {
4259     case DataType::Type::kInt32: {
4260       DCHECK_EQ(EAX, first.AsRegister<Register>());
4261       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4262 
4263       if (second.IsConstant()) {
4264         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4265 
4266         if (imm == 0) {
4267           // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
4268           // Do not generate anything for 0; the preceding DivZeroCheck throws before this code is reached.
4269           DivRemOneOrMinusOne(instruction);
4270         } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4271           if (is_div) {
4272             DivByPowerOfTwo(instruction->AsDiv());
4273           } else {
4274             RemByPowerOfTwo(instruction->AsRem());
4275           }
4276         } else {
4277           DCHECK(imm <= -2 || imm >= 2);
4278           GenerateDivRemWithAnyConstant(instruction);
4279         }
4280       } else {
4281         SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4282             instruction, out.AsRegister<Register>(), is_div);
4283         codegen_->AddSlowPath(slow_path);
4284 
4285         Register second_reg = second.AsRegister<Register>();
4286         // 0x80000000/-1 triggers an arithmetic exception!
4287         // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
4288         // Dividing by -1 is actually negation and -0x80000000 == 0x80000000 so
4289 
4290         __ cmpl(second_reg, Immediate(-1));
4291         __ j(kEqual, slow_path->GetEntryLabel());
4292 
4293         // edx:eax <- sign-extended of eax
4294         __ cdq();
4295         // eax = quotient, edx = remainder
4296         __ idivl(second_reg);
4297         __ Bind(slow_path->GetExitLabel());
4298       }
4299       break;
4300     }
4301 
4302     case DataType::Type::kInt64: {
4303       InvokeRuntimeCallingConvention calling_convention;
4304       DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4305       DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4306       DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4307       DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4308       DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4309       DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4310 
4311       if (is_div) {
4312         codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4313         CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4314       } else {
4315         codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4316         CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4317       }
4318       break;
4319     }
4320 
4321     default:
4322       LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4323   }
4324 }
4325 
4326 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4327   LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4328       ? LocationSummary::kCallOnMainOnly
4329       : LocationSummary::kNoCall;
4330   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4331 
4332   switch (div->GetResultType()) {
4333     case DataType::Type::kInt32: {
4334       locations->SetInAt(0, Location::RegisterLocation(EAX));
4335       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4336       locations->SetOut(Location::SameAsFirstInput());
4337       // Intel uses edx:eax as the dividend.
4338       locations->AddTemp(Location::RegisterLocation(EDX));
4339       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4340       // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
4341       // output and request another temp.
4342       if (div->InputAt(1)->IsIntConstant()) {
4343         locations->AddTemp(Location::RequiresRegister());
4344       }
4345       break;
4346     }
4347     case DataType::Type::kInt64: {
4348       InvokeRuntimeCallingConvention calling_convention;
4349       locations->SetInAt(0, Location::RegisterPairLocation(
4350           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4351       locations->SetInAt(1, Location::RegisterPairLocation(
4352           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4353       // Runtime helper puts the result in EAX, EDX.
4354       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4355       break;
4356     }
4357     case DataType::Type::kFloat32:
4358     case DataType::Type::kFloat64: {
4359       locations->SetInAt(0, Location::RequiresFpuRegister());
4360       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4361         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4362       } else if (div->InputAt(1)->IsConstant()) {
4363         locations->SetInAt(1, Location::RequiresFpuRegister());
4364       } else {
4365         locations->SetInAt(1, Location::Any());
4366       }
4367       locations->SetOut(Location::SameAsFirstInput());
4368       break;
4369     }
4370 
4371     default:
4372       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4373   }
4374 }
4375 
4376 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4377   LocationSummary* locations = div->GetLocations();
4378   Location first = locations->InAt(0);
4379   Location second = locations->InAt(1);
4380 
4381   switch (div->GetResultType()) {
4382     case DataType::Type::kInt32:
4383     case DataType::Type::kInt64: {
4384       GenerateDivRemIntegral(div);
4385       break;
4386     }
4387 
4388     case DataType::Type::kFloat32: {
4389       if (second.IsFpuRegister()) {
4390         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4391       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4392         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4393         DCHECK(const_area->IsEmittedAtUseSite());
4394         __ divss(first.AsFpuRegister<XmmRegister>(),
4395                  codegen_->LiteralFloatAddress(
4396                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
4397                    const_area->GetBaseMethodAddress(),
4398                    const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4399       } else {
4400         DCHECK(second.IsStackSlot());
4401         __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4402       }
4403       break;
4404     }
4405 
4406     case DataType::Type::kFloat64: {
4407       if (second.IsFpuRegister()) {
4408         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4409       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4410         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4411         DCHECK(const_area->IsEmittedAtUseSite());
4412         __ divsd(first.AsFpuRegister<XmmRegister>(),
4413                  codegen_->LiteralDoubleAddress(
4414                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4415                      const_area->GetBaseMethodAddress(),
4416                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4417       } else {
4418         DCHECK(second.IsDoubleStackSlot());
4419         __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4420       }
4421       break;
4422     }
4423 
4424     default:
4425       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4426   }
4427 }
4428 
4429 void LocationsBuilderX86::VisitRem(HRem* rem) {
4430   DataType::Type type = rem->GetResultType();
4431 
4432   LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4433       ? LocationSummary::kCallOnMainOnly
4434       : LocationSummary::kNoCall;
4435   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4436 
4437   switch (type) {
4438     case DataType::Type::kInt32: {
4439       locations->SetInAt(0, Location::RegisterLocation(EAX));
4440       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4441       locations->SetOut(Location::RegisterLocation(EDX));
4442       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4443       // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
4444       // output and request another temp.
4445       if (rem->InputAt(1)->IsIntConstant()) {
4446         locations->AddTemp(Location::RequiresRegister());
4447       }
4448       break;
4449     }
4450     case DataType::Type::kInt64: {
4451       InvokeRuntimeCallingConvention calling_convention;
4452       locations->SetInAt(0, Location::RegisterPairLocation(
4453           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4454       locations->SetInAt(1, Location::RegisterPairLocation(
4455           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4456       // Runtime helper puts the result in EAX, EDX.
4457       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4458       break;
4459     }
4460     case DataType::Type::kFloat64:
4461     case DataType::Type::kFloat32: {
4462       locations->SetInAt(0, Location::Any());
4463       locations->SetInAt(1, Location::Any());
4464       locations->SetOut(Location::RequiresFpuRegister());
4465       locations->AddTemp(Location::RegisterLocation(EAX));
4466       break;
4467     }
4468 
4469     default:
4470       LOG(FATAL) << "Unexpected rem type " << type;
4471   }
4472 }
4473 
4474 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4475   DataType::Type type = rem->GetResultType();
4476   switch (type) {
4477     case DataType::Type::kInt32:
4478     case DataType::Type::kInt64: {
4479       GenerateDivRemIntegral(rem);
4480       break;
4481     }
4482     case DataType::Type::kFloat32:
4483     case DataType::Type::kFloat64: {
4484       GenerateRemFP(rem);
4485       break;
4486     }
4487     default:
4488       LOG(FATAL) << "Unexpected rem type " << type;
4489   }
4490 }
4491 
4492 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4493   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4494   switch (minmax->GetResultType()) {
4495     case DataType::Type::kInt32:
4496       locations->SetInAt(0, Location::RequiresRegister());
4497       locations->SetInAt(1, Location::RequiresRegister());
4498       locations->SetOut(Location::SameAsFirstInput());
4499       break;
4500     case DataType::Type::kInt64:
4501       locations->SetInAt(0, Location::RequiresRegister());
4502       locations->SetInAt(1, Location::RequiresRegister());
4503       locations->SetOut(Location::SameAsFirstInput());
4504       // Register to use to perform a long subtract to set cc.
4505       locations->AddTemp(Location::RequiresRegister());
4506       break;
4507     case DataType::Type::kFloat32:
4508       locations->SetInAt(0, Location::RequiresFpuRegister());
4509       locations->SetInAt(1, Location::RequiresFpuRegister());
4510       locations->SetOut(Location::SameAsFirstInput());
4511       locations->AddTemp(Location::RequiresRegister());
4512       break;
4513     case DataType::Type::kFloat64:
4514       locations->SetInAt(0, Location::RequiresFpuRegister());
4515       locations->SetInAt(1, Location::RequiresFpuRegister());
4516       locations->SetOut(Location::SameAsFirstInput());
4517       break;
4518     default:
4519       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4520   }
4521 }
4522 
4523 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4524                                                     bool is_min,
4525                                                     DataType::Type type) {
4526   Location op1_loc = locations->InAt(0);
4527   Location op2_loc = locations->InAt(1);
4528 
4529   // Shortcut for same input locations.
4530   if (op1_loc.Equals(op2_loc)) {
4531     // Can return immediately, as op1_loc == out_loc.
4532     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4533     //       a copy here.
4534     DCHECK(locations->Out().Equals(op1_loc));
4535     return;
4536   }
4537 
4538   if (type == DataType::Type::kInt64) {
4539     // Need to perform a subtract to get the sign right.
4540     // op1 is already in the same location as the output.
4541     Location output = locations->Out();
4542     Register output_lo = output.AsRegisterPairLow<Register>();
4543     Register output_hi = output.AsRegisterPairHigh<Register>();
4544 
4545     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4546     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4547 
4548     // The comparison is performed by subtracting the second operand from
4549     // the first operand and then setting the status flags in the same
4550     // manner as the SUB instruction.
4551     __ cmpl(output_lo, op2_lo);
4552 
4553     // Now use a temp and the borrow to finish the subtraction of op2_hi.
4554     Register temp = locations->GetTemp(0).AsRegister<Register>();
4555     __ movl(temp, output_hi);
4556     __ sbbl(temp, op2_hi);
4557 
4558     // Now the condition code is correct.
4559     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4560     __ cmovl(cond, output_lo, op2_lo);
4561     __ cmovl(cond, output_hi, op2_hi);
4562   } else {
4563     DCHECK_EQ(type, DataType::Type::kInt32);
4564     Register out = locations->Out().AsRegister<Register>();
4565     Register op2 = op2_loc.AsRegister<Register>();
4566 
4567     //  (out := op1)
4568     //  out <=? op2
4569     //  if out is min jmp done
4570     //  out := op2
4571     // done:
4572 
4573     __ cmpl(out, op2);
4574     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4575     __ cmovl(cond, out, op2);
4576   }
4577 }
4578 
4579 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4580                                                    bool is_min,
4581                                                    DataType::Type type) {
4582   Location op1_loc = locations->InAt(0);
4583   Location op2_loc = locations->InAt(1);
4584   Location out_loc = locations->Out();
4585   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4586 
4587   // Shortcut for same input locations.
4588   if (op1_loc.Equals(op2_loc)) {
4589     DCHECK(out_loc.Equals(op1_loc));
4590     return;
4591   }
4592 
4593   //  (out := op1)
4594   //  out <=? op2
4595   //  if Nan jmp Nan_label
4596   //  if out is min jmp done
4597   //  if op2 is min jmp op2_label
4598   //  handle -0/+0
4599   //  jmp done
4600   // Nan_label:
4601   //  out := NaN
4602   // op2_label:
4603   //  out := op2
4604   // done:
4605   //
4606   // This removes one jmp, but needs to copy one input (op1) to out.
4607   //
4608   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4609 
4610   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4611 
4612   NearLabel nan, done, op2_label;
4613   if (type == DataType::Type::kFloat64) {
4614     __ ucomisd(out, op2);
4615   } else {
4616     DCHECK_EQ(type, DataType::Type::kFloat32);
4617     __ ucomiss(out, op2);
4618   }
4619 
4620   __ j(Condition::kParityEven, &nan);
4621 
4622   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4623   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4624 
4625   // Handle 0.0/-0.0.
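  // min(+0.0, -0.0) must be -0.0 and max(+0.0, -0.0) must be +0.0, so the
  // sign bits are combined: OR-ing the bit patterns yields -0.0 for min,
  // AND-ing them yields +0.0 for max.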
4626   if (is_min) {
4627     if (type == DataType::Type::kFloat64) {
4628       __ orpd(out, op2);
4629     } else {
4630       __ orps(out, op2);
4631     }
4632   } else {
4633     if (type == DataType::Type::kFloat64) {
4634       __ andpd(out, op2);
4635     } else {
4636       __ andps(out, op2);
4637     }
4638   }
4639   __ jmp(&done);
4640 
4641   // NaN handling.
4642   __ Bind(&nan);
4643   if (type == DataType::Type::kFloat64) {
4644     // TODO: Use a constant from the constant table (requires extra input).
4645     __ LoadLongConstant(out, kDoubleNaN);
4646   } else {
4647     Register constant = locations->GetTemp(0).AsRegister<Register>();
4648     __ movl(constant, Immediate(kFloatNaN));
4649     __ movd(out, constant);
4650   }
4651   __ jmp(&done);
4652 
4653   // out := op2;
4654   __ Bind(&op2_label);
4655   if (type == DataType::Type::kFloat64) {
4656     __ movsd(out, op2);
4657   } else {
4658     __ movss(out, op2);
4659   }
4660 
4661   // Done.
4662   __ Bind(&done);
4663 }
4664 
4665 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4666   DataType::Type type = minmax->GetResultType();
4667   switch (type) {
4668     case DataType::Type::kInt32:
4669     case DataType::Type::kInt64:
4670       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4671       break;
4672     case DataType::Type::kFloat32:
4673     case DataType::Type::kFloat64:
4674       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4675       break;
4676     default:
4677       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4678   }
4679 }
4680 
4681 void LocationsBuilderX86::VisitMin(HMin* min) {
4682   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4683 }
4684 
4685 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4686   GenerateMinMax(min, /*is_min*/ true);
4687 }
4688 
4689 void LocationsBuilderX86::VisitMax(HMax* max) {
4690   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4691 }
4692 
4693 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4694   GenerateMinMax(max, /*is_min*/ false);
4695 }
4696 
4697 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4698   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4699   switch (abs->GetResultType()) {
4700     case DataType::Type::kInt32:
4701       locations->SetInAt(0, Location::RegisterLocation(EAX));
4702       locations->SetOut(Location::SameAsFirstInput());
4703       locations->AddTemp(Location::RegisterLocation(EDX));
4704       break;
4705     case DataType::Type::kInt64:
4706       locations->SetInAt(0, Location::RequiresRegister());
4707       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4708       locations->AddTemp(Location::RequiresRegister());
4709       break;
4710     case DataType::Type::kFloat32:
4711       locations->SetInAt(0, Location::RequiresFpuRegister());
4712       locations->SetOut(Location::SameAsFirstInput());
4713       locations->AddTemp(Location::RequiresFpuRegister());
4714       locations->AddTemp(Location::RequiresRegister());
4715       break;
4716     case DataType::Type::kFloat64:
4717       locations->SetInAt(0, Location::RequiresFpuRegister());
4718       locations->SetOut(Location::SameAsFirstInput());
4719       locations->AddTemp(Location::RequiresFpuRegister());
4720       break;
4721     default:
4722       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4723   }
4724 }
4725 
4726 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4727   LocationSummary* locations = abs->GetLocations();
4728   switch (abs->GetResultType()) {
4729     case DataType::Type::kInt32: {
4730       Register out = locations->Out().AsRegister<Register>();
4731       DCHECK_EQ(out, EAX);
4732       Register temp = locations->GetTemp(0).AsRegister<Register>();
4733       DCHECK_EQ(temp, EDX);
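      // Branch-free abs: after cdq, EDX is 0 for non-negative EAX and -1
      // otherwise, so (x ^ sign) - sign leaves non-negative inputs unchanged
      // and negates negative ones. For example, x = -5: sign = -1,
      // (-5 ^ -1) = 4, 4 - (-1) = 5.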
4734       // Sign extend EAX into EDX.
4735       __ cdq();
4736       // XOR EAX with sign.
4737       __ xorl(EAX, EDX);
4738       // Subtract out sign to correct.
4739       __ subl(EAX, EDX);
4740       // The result is in EAX.
4741       break;
4742     }
4743     case DataType::Type::kInt64: {
4744       Location input = locations->InAt(0);
4745       Register input_lo = input.AsRegisterPairLow<Register>();
4746       Register input_hi = input.AsRegisterPairHigh<Register>();
4747       Location output = locations->Out();
4748       Register output_lo = output.AsRegisterPairLow<Register>();
4749       Register output_hi = output.AsRegisterPairHigh<Register>();
4750       Register temp = locations->GetTemp(0).AsRegister<Register>();
4751       // Compute the sign into the temporary.
4752       __ movl(temp, input_hi);
4753       __ sarl(temp, Immediate(31));
4754       // Store the sign into the output.
4755       __ movl(output_lo, temp);
4756       __ movl(output_hi, temp);
4757       // XOR the input to the output.
4758       __ xorl(output_lo, input_lo);
4759       __ xorl(output_hi, input_hi);
4760       // Subtract the sign.
4761       __ subl(output_lo, temp);
4762       __ sbbl(output_hi, temp);
4763       break;
4764     }
4765     case DataType::Type::kFloat32: {
4766       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4767       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4768       Register constant = locations->GetTemp(1).AsRegister<Register>();
4769       __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4770       __ movd(temp, constant);
4771       __ andps(out, temp);
4772       break;
4773     }
4774     case DataType::Type::kFloat64: {
4775       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4776       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4777       // TODO: Use a constant from the constant table (requires extra input).
4778       __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4779       __ andpd(out, temp);
4780       break;
4781     }
4782     default:
4783       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4784   }
4785 }
4786 
4787 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4788   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4789   switch (instruction->GetType()) {
4790     case DataType::Type::kBool:
4791     case DataType::Type::kUint8:
4792     case DataType::Type::kInt8:
4793     case DataType::Type::kUint16:
4794     case DataType::Type::kInt16:
4795     case DataType::Type::kInt32: {
4796       locations->SetInAt(0, Location::Any());
4797       break;
4798     }
4799     case DataType::Type::kInt64: {
4800       locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4801       if (!instruction->IsConstant()) {
4802         locations->AddTemp(Location::RequiresRegister());
4803       }
4804       break;
4805     }
4806     default:
4807       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4808   }
4809 }
4810 
4811 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4812   SlowPathCode* slow_path =
4813       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4814   codegen_->AddSlowPath(slow_path);
4815 
4816   LocationSummary* locations = instruction->GetLocations();
4817   Location value = locations->InAt(0);
4818 
4819   switch (instruction->GetType()) {
4820     case DataType::Type::kBool:
4821     case DataType::Type::kUint8:
4822     case DataType::Type::kInt8:
4823     case DataType::Type::kUint16:
4824     case DataType::Type::kInt16:
4825     case DataType::Type::kInt32: {
4826       if (value.IsRegister()) {
4827         __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4828         __ j(kEqual, slow_path->GetEntryLabel());
4829       } else if (value.IsStackSlot()) {
4830         __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4831         __ j(kEqual, slow_path->GetEntryLabel());
4832       } else {
4833         DCHECK(value.IsConstant()) << value;
4834         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4835           __ jmp(slow_path->GetEntryLabel());
4836         }
4837       }
4838       break;
4839     }
4840     case DataType::Type::kInt64: {
4841       if (value.IsRegisterPair()) {
4842         Register temp = locations->GetTemp(0).AsRegister<Register>();
4843         __ movl(temp, value.AsRegisterPairLow<Register>());
4844         __ orl(temp, value.AsRegisterPairHigh<Register>());
4845         __ j(kEqual, slow_path->GetEntryLabel());
4846       } else {
4847         DCHECK(value.IsConstant()) << value;
4848         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4849           __ jmp(slow_path->GetEntryLabel());
4850         }
4851       }
4852       break;
4853     }
4854     default:
4855       LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4856   }
4857 }
4858 
4859 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4860   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4861 
4862   LocationSummary* locations =
4863       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4864 
4865   switch (op->GetResultType()) {
4866     case DataType::Type::kInt32:
4867     case DataType::Type::kInt64: {
4868       // Can't have Location::Any() and output SameAsFirstInput()
4869       locations->SetInAt(0, Location::RequiresRegister());
4870       // The shift count needs to be in CL or a constant.
4871       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4872       locations->SetOut(Location::SameAsFirstInput());
4873       break;
4874     }
4875     default:
4876       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4877   }
4878 }
4879 
4880 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4881   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4882 
4883   LocationSummary* locations = op->GetLocations();
4884   Location first = locations->InAt(0);
4885   Location second = locations->InAt(1);
4886   DCHECK(first.Equals(locations->Out()));
4887 
4888   switch (op->GetResultType()) {
4889     case DataType::Type::kInt32: {
4890       DCHECK(first.IsRegister());
4891       Register first_reg = first.AsRegister<Register>();
4892       if (second.IsRegister()) {
4893         Register second_reg = second.AsRegister<Register>();
4894         DCHECK_EQ(ECX, second_reg);
4895         if (op->IsShl()) {
4896           __ shll(first_reg, second_reg);
4897         } else if (op->IsShr()) {
4898           __ sarl(first_reg, second_reg);
4899         } else {
4900           __ shrl(first_reg, second_reg);
4901         }
4902       } else {
4903         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4904         if (shift == 0) {
4905           return;
4906         }
4907         Immediate imm(shift);
4908         if (op->IsShl()) {
4909           __ shll(first_reg, imm);
4910         } else if (op->IsShr()) {
4911           __ sarl(first_reg, imm);
4912         } else {
4913           __ shrl(first_reg, imm);
4914         }
4915       }
4916       break;
4917     }
4918     case DataType::Type::kInt64: {
4919       if (second.IsRegister()) {
4920         Register second_reg = second.AsRegister<Register>();
4921         DCHECK_EQ(ECX, second_reg);
4922         if (op->IsShl()) {
4923           GenerateShlLong(first, second_reg);
4924         } else if (op->IsShr()) {
4925           GenerateShrLong(first, second_reg);
4926         } else {
4927           GenerateUShrLong(first, second_reg);
4928         }
4929       } else {
4930         // Shift by a constant.
4931         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4932         // Nothing to do if the shift is 0, as the input is already the output.
4933         if (shift != 0) {
4934           if (op->IsShl()) {
4935             GenerateShlLong(first, shift);
4936           } else if (op->IsShr()) {
4937             GenerateShrLong(first, shift);
4938           } else {
4939             GenerateUShrLong(first, shift);
4940           }
4941         }
4942       }
4943       break;
4944     }
4945     default:
4946       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4947   }
4948 }
4949 
4950 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4951   Register low = loc.AsRegisterPairLow<Register>();
4952   Register high = loc.AsRegisterPairHigh<Register>();
4953   if (shift == 1) {
4954     // This is just an addition.
4955     __ addl(low, low);
4956     __ adcl(high, high);
4957   } else if (shift == 32) {
4958     // Shift by 32 is easy. High gets low, and low gets 0.
4959     codegen_->EmitParallelMoves(
4960         loc.ToLow(),
4961         loc.ToHigh(),
4962         DataType::Type::kInt32,
4963         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4964         loc.ToLow(),
4965         DataType::Type::kInt32);
4966   } else if (shift > 32) {
4967     // Low part becomes 0.  High part is low part << (shift-32).
4968     __ movl(high, low);
4969     __ shll(high, Immediate(shift - 32));
4970     __ xorl(low, low);
4971   } else {
4972     // Between 1 and 31.
4973     __ shld(high, low, Immediate(shift));
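    // shld shifts `high` left while filling the vacated bits from the top of
    // `low`, so together with the shll on `low` it forms the full 64-bit
    // shift. For example, 0x0000000180000000 << 1 gives high = 0x00000003 and
    // low = 0x00000000.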
4974     __ shll(low, Immediate(shift));
4975   }
4976 }
4977 
4978 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4979   NearLabel done;
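  // x86 shift instructions use the count modulo 32, so shld/shll alone are
  // only correct for counts 0-31. The testl against 32 below fixes up counts
  // 32-63 by moving the already-shifted low word into the high word and
  // zeroing the low word.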
4980   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4981   __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4982   __ testl(shifter, Immediate(32));
4983   __ j(kEqual, &done);
4984   __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4985   __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4986   __ Bind(&done);
4987 }
4988 
4989 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4990   Register low = loc.AsRegisterPairLow<Register>();
4991   Register high = loc.AsRegisterPairHigh<Register>();
4992   if (shift == 32) {
4993     // Need to copy the sign.
4994     DCHECK_NE(low, high);
4995     __ movl(low, high);
4996     __ sarl(high, Immediate(31));
4997   } else if (shift > 32) {
4998     DCHECK_NE(low, high);
4999     // High part becomes sign. Low part is shifted by shift - 32.
5000     __ movl(low, high);
5001     __ sarl(high, Immediate(31));
5002     __ sarl(low, Immediate(shift - 32));
5003   } else {
5004     // Between 1 and 31.
5005     __ shrd(low, high, Immediate(shift));
5006     __ sarl(high, Immediate(shift));
5007   }
5008 }
5009 
5010 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
5011   NearLabel done;
5012   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5013   __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
5014   __ testl(shifter, Immediate(32));
5015   __ j(kEqual, &done);
5016   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5017   __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
5018   __ Bind(&done);
5019 }
5020 
GenerateUShrLong(const Location & loc,int shift)5021 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
5022   Register low = loc.AsRegisterPairLow<Register>();
5023   Register high = loc.AsRegisterPairHigh<Register>();
5024   if (shift == 32) {
5025     // Shift by 32 is easy. Low gets high, and high gets 0.
5026     codegen_->EmitParallelMoves(
5027         loc.ToHigh(),
5028         loc.ToLow(),
5029         DataType::Type::kInt32,
5030         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
5031         loc.ToHigh(),
5032         DataType::Type::kInt32);
5033   } else if (shift > 32) {
5034     // Low part is high >> (shift - 32). High part becomes 0.
5035     __ movl(low, high);
5036     __ shrl(low, Immediate(shift - 32));
5037     __ xorl(high, high);
5038   } else {
5039     // Between 1 and 31.
5040     __ shrd(low, high, Immediate(shift));
5041     __ shrl(high, Immediate(shift));
5042   }
5043 }
5044 
GenerateUShrLong(const Location & loc,Register shifter)5045 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
5046   NearLabel done;
5047   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5048   __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
5049   __ testl(shifter, Immediate(32));
5050   __ j(kEqual, &done);
5051   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5052   __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
5053   __ Bind(&done);
5054 }
5055 
VisitRol(HRol * rol)5056 void LocationsBuilderX86::VisitRol(HRol* rol) {
5057   HandleRotate(rol);
5058 }
5059 
VisitRor(HRor * ror)5060 void LocationsBuilderX86::VisitRor(HRor* ror) {
5061   HandleRotate(ror);
5062 }
5063 
HandleRotate(HBinaryOperation * rotate)5064 void LocationsBuilderX86::HandleRotate(HBinaryOperation* rotate) {
5065   LocationSummary* locations =
5066       new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5067 
5068   switch (rotate->GetResultType()) {
5069     case DataType::Type::kInt64:
5070       // Add the temporary register needed for the 64-bit rotate.
5071       locations->AddTemp(Location::RequiresRegister());
5072       FALLTHROUGH_INTENDED;
5073     case DataType::Type::kInt32:
5074       locations->SetInAt(0, Location::RequiresRegister());
5075       // The shift count needs to be in CL (unless it is a constant).
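           // (x86 variable shift and rotate instructions take their count only in CL.)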
5076       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, rotate->InputAt(1)));
5077       locations->SetOut(Location::SameAsFirstInput());
5078       break;
5079     default:
5080       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5081       UNREACHABLE();
5082   }
5083 }
5084 
VisitRol(HRol * rol)5085 void InstructionCodeGeneratorX86::VisitRol(HRol* rol) {
5086   HandleRotate(rol);
5087 }
5088 
VisitRor(HRor * ror)5089 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
5090   HandleRotate(ror);
5091 }
5092 
HandleRotate(HBinaryOperation * rotate)5093 void InstructionCodeGeneratorX86::HandleRotate(HBinaryOperation* rotate) {
5094   LocationSummary* locations = rotate->GetLocations();
5095   Location first = locations->InAt(0);
5096   Location second = locations->InAt(1);
5097 
5098   if (rotate->GetResultType() == DataType::Type::kInt32) {
5099     Register first_reg = first.AsRegister<Register>();
5100     if (second.IsRegister()) {
5101       Register second_reg = second.AsRegister<Register>();
5102       if (rotate->IsRol()) {
5103         __ roll(first_reg, second_reg);
5104       } else {
5105         DCHECK(rotate->IsRor());
5106         __ rorl(first_reg, second_reg);
5107       }
5108     } else {
5109       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5110       if (rotate->IsRol()) {
5111         __ roll(first_reg, imm);
5112       } else {
5113         DCHECK(rotate->IsRor());
5114         __ rorl(first_reg, imm);
5115       }
5116     }
5117     return;
5118   }
5119 
5120   DCHECK_EQ(rotate->GetResultType(), DataType::Type::kInt64);
5121   Register first_reg_lo = first.AsRegisterPairLow<Register>();
5122   Register first_reg_hi = first.AsRegisterPairHigh<Register>();
5123   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
5124   if (second.IsRegister()) {
5125     Register second_reg = second.AsRegister<Register>();
5126     DCHECK_EQ(second_reg, ECX);
5127 
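         // The two double-precision shifts below rotate the pair by (count mod 32); the
         // conditional moves then swap the halves when bit five of the count is set, which
         // accounts for rotations of 32 or more.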
5128     __ movl(temp_reg, first_reg_hi);
5129     if (rotate->IsRol()) {
5130       __ shld(first_reg_hi, first_reg_lo, second_reg);
5131       __ shld(first_reg_lo, temp_reg, second_reg);
5132     } else {
5133       __ shrd(first_reg_hi, first_reg_lo, second_reg);
5134       __ shrd(first_reg_lo, temp_reg, second_reg);
5135     }
5136     __ movl(temp_reg, first_reg_hi);
5137     __ testl(second_reg, Immediate(32));
5138     __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
5139     __ cmovl(kNotEqual, first_reg_lo, temp_reg);
5140   } else {
5141     int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
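         // Canonicalize to a right rotation: rotating left by k is the same as rotating
         // right by 64 - k, and negating before masking with 63 yields exactly that.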
5142     if (rotate->IsRol()) {
5143       value = -value;
5144     }
5145     int32_t shift_amt = value & kMaxLongShiftDistance;
5146 
5147     if (shift_amt == 0) {
5148       // Rotation by 0; nothing to do.
5149       return;
5150     }
5151     if (shift_amt == 32) {
5152       // Just swap.
5153       __ movl(temp_reg, first_reg_lo);
5154       __ movl(first_reg_lo, first_reg_hi);
5155       __ movl(first_reg_hi, temp_reg);
5156       return;
5157     }
5158 
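         // General case: the two shrd instructions rotate right by (shift_amt mod 32), and the
         // final swap of the halves supplies the remaining rotate by 32 when shift_amt > 32.
         // For example, a rotate right by 40 is emitted as a rotate right by 8 plus a swap.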
5159     Immediate imm(shift_amt);
5160     // Save the contents of the low register.
5161     __ movl(temp_reg, first_reg_lo);
5162 
5163     // Shift right into low, feeding bits from high.
5164     __ shrd(first_reg_lo, first_reg_hi, imm);
5165 
5166     // Shift right into high, feeding bits from the original low.
5167     __ shrd(first_reg_hi, temp_reg, imm);
5168 
5169     // Swap if needed.
5170     if (shift_amt > 32) {
5171       __ movl(temp_reg, first_reg_lo);
5172       __ movl(first_reg_lo, first_reg_hi);
5173       __ movl(first_reg_hi, temp_reg);
5174     }
5175   }
5176 }
5177 
VisitShl(HShl * shl)5178 void LocationsBuilderX86::VisitShl(HShl* shl) {
5179   HandleShift(shl);
5180 }
5181 
VisitShl(HShl * shl)5182 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
5183   HandleShift(shl);
5184 }
5185 
VisitShr(HShr * shr)5186 void LocationsBuilderX86::VisitShr(HShr* shr) {
5187   HandleShift(shr);
5188 }
5189 
VisitShr(HShr * shr)5190 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
5191   HandleShift(shr);
5192 }
5193 
VisitUShr(HUShr * ushr)5194 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
5195   HandleShift(ushr);
5196 }
5197 
VisitUShr(HUShr * ushr)5198 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
5199   HandleShift(ushr);
5200 }
5201 
VisitNewInstance(HNewInstance * instruction)5202 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
5203   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5204       instruction, LocationSummary::kCallOnMainOnly);
5205   locations->SetOut(Location::RegisterLocation(EAX));
5206   InvokeRuntimeCallingConvention calling_convention;
5207   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5208 }
5209 
VisitNewInstance(HNewInstance * instruction)5210 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
5211   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5212   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5213   DCHECK(!codegen_->IsLeafMethod());
5214 }
5215 
VisitNewArray(HNewArray * instruction)5216 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
5217   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5218       instruction, LocationSummary::kCallOnMainOnly);
5219   locations->SetOut(Location::RegisterLocation(EAX));
5220   InvokeRuntimeCallingConvention calling_convention;
5221   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5222   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5223 }
5224 
VisitNewArray(HNewArray * instruction)5225 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
5226   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5227   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5228   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5229   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5230   DCHECK(!codegen_->IsLeafMethod());
5231 }
5232 
VisitParameterValue(HParameterValue * instruction)5233 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
5234   LocationSummary* locations =
5235       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5236   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5237   if (location.IsStackSlot()) {
5238     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5239   } else if (location.IsDoubleStackSlot()) {
5240     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5241   }
5242   locations->SetOut(location);
5243 }
5244 
VisitParameterValue(HParameterValue * instruction)5245 void InstructionCodeGeneratorX86::VisitParameterValue(
5246     [[maybe_unused]] HParameterValue* instruction) {}
5247 
VisitCurrentMethod(HCurrentMethod * instruction)5248 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5249   LocationSummary* locations =
5250       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5251   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5252 }
5253 
VisitCurrentMethod(HCurrentMethod * instruction)5254 void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) {
5255 }
5256 
VisitClassTableGet(HClassTableGet * instruction)5257 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5258   LocationSummary* locations =
5259       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5260   locations->SetInAt(0, Location::RequiresRegister());
5261   locations->SetOut(Location::RequiresRegister());
5262 }
5263 
VisitClassTableGet(HClassTableGet * instruction)5264 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5265   LocationSummary* locations = instruction->GetLocations();
5266   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5267     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5268         instruction->GetIndex(), kX86PointerSize).SizeValue();
5269     __ movl(locations->Out().AsRegister<Register>(),
5270             Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5271   } else {
5272     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5273         instruction->GetIndex(), kX86PointerSize));
5274     __ movl(locations->Out().AsRegister<Register>(),
5275             Address(locations->InAt(0).AsRegister<Register>(),
5276                     mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5277     // temp = temp->GetImtEntryAt(method_offset);
5278     __ movl(locations->Out().AsRegister<Register>(),
5279             Address(locations->Out().AsRegister<Register>(), method_offset));
5280   }
5281 }
5282 
VisitNot(HNot * not_)5283 void LocationsBuilderX86::VisitNot(HNot* not_) {
5284   LocationSummary* locations =
5285       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5286   locations->SetInAt(0, Location::RequiresRegister());
5287   locations->SetOut(Location::SameAsFirstInput());
5288 }
5289 
VisitNot(HNot * not_)5290 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5291   LocationSummary* locations = not_->GetLocations();
5292   Location in = locations->InAt(0);
5293   Location out = locations->Out();
5294   DCHECK(in.Equals(out));
5295   switch (not_->GetResultType()) {
5296     case DataType::Type::kInt32:
5297       __ notl(out.AsRegister<Register>());
5298       break;
5299 
5300     case DataType::Type::kInt64:
5301       __ notl(out.AsRegisterPairLow<Register>());
5302       __ notl(out.AsRegisterPairHigh<Register>());
5303       break;
5304 
5305     default:
5306       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5307   }
5308 }
5309 
VisitBooleanNot(HBooleanNot * bool_not)5310 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5311   LocationSummary* locations =
5312       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5313   locations->SetInAt(0, Location::RequiresRegister());
5314   locations->SetOut(Location::SameAsFirstInput());
5315 }
5316 
VisitBooleanNot(HBooleanNot * bool_not)5317 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5318   LocationSummary* locations = bool_not->GetLocations();
5319   Location in = locations->InAt(0);
5320   Location out = locations->Out();
5321   DCHECK(in.Equals(out));
5322   __ xorl(out.AsRegister<Register>(), Immediate(1));
5323 }
5324 
VisitCompare(HCompare * compare)5325 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5326   LocationSummary* locations =
5327       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5328   switch (compare->GetComparisonType()) {
5329     case DataType::Type::kBool:
5330     case DataType::Type::kUint8:
5331     case DataType::Type::kInt8:
5332     case DataType::Type::kUint16:
5333     case DataType::Type::kInt16:
5334     case DataType::Type::kInt32:
5335     case DataType::Type::kUint32:
5336     case DataType::Type::kInt64:
5337     case DataType::Type::kUint64: {
5338       locations->SetInAt(0, Location::RequiresRegister());
5339       locations->SetInAt(1, Location::Any());
5340       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5341       break;
5342     }
5343     case DataType::Type::kFloat32:
5344     case DataType::Type::kFloat64: {
5345       locations->SetInAt(0, Location::RequiresFpuRegister());
5346       if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5347         DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5348       } else if (compare->InputAt(1)->IsConstant()) {
5349         locations->SetInAt(1, Location::RequiresFpuRegister());
5350       } else {
5351         locations->SetInAt(1, Location::Any());
5352       }
5353       locations->SetOut(Location::RequiresRegister());
5354       break;
5355     }
5356     default:
5357       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5358   }
5359 }
5360 
VisitCompare(HCompare * compare)5361 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5362   LocationSummary* locations = compare->GetLocations();
5363   Register out = locations->Out().AsRegister<Register>();
5364   Location left = locations->InAt(0);
5365   Location right = locations->InAt(1);
5366 
5367   NearLabel less, greater, done;
5368   Condition less_cond = kLess;
5369   Condition greater_cond = kGreater;
5370 
5371   switch (compare->GetComparisonType()) {
5372     case DataType::Type::kUint32:
5373       less_cond = kBelow;
5374       // greater_cond is not needed below.
5375       FALLTHROUGH_INTENDED;
5376     case DataType::Type::kBool:
5377     case DataType::Type::kUint8:
5378     case DataType::Type::kInt8:
5379     case DataType::Type::kUint16:
5380     case DataType::Type::kInt16:
5381     case DataType::Type::kInt32: {
5382       codegen_->GenerateIntCompare(left, right);
5383       break;
5384     }
5385     case DataType::Type::kUint64:
5386       less_cond = kBelow;
5387       greater_cond = kAbove;
5388       FALLTHROUGH_INTENDED;
5389     case DataType::Type::kInt64: {
5390       Register left_low = left.AsRegisterPairLow<Register>();
5391       Register left_high = left.AsRegisterPairHigh<Register>();
5392       int32_t val_low = 0;
5393       int32_t val_high = 0;
5394       bool right_is_const = false;
5395 
5396       if (right.IsConstant()) {
5397         DCHECK(right.GetConstant()->IsLongConstant());
5398         right_is_const = true;
5399         int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5400         val_low = Low32Bits(val);
5401         val_high = High32Bits(val);
5402       }
5403 
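           // Compare the high halves first; only when they are equal do the low halves,
           // compared as unsigned, decide the result.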
5404       if (right.IsRegisterPair()) {
5405         __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5406       } else if (right.IsDoubleStackSlot()) {
5407         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5408       } else {
5409         DCHECK(right_is_const) << right;
5410         codegen_->Compare32BitValue(left_high, val_high);
5411       }
5412       __ j(less_cond, &less);        // High part compare.
5413       __ j(greater_cond, &greater);  // High part compare.
5414       if (right.IsRegisterPair()) {
5415         __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5416       } else if (right.IsDoubleStackSlot()) {
5417         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5418       } else {
5419         DCHECK(right_is_const) << right;
5420         codegen_->Compare32BitValue(left_low, val_low);
5421       }
5422       less_cond = kBelow;  // for CF (unsigned).
5423       // greater_cond is not needed below.
5424       break;
5425     }
5426     case DataType::Type::kFloat32: {
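           // An unordered comparison (at least one NaN) goes to greater or less according
           // to the bias.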
5427       GenerateFPCompare(left, right, compare, false);
5428       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5429       less_cond = kBelow;  // for CF (floats).
5430       break;
5431     }
5432     case DataType::Type::kFloat64: {
5433       GenerateFPCompare(left, right, compare, true);
5434       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5435       less_cond = kBelow;  // for CF (floats).
5436       break;
5437     }
5438     default:
5439       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5440   }
5441 
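       // Materialize the three-way result: 0 for equal, 1 for greater, -1 for less.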
5442   __ movl(out, Immediate(0));
5443   __ j(kEqual, &done);
5444   __ j(less_cond, &less);
5445 
5446   __ Bind(&greater);
5447   __ movl(out, Immediate(1));
5448   __ jmp(&done);
5449 
5450   __ Bind(&less);
5451   __ movl(out, Immediate(-1));
5452 
5453   __ Bind(&done);
5454 }
5455 
VisitPhi(HPhi * instruction)5456 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5457   LocationSummary* locations =
5458       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5459   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5460     locations->SetInAt(i, Location::Any());
5461   }
5462   locations->SetOut(Location::Any());
5463 }
5464 
VisitPhi(HPhi * instruction)5465 void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) {
5466   LOG(FATAL) << "Unreachable";
5467 }
5468 
GenerateMemoryBarrier(MemBarrierKind kind)5469 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5470   /*
5471    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
5472    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5473    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5474    */
5475   switch (kind) {
5476     case MemBarrierKind::kAnyAny: {
5477       MemoryFence();
5478       break;
5479     }
5480     case MemBarrierKind::kAnyStore:
5481     case MemBarrierKind::kLoadAny:
5482     case MemBarrierKind::kStoreStore: {
5483       // nop
5484       break;
5485     }
5486     case MemBarrierKind::kNTStoreStore:
5487       // Non-Temporal Store/Store needs an explicit fence.
5488       MemoryFence(/* non-temporal= */ true);
5489       break;
5490   }
5491 }
5492 
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method)5493 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5494     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5495     [[maybe_unused]] ArtMethod* method) {
5496   return desired_dispatch_info;
5497 }
5498 
GetInvokeExtraParameter(HInvoke * invoke,Register temp)5499 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5500   if (invoke->IsInvokeStaticOrDirect()) {
5501     return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5502   }
5503   DCHECK(invoke->IsInvokeInterface());
5504   Location location =
5505       invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5506   return location.AsRegister<Register>();
5507 }
5508 
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)5509 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5510                                                                  Register temp) {
5511   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5512   if (!invoke->GetLocations()->Intrinsified()) {
5513     return location.AsRegister<Register>();
5514   }
5515   // For intrinsics we allow any location, so it may be on the stack.
5516   if (!location.IsRegister()) {
5517     __ movl(temp, Address(ESP, location.GetStackIndex()));
5518     return temp;
5519   }
5520   // For register locations, check if the register was saved. If so, get it from the stack.
5521   // Note: There is a chance that the register was saved but not overwritten, so we could
5522   // save one load. However, since this is just an intrinsic slow path, we prefer this
5523   // simple and more robust approach rather than trying to determine if that's the case.
5524   SlowPathCode* slow_path = GetCurrentSlowPath();
5525   DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
5526   if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5527     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5528     __ movl(temp, Address(ESP, stack_offset));
5529     return temp;
5530   }
5531   return location.AsRegister<Register>();
5532 }
5533 
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)5534 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5535   switch (load_kind) {
5536     case MethodLoadKind::kBootImageLinkTimePcRelative: {
5537       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5538       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5539       __ leal(temp.AsRegister<Register>(),
5540               Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5541       RecordBootImageMethodPatch(invoke);
5542       break;
5543     }
5544     case MethodLoadKind::kBootImageRelRo: {
5545       size_t index = invoke->IsInvokeInterface()
5546           ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5547           : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5548       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5549       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5550       RecordBootImageRelRoPatch(
5551           invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5552           GetBootImageOffset(invoke));
5553       break;
5554     }
5555     case MethodLoadKind::kAppImageRelRo: {
5556       DCHECK(GetCompilerOptions().IsAppImage());
5557       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5558       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5559       RecordAppImageMethodPatch(invoke);
5560       break;
5561     }
5562     case MethodLoadKind::kBssEntry: {
5563       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5564       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5565       RecordMethodBssEntryPatch(invoke);
5566       // No need for memory fence, thanks to the x86 memory model.
5567       break;
5568     }
5569     case MethodLoadKind::kJitDirectAddress: {
5570       __ movl(temp.AsRegister<Register>(),
5571               Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5572       break;
5573     }
5574     case MethodLoadKind::kRuntimeCall: {
5575       // Test situation, don't do anything.
5576       break;
5577     }
5578     default: {
5579       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5580       UNREACHABLE();
5581     }
5582   }
5583 }
5584 
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5585 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5586     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5587   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
5588   switch (invoke->GetMethodLoadKind()) {
5589     case MethodLoadKind::kStringInit: {
5590       // temp = thread->string_init_entrypoint
5591       uint32_t offset =
5592           GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5593       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5594       break;
5595     }
5596     case MethodLoadKind::kRecursive: {
5597       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5598       break;
5599     }
5600     case MethodLoadKind::kRuntimeCall: {
5601       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5602       return;  // No code pointer retrieval; the runtime performs the call directly.
5603     }
5604     case MethodLoadKind::kBootImageLinkTimePcRelative:
5605       // For kCallCriticalNative we skip loading the method and do the call directly.
5606       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5607         break;
5608       }
5609       FALLTHROUGH_INTENDED;
5610     default: {
5611       LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5612     }
5613   }
5614 
5615   switch (invoke->GetCodePtrLocation()) {
5616     case CodePtrLocation::kCallSelf:
5617       DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5618       __ call(GetFrameEntryLabel());
5619       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5620       break;
5621     case CodePtrLocation::kCallCriticalNative: {
5622       size_t out_frame_size =
5623           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5624                                     kNativeStackAlignment,
5625                                     GetCriticalNativeDirectCallFrameSize>(invoke);
5626       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5627         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5628         Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5629         __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5630         RecordBootImageJniEntrypointPatch(invoke);
5631       } else {
5632         // (callee_method + offset_of_jni_entry_point)()
5633         __ call(Address(callee_method.AsRegister<Register>(),
5634                         ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5635       }
5636       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5637       if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5638         // Create space for conversion.
5639         out_frame_size = 8u;
5640         IncreaseFrame(out_frame_size);
5641       }
5642       // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
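           // In particular, the native ABI returns floating-point values in ST(0) while the
           // managed ABI expects them in XMM0, hence the store/reload through the stack below.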
5643       switch (invoke->GetType()) {
5644         case DataType::Type::kBool:
5645           __ movzxb(EAX, AL);
5646           break;
5647         case DataType::Type::kInt8:
5648           __ movsxb(EAX, AL);
5649           break;
5650         case DataType::Type::kUint16:
5651           __ movzxw(EAX, EAX);
5652           break;
5653         case DataType::Type::kInt16:
5654           __ movsxw(EAX, EAX);
5655           break;
5656         case DataType::Type::kFloat32:
5657           __ fstps(Address(ESP, 0));
5658           __ movss(XMM0, Address(ESP, 0));
5659           break;
5660         case DataType::Type::kFloat64:
5661           __ fstpl(Address(ESP, 0));
5662           __ movsd(XMM0, Address(ESP, 0));
5663           break;
5664         case DataType::Type::kInt32:
5665         case DataType::Type::kInt64:
5666         case DataType::Type::kVoid:
5667           break;
5668         default:
5669           DCHECK(false) << invoke->GetType();
5670           break;
5671       }
5672       if (out_frame_size != 0u) {
5673         DecreaseFrame(out_frame_size);
5674       }
5675       break;
5676     }
5677     case CodePtrLocation::kCallArtMethod:
5678       // (callee_method + offset_of_quick_compiled_code)()
5679       __ call(Address(callee_method.AsRegister<Register>(),
5680                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5681                           kX86PointerSize).Int32Value()));
5682       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5683       break;
5684   }
5685 
5686   DCHECK(!IsLeafMethod());
5687 }
5688 
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5689 void CodeGeneratorX86::GenerateVirtualCall(
5690     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5691   Register temp = temp_in.AsRegister<Register>();
5692   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5693       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5694 
5695   // Use the calling convention instead of the location of the receiver, as
5696   // intrinsics may have put the receiver in a different register. In the intrinsics
5697   // slow path, the arguments have been moved to the right place, so here we are
5698   // guaranteed that the receiver is the first register of the calling convention.
5699   InvokeDexCallingConvention calling_convention;
5700   Register receiver = calling_convention.GetRegisterAt(0);
5701   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5702   // /* HeapReference<Class> */ temp = receiver->klass_
5703   __ movl(temp, Address(receiver, class_offset));
5704   MaybeRecordImplicitNullCheck(invoke);
5705   // Instead of simply (possibly) unpoisoning `temp` here, we should
5706   // emit a read barrier for the previous class reference load.
5707   // However this is not required in practice, as this is an
5708   // intermediate/temporary reference and because the current
5709   // concurrent copying collector keeps the from-space memory
5710   // intact/accessible until the end of the marking phase (the
5711   // concurrent copying collector may not do so in the future).
5712   __ MaybeUnpoisonHeapReference(temp);
5713 
5714   MaybeGenerateInlineCacheCheck(invoke, temp);
5715 
5716   // temp = temp->GetMethodAt(method_offset);
5717   __ movl(temp, Address(temp, method_offset));
5718   // call temp->GetEntryPoint();
5719   __ call(Address(
5720       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5721   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5722 }
5723 
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5724 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5725                                                      uint32_t intrinsic_data) {
5726   boot_image_other_patches_.emplace_back(
5727       method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5728   __ Bind(&boot_image_other_patches_.back().label);
5729 }
5730 
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5731 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5732                                                  uint32_t boot_image_offset) {
5733   boot_image_other_patches_.emplace_back(
5734       method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5735   __ Bind(&boot_image_other_patches_.back().label);
5736 }
5737 
RecordBootImageMethodPatch(HInvoke * invoke)5738 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5739   size_t index = invoke->IsInvokeInterface()
5740       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5741       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5742   HX86ComputeBaseMethodAddress* method_address =
5743       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5744   boot_image_method_patches_.emplace_back(
5745       method_address,
5746       invoke->GetResolvedMethodReference().dex_file,
5747       invoke->GetResolvedMethodReference().index);
5748   __ Bind(&boot_image_method_patches_.back().label);
5749 }
5750 
RecordAppImageMethodPatch(HInvoke * invoke)5751 void CodeGeneratorX86::RecordAppImageMethodPatch(HInvoke* invoke) {
5752   size_t index = invoke->IsInvokeInterface()
5753       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5754       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5755   HX86ComputeBaseMethodAddress* method_address =
5756       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5757   app_image_method_patches_.emplace_back(
5758       method_address,
5759       invoke->GetResolvedMethodReference().dex_file,
5760       invoke->GetResolvedMethodReference().index);
5761   __ Bind(&app_image_method_patches_.back().label);
5762 }
5763 
RecordMethodBssEntryPatch(HInvoke * invoke)5764 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5765   size_t index = invoke->IsInvokeInterface()
5766       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5767       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5768   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5769          GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5770          ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5771                          invoke->GetMethodReference().dex_file));
5772   HX86ComputeBaseMethodAddress* method_address =
5773       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5774   // Add the patch entry and bind its label at the end of the instruction.
5775   method_bss_entry_patches_.emplace_back(
5776       method_address,
5777       invoke->GetMethodReference().dex_file,
5778       invoke->GetMethodReference().index);
5779   __ Bind(&method_bss_entry_patches_.back().label);
5780 }
5781 
RecordBootImageTypePatch(HLoadClass * load_class)5782 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5783   HX86ComputeBaseMethodAddress* method_address =
5784       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5785   boot_image_type_patches_.emplace_back(
5786       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5787   __ Bind(&boot_image_type_patches_.back().label);
5788 }
5789 
RecordAppImageTypePatch(HLoadClass * load_class)5790 void CodeGeneratorX86::RecordAppImageTypePatch(HLoadClass* load_class) {
5791   HX86ComputeBaseMethodAddress* method_address =
5792       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5793   app_image_type_patches_.emplace_back(
5794       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5795   __ Bind(&app_image_type_patches_.back().label);
5796 }
5797 
NewTypeBssEntryPatch(HLoadClass * load_class)5798 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5799   HX86ComputeBaseMethodAddress* method_address =
5800       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5801   ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5802   switch (load_class->GetLoadKind()) {
5803     case HLoadClass::LoadKind::kBssEntry:
5804       patches = &type_bss_entry_patches_;
5805       break;
5806     case HLoadClass::LoadKind::kBssEntryPublic:
5807       patches = &public_type_bss_entry_patches_;
5808       break;
5809     case HLoadClass::LoadKind::kBssEntryPackage:
5810       patches = &package_type_bss_entry_patches_;
5811       break;
5812     default:
5813       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5814       UNREACHABLE();
5815   }
5816   patches->emplace_back(
5817       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5818   return &patches->back().label;
5819 }
5820 
RecordBootImageStringPatch(HLoadString * load_string)5821 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5822   HX86ComputeBaseMethodAddress* method_address =
5823       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5824   boot_image_string_patches_.emplace_back(
5825       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5826   __ Bind(&boot_image_string_patches_.back().label);
5827 }
5828 
NewStringBssEntryPatch(HLoadString * load_string)5829 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5830   HX86ComputeBaseMethodAddress* method_address =
5831       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5832   string_bss_entry_patches_.emplace_back(
5833       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5834   return &string_bss_entry_patches_.back().label;
5835 }
5836 
RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect * invoke)5837 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5838   HX86ComputeBaseMethodAddress* method_address =
5839       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5840   boot_image_jni_entrypoint_patches_.emplace_back(
5841       method_address,
5842       invoke->GetResolvedMethodReference().dex_file,
5843       invoke->GetResolvedMethodReference().index);
5844   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5845 }
5846 
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5847 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5848                                             uint32_t boot_image_reference,
5849                                             HInvokeStaticOrDirect* invoke) {
5850   if (GetCompilerOptions().IsBootImage()) {
5851     HX86ComputeBaseMethodAddress* method_address =
5852         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5853     DCHECK(method_address != nullptr);
5854     Register method_address_reg =
5855         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5856     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5857     RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5858   } else if (GetCompilerOptions().GetCompilePic()) {
5859     HX86ComputeBaseMethodAddress* method_address =
5860         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5861     DCHECK(method_address != nullptr);
5862     Register method_address_reg =
5863         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5864     __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5865     RecordBootImageRelRoPatch(method_address, boot_image_reference);
5866   } else {
5867     DCHECK(GetCompilerOptions().IsJitCompiler());
5868     gc::Heap* heap = Runtime::Current()->GetHeap();
5869     DCHECK(!heap->GetBootImageSpaces().empty());
5870     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5871     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5872   }
5873 }
5874 
LoadIntrinsicDeclaringClass(Register reg,HInvokeStaticOrDirect * invoke)5875 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5876   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5877   if (GetCompilerOptions().IsBootImage()) {
5878     // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5879     HX86ComputeBaseMethodAddress* method_address =
5880         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5881     DCHECK(method_address != nullptr);
5882     Register method_address_reg =
5883         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5884     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5885     MethodReference target_method = invoke->GetResolvedMethodReference();
5886     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5887     boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5888     __ Bind(&boot_image_type_patches_.back().label);
5889   } else {
5890     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5891     LoadBootImageAddress(reg, boot_image_offset, invoke);
5892   }
5893 }
5894 
5895 // The label points to the end of the "movl" (or other) instruction, but the literal offset
5896 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
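     // For example, for a pc-relative "movl reg, [base + disp32]" the bound label sits right
     // after the 4-byte disp32, so subtracting 4 from the label position points at it.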
5897 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
5898 
5899 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5900 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5901     const ArenaDeque<X86PcRelativePatchInfo>& infos,
5902     ArenaVector<linker::LinkerPatch>* linker_patches) {
5903   for (const X86PcRelativePatchInfo& info : infos) {
5904     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5905     linker_patches->push_back(Factory(literal_offset,
5906                                       info.target_dex_file,
5907                                       GetMethodAddressOffset(info.method_address),
5908                                       info.offset_or_index));
5909   }
5910 }
5911 
5912 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5913 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5914                                      const DexFile* target_dex_file,
5915                                      uint32_t pc_insn_offset,
5916                                      uint32_t boot_image_offset) {
5917   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5918   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5919 }
5920 
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5921 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5922   DCHECK(linker_patches->empty());
5923   size_t size =
5924       boot_image_method_patches_.size() +
5925       app_image_method_patches_.size() +
5926       method_bss_entry_patches_.size() +
5927       boot_image_type_patches_.size() +
5928       app_image_type_patches_.size() +
5929       type_bss_entry_patches_.size() +
5930       public_type_bss_entry_patches_.size() +
5931       package_type_bss_entry_patches_.size() +
5932       boot_image_string_patches_.size() +
5933       string_bss_entry_patches_.size() +
5934       boot_image_jni_entrypoint_patches_.size() +
5935       boot_image_other_patches_.size();
5936   linker_patches->reserve(size);
5937   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5938     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5939         boot_image_method_patches_, linker_patches);
5940     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5941         boot_image_type_patches_, linker_patches);
5942     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5943         boot_image_string_patches_, linker_patches);
5944   } else {
5945     DCHECK(boot_image_method_patches_.empty());
5946     DCHECK(boot_image_type_patches_.empty());
5947     DCHECK(boot_image_string_patches_.empty());
5948   }
5949   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
5950   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5951   if (GetCompilerOptions().IsBootImage()) {
5952     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5953         boot_image_other_patches_, linker_patches);
5954   } else {
5955     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5956         boot_image_other_patches_, linker_patches);
5957     EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
5958         app_image_method_patches_, linker_patches);
5959     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5960         app_image_type_patches_, linker_patches);
5961   }
5962   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5963       method_bss_entry_patches_, linker_patches);
5964   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5965       type_bss_entry_patches_, linker_patches);
5966   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5967       public_type_bss_entry_patches_, linker_patches);
5968   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5969       package_type_bss_entry_patches_, linker_patches);
5970   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5971       string_bss_entry_patches_, linker_patches);
5972   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5973       boot_image_jni_entrypoint_patches_, linker_patches);
5974   DCHECK_EQ(size, linker_patches->size());
5975 }
5976 
MaybeMarkGCCard(Register temp,Register card,Register object,Register value,bool emit_null_check)5977 void CodeGeneratorX86::MaybeMarkGCCard(
5978     Register temp, Register card, Register object, Register value, bool emit_null_check) {
5979   NearLabel is_null;
5980   if (emit_null_check) {
5981     __ testl(value, value);
5982     __ j(kEqual, &is_null);
5983   }
5984   MarkGCCard(temp, card, object);
5985   if (emit_null_check) {
5986     __ Bind(&is_null);
5987   }
5988 }
5989 
MarkGCCard(Register temp,Register card,Register object)5990 void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object) {
5991   // Load the address of the card table into `card`.
5992   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5993   // Calculate the offset (in the card table) of the card corresponding to `object`.
5994   __ movl(temp, object);
5995   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5996   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5997   // `object`'s card.
5998   //
5999   // Register `card` contains the address of the card table. Note that the card
6000   // table's base is biased during its creation so that it always starts at an
6001   // address whose least-significant byte is equal to `kCardDirty` (see
6002   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6003   // below writes the `kCardDirty` (byte) value into the `object`'s card
6004   // (located at `card + object >> kCardShift`).
6005   //
6006   // This dual use of the value in register `card` (1. to calculate the location
6007   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6008   // (no need to explicitly load `kCardDirty` as an immediate value).
6009   __ movb(Address(temp, card, TIMES_1, 0),
6010           X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
6011 }
6012 
CheckGCCardIsValid(Register temp,Register card,Register object)6013 void CodeGeneratorX86::CheckGCCardIsValid(Register temp, Register card, Register object) {
6014   NearLabel done;
6015   __ j(kEqual, &done);
6016   // Load the address of the card table into `card`.
6017   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
6018   // Calculate the offset (in the card table) of the card corresponding to `object`.
6019   __ movl(temp, object);
6020   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
6021   // assert (!clean || !self->is_gc_marking)
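       // In other words, trap (int3 below) if the card is still clean while the GC is marking.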
6022   __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6023   __ j(kNotEqual, &done);
6024   __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()), Immediate(0));
6025   __ j(kEqual, &done);
6026   __ int3();
6027   __ Bind(&done);
6028 }
6029 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)6030 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
6031   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6032 
6033   bool object_field_get_with_read_barrier =
6034       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6035   LocationSummary* locations =
6036       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6037                                                        codegen_->EmitReadBarrier()
6038                                                            ? LocationSummary::kCallOnSlowPath
6039                                                            : LocationSummary::kNoCall);
6040   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6041     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6042   }
6043   // receiver_input
6044   locations->SetInAt(0, Location::RequiresRegister());
6045   if (DataType::IsFloatingPointType(instruction->GetType())) {
6046     locations->SetOut(Location::RequiresFpuRegister());
6047   } else {
6048     // The output overlaps in the case of long: we don't want the low move
6049     // to overwrite the object's location.  Likewise, in the case of
6050     // an object field get with read barriers enabled, we do not want
6051     // the move to overwrite the object's location, as we need it to emit
6052     // the read barrier.
6053     locations->SetOut(
6054         Location::RequiresRegister(),
6055         (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64)
6056             ? Location::kOutputOverlap
6057             : Location::kNoOutputOverlap);
6058   }
6059 
6060   if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
6061     // Long values can be loaded atomically into an XMM using movsd.
6062     // So we use an XMM register as a temp to achieve atomicity (first
6063     // load the temp into the XMM and then copy the XMM into the
6064     // output, 32 bits at a time).
6065     locations->AddTemp(Location::RequiresFpuRegister());
6066   }
6067 }
6068 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)6069 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
6070                                                  const FieldInfo& field_info) {
6071   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6072 
6073   LocationSummary* locations = instruction->GetLocations();
6074   Location base_loc = locations->InAt(0);
6075   Register base = base_loc.AsRegister<Register>();
6076   Location out = locations->Out();
6077   bool is_volatile = field_info.IsVolatile();
6078   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6079   DataType::Type load_type = instruction->GetType();
6080   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6081 
6082   if (load_type == DataType::Type::kReference) {
6083     // /* HeapReference<Object> */ out = *(base + offset)
6084     if (codegen_->EmitBakerReadBarrier()) {
6085       // Note that a potential implicit null check is handled in this
6086       // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
6087       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6088           instruction, out, base, offset, /* needs_null_check= */ true);
6089       if (is_volatile) {
6090         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6091       }
6092     } else {
6093       __ movl(out.AsRegister<Register>(), Address(base, offset));
6094       codegen_->MaybeRecordImplicitNullCheck(instruction);
6095       if (is_volatile) {
6096         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6097       }
6098       // If read barriers are enabled, emit read barriers other than
6099       // Baker's using a slow path (and also unpoison the loaded
6100       // reference, if heap poisoning is enabled).
6101       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
6102     }
6103   } else {
6104     Address src(base, offset);
6105     XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
6106         ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
6107         : kNoXmmRegister;
6108     codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
6109     if (is_volatile) {
6110       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6111     }
6112   }
6113 }
6114 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,WriteBarrierKind write_barrier_kind)6115 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
6116                                          const FieldInfo& field_info,
6117                                          WriteBarrierKind write_barrier_kind) {
6118   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6119 
6120   LocationSummary* locations =
6121       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6122   locations->SetInAt(0, Location::RequiresRegister());
6123   bool is_volatile = field_info.IsVolatile();
6124   DataType::Type field_type = field_info.GetFieldType();
6125   bool is_byte_type = DataType::Size(field_type) == 1u;
6126 
6127   // The register allocator does not support multiple
6128   // inputs that die at entry with one in a specific register.
6129   if (is_byte_type) {
6130     // Ensure the value is in a byte register.
6131     locations->SetInAt(1, Location::RegisterLocation(EAX));
6132   } else if (DataType::IsFloatingPointType(field_type)) {
6133     if (is_volatile && field_type == DataType::Type::kFloat64) {
6134       // In order to satisfy the semantics of volatile, this must be a single instruction store.
6135       locations->SetInAt(1, Location::RequiresFpuRegister());
6136     } else {
6137       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
6138     }
6139   } else if (is_volatile && field_type == DataType::Type::kInt64) {
6140     // In order to satisfy the semantics of volatile, this must be a single instruction store.
6141     locations->SetInAt(1, Location::RequiresRegister());
6142 
6143     // A 64-bit value can be written atomically to an address with movsd and an XMM register.
6144     // We need two XMM registers because there's no easier way to (bit) copy a register pair
6145     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
6146     // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
6147     // isolated cases when we need this it isn't worth adding the extra complexity.
6148     locations->AddTemp(Location::RequiresFpuRegister());
6149     locations->AddTemp(Location::RequiresFpuRegister());
6150   } else {
6151     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6152 
6153     bool needs_write_barrier =
6154         codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6155     bool check_gc_card =
6156         codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
6157 
6158     if (needs_write_barrier || check_gc_card) {
6159       locations->AddTemp(Location::RequiresRegister());
6160       // Ensure the card is in a byte register.
6161       locations->AddTemp(Location::RegisterLocation(ECX));
6162     } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6163       locations->AddTemp(Location::RequiresRegister());
6164     }
6165   }
6166 }
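// Illustrative sketch (added for exposition, not part of the upstream file; register names
// below are placeholders): the two FP temps requested above for a volatile Int64 store are
// used to build the value in one XMM register so it can be written with a single movsd:
//   movd      xmm_temp1, value_lo         // low 32 bits of the register pair
//   movd      xmm_temp2, value_hi         // high 32 bits
//   punpckldq xmm_temp1, xmm_temp2        // xmm_temp1 = value_hi:value_lo
//   movsd     [base + offset], xmm_temp1  // one 64-bit store, hence atomic
// See the kInt64 case of InstructionCodeGeneratorX86::HandleFieldSet below.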
6167 
6168 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6169                                                  uint32_t value_index,
6170                                                  DataType::Type field_type,
6171                                                  Address field_addr,
6172                                                  Register base,
6173                                                  bool is_volatile,
6174                                                  bool value_can_be_null,
6175                                                  WriteBarrierKind write_barrier_kind) {
6176   LocationSummary* locations = instruction->GetLocations();
6177   Location value = locations->InAt(value_index);
6178   bool needs_write_barrier =
6179       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6180 
6181   if (is_volatile) {
6182     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6183   }
6184 
6185   bool maybe_record_implicit_null_check_done = false;
6186 
6187   switch (field_type) {
6188     case DataType::Type::kBool:
6189     case DataType::Type::kUint8:
6190     case DataType::Type::kInt8: {
6191       if (value.IsConstant()) {
6192         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6193       } else {
6194         __ movb(field_addr, value.AsRegister<ByteRegister>());
6195       }
6196       break;
6197     }
6198 
6199     case DataType::Type::kUint16:
6200     case DataType::Type::kInt16: {
6201       if (value.IsConstant()) {
6202         __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6203       } else {
6204         __ movw(field_addr, value.AsRegister<Register>());
6205       }
6206       break;
6207     }
6208 
6209     case DataType::Type::kInt32:
6210     case DataType::Type::kReference: {
6211       if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6212         if (value.IsConstant()) {
6213           DCHECK(value.GetConstant()->IsNullConstant())
6214               << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6215               << " is not null. Instruction " << *instruction;
6216           // No need to poison null, just do a movl.
6217           __ movl(field_addr, Immediate(0));
6218         } else {
6219           Register temp = locations->GetTemp(0).AsRegister<Register>();
6220           __ movl(temp, value.AsRegister<Register>());
6221           __ PoisonHeapReference(temp);
6222           __ movl(field_addr, temp);
6223         }
6224       } else if (value.IsConstant()) {
6225         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6226         __ movl(field_addr, Immediate(v));
6227       } else {
6228         DCHECK(value.IsRegister()) << value;
6229         __ movl(field_addr, value.AsRegister<Register>());
6230       }
6231       break;
6232     }
6233 
6234     case DataType::Type::kInt64: {
6235       if (is_volatile) {
6236         XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
6237         XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
6238         __ movd(temp1, value.AsRegisterPairLow<Register>());
6239         __ movd(temp2, value.AsRegisterPairHigh<Register>());
6240         __ punpckldq(temp1, temp2);
6241         __ movsd(field_addr, temp1);
6242         codegen_->MaybeRecordImplicitNullCheck(instruction);
6243       } else if (value.IsConstant()) {
6244         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6245         __ movl(field_addr, Immediate(Low32Bits(v)));
6246         codegen_->MaybeRecordImplicitNullCheck(instruction);
6247         __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6248       } else {
6249         __ movl(field_addr, value.AsRegisterPairLow<Register>());
6250         codegen_->MaybeRecordImplicitNullCheck(instruction);
6251         __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
6252       }
6253       maybe_record_implicit_null_check_done = true;
6254       break;
6255     }
6256 
6257     case DataType::Type::kFloat32: {
6258       if (value.IsConstant()) {
6259         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6260         __ movl(field_addr, Immediate(v));
6261       } else {
6262         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
6263       }
6264       break;
6265     }
6266 
6267     case DataType::Type::kFloat64: {
6268       if (value.IsConstant()) {
6269         DCHECK(!is_volatile);
6270         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6271         __ movl(field_addr, Immediate(Low32Bits(v)));
6272         codegen_->MaybeRecordImplicitNullCheck(instruction);
6273         __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6274         maybe_record_implicit_null_check_done = true;
6275       } else {
6276         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
6277       }
6278       break;
6279     }
6280 
6281     case DataType::Type::kUint32:
6282     case DataType::Type::kUint64:
6283     case DataType::Type::kVoid:
6284       LOG(FATAL) << "Unreachable type " << field_type;
6285       UNREACHABLE();
6286   }
6287 
6288   if (!maybe_record_implicit_null_check_done) {
6289     codegen_->MaybeRecordImplicitNullCheck(instruction);
6290   }
6291 
6292   if (needs_write_barrier) {
6293     Register temp = locations->GetTemp(0).AsRegister<Register>();
6294     Register card = locations->GetTemp(1).AsRegister<Register>();
6295     if (value.IsConstant()) {
6296       DCHECK(value.GetConstant()->IsNullConstant())
6297           << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6298           << " is not null. Instruction: " << *instruction;
6299       if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6300         codegen_->MarkGCCard(temp, card, base);
6301       }
6302     } else {
6303       codegen_->MaybeMarkGCCard(
6304           temp,
6305           card,
6306           base,
6307           value.AsRegister<Register>(),
6308           value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6309     }
6310   } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6311     if (value.IsConstant()) {
6312       // If we are storing a constant for a reference, we are in the case where we are storing
6313       // null but we cannot skip it as this write barrier is being relied on by coalesced write
6314       // barriers.
6315       DCHECK(value.GetConstant()->IsNullConstant())
6316           << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6317           << " is not null. Instruction: " << *instruction;
6318       // No need to check the dirty bit as this value is null.
6319     } else {
6320       Register temp = locations->GetTemp(0).AsRegister<Register>();
6321       Register card = locations->GetTemp(1).AsRegister<Register>();
6322       codegen_->CheckGCCardIsValid(temp, card, base);
6323     }
6324   }
6325 
6326   if (is_volatile) {
6327     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6328   }
6329 }
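// Illustrative summary (added for exposition): for a volatile reference field set the
// overall order produced by the code above is
//   GenerateMemoryBarrier(kAnyStore)    // before the store
//   movl [base + offset], value         // value is poisoned first when heap poisoning is on
//   MaybeMarkGCCard(...)                // write barrier (or only a card validity check)
//   GenerateMemoryBarrier(kAnyAny)      // after the store
// so the card mark is emitted between the field store and the trailing barrier.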
6330 
6331 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6332                                                  const FieldInfo& field_info,
6333                                                  bool value_can_be_null,
6334                                                  WriteBarrierKind write_barrier_kind) {
6335   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6336 
6337   LocationSummary* locations = instruction->GetLocations();
6338   Register base = locations->InAt(0).AsRegister<Register>();
6339   bool is_volatile = field_info.IsVolatile();
6340   DataType::Type field_type = field_info.GetFieldType();
6341   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6342   Address field_addr(base, offset);
6343 
6344   HandleFieldSet(instruction,
6345                  /* value_index= */ 1,
6346                  field_type,
6347                  field_addr,
6348                  base,
6349                  is_volatile,
6350                  value_can_be_null,
6351                  write_barrier_kind);
6352 }
6353 
6354 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6355   HandleFieldGet(instruction, instruction->GetFieldInfo());
6356 }
6357 
6358 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6359   HandleFieldGet(instruction, instruction->GetFieldInfo());
6360 }
6361 
6362 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6363   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6364 }
6365 
6366 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6367   HandleFieldSet(instruction,
6368                  instruction->GetFieldInfo(),
6369                  instruction->GetValueCanBeNull(),
6370                  instruction->GetWriteBarrierKind());
6371 }
6372 
6373 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6374   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6375 }
6376 
6377 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6378   HandleFieldSet(instruction,
6379                  instruction->GetFieldInfo(),
6380                  instruction->GetValueCanBeNull(),
6381                  instruction->GetWriteBarrierKind());
6382 }
6383 
6384 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6385   HandleFieldGet(instruction, instruction->GetFieldInfo());
6386 }
6387 
6388 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6389   HandleFieldGet(instruction, instruction->GetFieldInfo());
6390 }
6391 
6392 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6393   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6394 }
6395 
6396 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6397   __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6398   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6399 }
6400 
6401 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6402     HUnresolvedInstanceFieldGet* instruction) {
6403   FieldAccessCallingConventionX86 calling_convention;
6404   codegen_->CreateUnresolvedFieldLocationSummary(
6405       instruction, instruction->GetFieldType(), calling_convention);
6406 }
6407 
6408 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6409     HUnresolvedInstanceFieldGet* instruction) {
6410   FieldAccessCallingConventionX86 calling_convention;
6411   codegen_->GenerateUnresolvedFieldAccess(instruction,
6412                                           instruction->GetFieldType(),
6413                                           instruction->GetFieldIndex(),
6414                                           instruction->GetDexPc(),
6415                                           calling_convention);
6416 }
6417 
6418 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6419     HUnresolvedInstanceFieldSet* instruction) {
6420   FieldAccessCallingConventionX86 calling_convention;
6421   codegen_->CreateUnresolvedFieldLocationSummary(
6422       instruction, instruction->GetFieldType(), calling_convention);
6423 }
6424 
6425 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6426     HUnresolvedInstanceFieldSet* instruction) {
6427   FieldAccessCallingConventionX86 calling_convention;
6428   codegen_->GenerateUnresolvedFieldAccess(instruction,
6429                                           instruction->GetFieldType(),
6430                                           instruction->GetFieldIndex(),
6431                                           instruction->GetDexPc(),
6432                                           calling_convention);
6433 }
6434 
6435 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6436     HUnresolvedStaticFieldGet* instruction) {
6437   FieldAccessCallingConventionX86 calling_convention;
6438   codegen_->CreateUnresolvedFieldLocationSummary(
6439       instruction, instruction->GetFieldType(), calling_convention);
6440 }
6441 
6442 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6443     HUnresolvedStaticFieldGet* instruction) {
6444   FieldAccessCallingConventionX86 calling_convention;
6445   codegen_->GenerateUnresolvedFieldAccess(instruction,
6446                                           instruction->GetFieldType(),
6447                                           instruction->GetFieldIndex(),
6448                                           instruction->GetDexPc(),
6449                                           calling_convention);
6450 }
6451 
6452 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6453     HUnresolvedStaticFieldSet* instruction) {
6454   FieldAccessCallingConventionX86 calling_convention;
6455   codegen_->CreateUnresolvedFieldLocationSummary(
6456       instruction, instruction->GetFieldType(), calling_convention);
6457 }
6458 
6459 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6460     HUnresolvedStaticFieldSet* instruction) {
6461   FieldAccessCallingConventionX86 calling_convention;
6462   codegen_->GenerateUnresolvedFieldAccess(instruction,
6463                                           instruction->GetFieldType(),
6464                                           instruction->GetFieldIndex(),
6465                                           instruction->GetDexPc(),
6466                                           calling_convention);
6467 }
6468 
6469 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6470   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6471   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6472       ? Location::RequiresRegister()
6473       : Location::Any();
6474   locations->SetInAt(0, loc);
6475 }
6476 
6477 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6478   if (CanMoveNullCheckToUser(instruction)) {
6479     return;
6480   }
6481   LocationSummary* locations = instruction->GetLocations();
6482   Location obj = locations->InAt(0);
6483 
6484   __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6485   RecordPcInfo(instruction, instruction->GetDexPc());
6486 }
6487 
6488 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6489   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6490   AddSlowPath(slow_path);
6491 
6492   LocationSummary* locations = instruction->GetLocations();
6493   Location obj = locations->InAt(0);
6494 
6495   if (obj.IsRegister()) {
6496     __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6497   } else if (obj.IsStackSlot()) {
6498     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6499   } else {
6500     DCHECK(obj.IsConstant()) << obj;
6501     DCHECK(obj.GetConstant()->IsNullConstant());
6502     __ jmp(slow_path->GetEntryLabel());
6503     return;
6504   }
6505   __ j(kEqual, slow_path->GetEntryLabel());
6506 }
6507 
6508 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6509   codegen_->GenerateNullCheck(instruction);
6510 }
6511 
6512 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6513   bool object_array_get_with_read_barrier =
6514       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6515   LocationSummary* locations =
6516       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6517                                                        object_array_get_with_read_barrier
6518                                                            ? LocationSummary::kCallOnSlowPath
6519                                                            : LocationSummary::kNoCall);
6520   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6521     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6522   }
6523   locations->SetInAt(0, Location::RequiresRegister());
6524   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6525   if (DataType::IsFloatingPointType(instruction->GetType())) {
6526     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6527   } else {
6528     // The output overlaps in case of long: we don't want the low move
6529     // to overwrite the array's location.  Likewise, in the case of an
6530     // object array get with read barriers enabled, we do not want the
6531     // move to overwrite the array's location, as we need it to emit
6532     // the read barrier.
6533     locations->SetOut(
6534         Location::RequiresRegister(),
6535         (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6536             ? Location::kOutputOverlap
6537             : Location::kNoOutputOverlap);
6538   }
6539 }
6540 
6541 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6542   LocationSummary* locations = instruction->GetLocations();
6543   Location obj_loc = locations->InAt(0);
6544   Register obj = obj_loc.AsRegister<Register>();
6545   Location index = locations->InAt(1);
6546   Location out_loc = locations->Out();
6547   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6548 
6549   DataType::Type type = instruction->GetType();
6550   if (type == DataType::Type::kReference) {
6551     static_assert(
6552         sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6553         "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6554     // /* HeapReference<Object> */ out =
6555     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6556     if (codegen_->EmitBakerReadBarrier()) {
6557       // Note that a potential implicit null check is handled in this
6558       // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6559       codegen_->GenerateArrayLoadWithBakerReadBarrier(
6560           instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6561     } else {
6562       Register out = out_loc.AsRegister<Register>();
6563       __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6564       codegen_->MaybeRecordImplicitNullCheck(instruction);
6565       // If read barriers are enabled, emit read barriers other than
6566       // Baker's using a slow path (and also unpoison the loaded
6567       // reference, if heap poisoning is enabled).
6568       if (index.IsConstant()) {
6569         uint32_t offset =
6570             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6571         codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6572       } else {
6573         codegen_->MaybeGenerateReadBarrierSlow(
6574             instruction, out_loc, out_loc, obj_loc, data_offset, index);
6575       }
6576     }
6577   } else if (type == DataType::Type::kUint16
6578       && mirror::kUseStringCompression
6579       && instruction->IsStringCharAt()) {
6580     // Branch between the compressed and uncompressed cases based on the string compression flag.
6581     Register out = out_loc.AsRegister<Register>();
6582     uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6583     NearLabel done, not_compressed;
6584     __ testb(Address(obj, count_offset), Immediate(1));
6585     codegen_->MaybeRecordImplicitNullCheck(instruction);
6586     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6587                   "Expecting 0=compressed, 1=uncompressed");
6588     __ j(kNotZero, &not_compressed);
6589     __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6590     __ jmp(&done);
6591     __ Bind(&not_compressed);
6592     __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6593     __ Bind(&done);
6594   } else {
6595     ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6596     Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6597     codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6598   }
6599 }
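// Illustrative sketch (added for exposition, not part of the upstream file): the
// String.charAt() path above expands to roughly
//   testb [str + count_offset], 1       // bit 0: 0 = compressed, 1 = uncompressed
//   jnz   not_compressed
//   movzxb out, [str + data + index*1]  // 8-bit characters
//   jmp   done
// not_compressed:
//   movzxw out, [str + data + index*2]  // 16-bit characters
// done: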
6600 
6601 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6602   DataType::Type value_type = instruction->GetComponentType();
6603 
6604   WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6605   bool needs_write_barrier =
6606       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6607   bool check_gc_card =
6608       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6609   bool needs_type_check = instruction->NeedsTypeCheck();
6610 
6611   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6612       instruction,
6613       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6614 
6615   bool is_byte_type = DataType::Size(value_type) == 1u;
6616   // We need the inputs to be different from the output in the case of a long operation.
6617   // In case of a byte operation, the register allocator does not support multiple
6618   // inputs that die at entry with one in a specific register.
6619   locations->SetInAt(0, Location::RequiresRegister());
6620   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6621   if (is_byte_type) {
6622     // Ensure the value is in a byte register.
6623     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6624   } else if (DataType::IsFloatingPointType(value_type)) {
6625     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6626   } else {
6627     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6628   }
6629   if (needs_write_barrier || check_gc_card) {
6630     // Used by reference poisoning, type checking, emitting, or checking a write barrier.
6631     locations->AddTemp(Location::RequiresRegister());
6632     // Only used when emitting or checking a write barrier. Ensure the card is in a byte register.
6633     locations->AddTemp(Location::RegisterLocation(ECX));
6634   } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
6635              instruction->NeedsTypeCheck()) {
6636     locations->AddTemp(Location::RequiresRegister());
6637   }
6638 }
6639 
6640 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6641   LocationSummary* locations = instruction->GetLocations();
6642   Location array_loc = locations->InAt(0);
6643   Register array = array_loc.AsRegister<Register>();
6644   Location index = locations->InAt(1);
6645   Location value = locations->InAt(2);
6646   DataType::Type value_type = instruction->GetComponentType();
6647   bool needs_type_check = instruction->NeedsTypeCheck();
6648   WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6649   bool needs_write_barrier =
6650       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6651 
6652   switch (value_type) {
6653     case DataType::Type::kBool:
6654     case DataType::Type::kUint8:
6655     case DataType::Type::kInt8: {
6656       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6657       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6658       if (value.IsRegister()) {
6659         __ movb(address, value.AsRegister<ByteRegister>());
6660       } else {
6661         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6662       }
6663       codegen_->MaybeRecordImplicitNullCheck(instruction);
6664       break;
6665     }
6666 
6667     case DataType::Type::kUint16:
6668     case DataType::Type::kInt16: {
6669       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6670       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6671       if (value.IsRegister()) {
6672         __ movw(address, value.AsRegister<Register>());
6673       } else {
6674         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6675       }
6676       codegen_->MaybeRecordImplicitNullCheck(instruction);
6677       break;
6678     }
6679 
6680     case DataType::Type::kReference: {
6681       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6682       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6683 
6684       if (!value.IsRegister()) {
6685         // Just setting null.
6686         DCHECK(instruction->InputAt(2)->IsNullConstant());
6687         DCHECK(value.IsConstant()) << value;
6688         __ movl(address, Immediate(0));
6689         codegen_->MaybeRecordImplicitNullCheck(instruction);
6690         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6691           // We need to set a write barrier here even though we are writing null, since this write
6692           // barrier is being relied on.
6693           DCHECK(needs_write_barrier);
6694           Register temp = locations->GetTemp(0).AsRegister<Register>();
6695           Register card = locations->GetTemp(1).AsRegister<Register>();
6696           codegen_->MarkGCCard(temp, card, array);
6697         }
6698         DCHECK(!needs_type_check);
6699         break;
6700       }
6701 
6702       Register register_value = value.AsRegister<Register>();
6703       const bool can_value_be_null = instruction->GetValueCanBeNull();
6704       // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6705       // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6706       // value is null for the type check).
6707       const bool skip_marking_gc_card =
6708           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6709       NearLabel do_store;
6710       NearLabel skip_writing_card;
6711       if (can_value_be_null) {
6712         __ testl(register_value, register_value);
6713         if (skip_marking_gc_card) {
6714           __ j(kEqual, &skip_writing_card);
6715         } else {
6716           __ j(kEqual, &do_store);
6717         }
6718       }
6719 
6720       SlowPathCode* slow_path = nullptr;
6721       if (needs_type_check) {
6722         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6723         codegen_->AddSlowPath(slow_path);
6724 
6725         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6726         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6727         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6728 
6729         // Note that when Baker read barriers are enabled, the type
6730         // checks are performed without read barriers.  This is fine,
6731         // even in the case where a class object is in the from-space
6732         // after the flip, as a comparison involving such a type would
6733         // not produce a false positive; it may of course produce a
6734         // false negative, in which case we would take the ArraySet
6735         // slow path.
6736 
6737         Register temp = locations->GetTemp(0).AsRegister<Register>();
6738         // /* HeapReference<Class> */ temp = array->klass_
6739         __ movl(temp, Address(array, class_offset));
6740         codegen_->MaybeRecordImplicitNullCheck(instruction);
6741         __ MaybeUnpoisonHeapReference(temp);
6742 
6743         // /* HeapReference<Class> */ temp = temp->component_type_
6744         __ movl(temp, Address(temp, component_offset));
6745         // If heap poisoning is enabled, no need to unpoison `temp`
6746         // nor the object reference in `register_value->klass`, as
6747         // we are comparing two poisoned references.
6748         __ cmpl(temp, Address(register_value, class_offset));
6749 
6750         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6751           NearLabel do_put;
6752           __ j(kEqual, &do_put);
6753           // If heap poisoning is enabled, the `temp` reference has
6754           // not been unpoisoned yet; unpoison it now.
6755           __ MaybeUnpoisonHeapReference(temp);
6756 
6757           // If heap poisoning is enabled, no need to unpoison the
6758           // heap reference loaded below, as it is only used for a
6759           // comparison with null.
6760           __ cmpl(Address(temp, super_offset), Immediate(0));
6761           __ j(kNotEqual, slow_path->GetEntryLabel());
6762           __ Bind(&do_put);
6763         } else {
6764           __ j(kNotEqual, slow_path->GetEntryLabel());
6765         }
6766       }
6767 
6768       if (can_value_be_null && !skip_marking_gc_card) {
6769         DCHECK(do_store.IsLinked());
6770         __ Bind(&do_store);
6771       }
6772 
6773       if (needs_write_barrier) {
6774         Register temp = locations->GetTemp(0).AsRegister<Register>();
6775         Register card = locations->GetTemp(1).AsRegister<Register>();
6776         codegen_->MarkGCCard(temp, card, array);
6777       } else if (codegen_->ShouldCheckGCCard(
6778                      value_type, instruction->GetValue(), write_barrier_kind)) {
6779         Register temp = locations->GetTemp(0).AsRegister<Register>();
6780         Register card = locations->GetTemp(1).AsRegister<Register>();
6781         codegen_->CheckGCCardIsValid(temp, card, array);
6782       }
6783 
6784       if (skip_marking_gc_card) {
6785         // Note that we don't check that the GC card is valid, as it may legitimately be clean.
6786         DCHECK(skip_writing_card.IsLinked());
6787         __ Bind(&skip_writing_card);
6788       }
6789 
6790       Register source = register_value;
6791       if (kPoisonHeapReferences) {
6792         Register temp = locations->GetTemp(0).AsRegister<Register>();
6793         __ movl(temp, register_value);
6794         __ PoisonHeapReference(temp);
6795         source = temp;
6796       }
6797 
6798       __ movl(address, source);
6799 
6800       if (can_value_be_null || !needs_type_check) {
6801         codegen_->MaybeRecordImplicitNullCheck(instruction);
6802       }
6803 
6804       if (slow_path != nullptr) {
6805         __ Bind(slow_path->GetExitLabel());
6806       }
6807 
6808       break;
6809     }
6810 
6811     case DataType::Type::kInt32: {
6812       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6813       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6814       if (value.IsRegister()) {
6815         __ movl(address, value.AsRegister<Register>());
6816       } else {
6817         DCHECK(value.IsConstant()) << value;
6818         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6819         __ movl(address, Immediate(v));
6820       }
6821       codegen_->MaybeRecordImplicitNullCheck(instruction);
6822       break;
6823     }
6824 
6825     case DataType::Type::kInt64: {
6826       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6827       if (value.IsRegisterPair()) {
6828         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6829                 value.AsRegisterPairLow<Register>());
6830         codegen_->MaybeRecordImplicitNullCheck(instruction);
6831         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6832                 value.AsRegisterPairHigh<Register>());
6833       } else {
6834         DCHECK(value.IsConstant());
6835         int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6836         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6837                 Immediate(Low32Bits(val)));
6838         codegen_->MaybeRecordImplicitNullCheck(instruction);
6839         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6840                 Immediate(High32Bits(val)));
6841       }
6842       break;
6843     }
6844 
6845     case DataType::Type::kFloat32: {
6846       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6847       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6848       if (value.IsFpuRegister()) {
6849         __ movss(address, value.AsFpuRegister<XmmRegister>());
6850       } else {
6851         DCHECK(value.IsConstant());
6852         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6853         __ movl(address, Immediate(v));
6854       }
6855       codegen_->MaybeRecordImplicitNullCheck(instruction);
6856       break;
6857     }
6858 
6859     case DataType::Type::kFloat64: {
6860       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6861       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6862       if (value.IsFpuRegister()) {
6863         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6864       } else {
6865         DCHECK(value.IsConstant());
6866         Address address_hi =
6867             CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6868         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6869         __ movl(address, Immediate(Low32Bits(v)));
6870         codegen_->MaybeRecordImplicitNullCheck(instruction);
6871         __ movl(address_hi, Immediate(High32Bits(v)));
6872       }
6873       break;
6874     }
6875 
6876     case DataType::Type::kUint32:
6877     case DataType::Type::kUint64:
6878     case DataType::Type::kVoid:
6879       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6880       UNREACHABLE();
6881   }
6882 }
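// Illustrative summary (added for exposition): for a reference array store whose value may
// be null, the fast path above is ordered as
//   testl value, value ; jz do_store (or skip_writing_card)   // null shortcut
//   optional type check against the array's component type (ArraySetSlowPathX86 on mismatch)
//   MarkGCCard / CheckGCCardIsValid                            // write barrier or card check
//   movl [array + data + index*4], value                       // possibly a poisoned copy
// i.e. the card is marked before the element itself is written.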
6883 
6884 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6885   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6886   locations->SetInAt(0, Location::RequiresRegister());
6887   if (!instruction->IsEmittedAtUseSite()) {
6888     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6889   }
6890 }
6891 
6892 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6893   if (instruction->IsEmittedAtUseSite()) {
6894     return;
6895   }
6896 
6897   LocationSummary* locations = instruction->GetLocations();
6898   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6899   Register obj = locations->InAt(0).AsRegister<Register>();
6900   Register out = locations->Out().AsRegister<Register>();
6901   __ movl(out, Address(obj, offset));
6902   codegen_->MaybeRecordImplicitNullCheck(instruction);
6903   // Mask out most significant bit in case the array is String's array of char.
6904   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6905     __ shrl(out, Immediate(1));
6906   }
6907 }
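// Illustrative note (added for exposition): with string compression enabled, String's count
// field apparently packs the length into the upper 31 bits and the compression state into
// bit 0 (0 = compressed, per the static_assert in VisitArrayGet above), so
//   length = count >> 1
// which is exactly the shrl by 1 emitted above when the HArrayLength is a String length.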
6908 
6909 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6910   RegisterSet caller_saves = RegisterSet::Empty();
6911   InvokeRuntimeCallingConvention calling_convention;
6912   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6913   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6914   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6915   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6916   HInstruction* length = instruction->InputAt(1);
6917   if (!length->IsEmittedAtUseSite()) {
6918     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6919   }
6920   // Need register to see array's length.
6921   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6922     locations->AddTemp(Location::RequiresRegister());
6923   }
6924 }
6925 
6926 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6927   const bool is_string_compressed_char_at =
6928       mirror::kUseStringCompression && instruction->IsStringCharAt();
6929   LocationSummary* locations = instruction->GetLocations();
6930   Location index_loc = locations->InAt(0);
6931   Location length_loc = locations->InAt(1);
6932   SlowPathCode* slow_path =
6933     new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6934 
6935   if (length_loc.IsConstant()) {
6936     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6937     if (index_loc.IsConstant()) {
6938       // BCE will remove the bounds check if we are guaranteed to pass.
6939       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6940       if (index < 0 || index >= length) {
6941         codegen_->AddSlowPath(slow_path);
6942         __ jmp(slow_path->GetEntryLabel());
6943       } else {
6944         // Some optimization after BCE may have generated this, and we should not
6945         // generate a bounds check if it is a valid range.
6946       }
6947       return;
6948     }
6949 
6950     // We have to reverse the jump condition because the length is the constant.
6951     Register index_reg = index_loc.AsRegister<Register>();
6952     __ cmpl(index_reg, Immediate(length));
6953     codegen_->AddSlowPath(slow_path);
6954     __ j(kAboveEqual, slow_path->GetEntryLabel());
6955   } else {
6956     HInstruction* array_length = instruction->InputAt(1);
6957     if (array_length->IsEmittedAtUseSite()) {
6958       // Address the length field in the array.
6959       DCHECK(array_length->IsArrayLength());
6960       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6961       Location array_loc = array_length->GetLocations()->InAt(0);
6962       Address array_len(array_loc.AsRegister<Register>(), len_offset);
6963       if (is_string_compressed_char_at) {
6964         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6965         // the string compression flag) with the in-memory length and avoid the temporary.
6966         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6967         __ movl(length_reg, array_len);
6968         codegen_->MaybeRecordImplicitNullCheck(array_length);
6969         __ shrl(length_reg, Immediate(1));
6970         codegen_->GenerateIntCompare(length_reg, index_loc);
6971       } else {
6972         // Checking bounds for general case:
6973         // Array of char or string's array with feature compression off.
6974         if (index_loc.IsConstant()) {
6975           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6976           __ cmpl(array_len, Immediate(value));
6977         } else {
6978           __ cmpl(array_len, index_loc.AsRegister<Register>());
6979         }
6980         codegen_->MaybeRecordImplicitNullCheck(array_length);
6981       }
6982     } else {
6983       codegen_->GenerateIntCompare(length_loc, index_loc);
6984     }
6985     codegen_->AddSlowPath(slow_path);
6986     __ j(kBelowEqual, slow_path->GetEntryLabel());
6987   }
6988 }
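// Illustrative sketch (added for exposition): with a constant length the check above
// degenerates to
//   cmpl index_reg, length
//   jae  BoundsCheckSlowPathX86      // unsigned compare also catches index < 0
// and when both index and length are constants the code emits either an unconditional jmp
// to the slow path (e.g. index 5, length 3) or nothing at all when 0 <= index < length.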
6989 
6990 void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6991   LOG(FATAL) << "Unreachable";
6992 }
6993 
6994 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6995   if (instruction->GetNext()->IsSuspendCheck() &&
6996       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6997     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6998     // The back edge will generate the suspend check.
6999     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7000   }
7001 
7002   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7003 }
7004 
7005 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
7006   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7007       instruction, LocationSummary::kCallOnSlowPath);
7008   // In suspend check slow path, usually there are no caller-save registers at all.
7009   // If SIMD instructions are present, however, we force spilling all live SIMD
7010   // registers in full width (since the runtime only saves/restores lower part).
7011   locations->SetCustomSlowPathCallerSaves(
7012       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
7013 }
7014 
7015 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
7016   HBasicBlock* block = instruction->GetBlock();
7017   if (block->GetLoopInformation() != nullptr) {
7018     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7019     // The back edge will generate the suspend check.
7020     return;
7021   }
7022   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7023     // The goto will generate the suspend check.
7024     return;
7025   }
7026   GenerateSuspendCheck(instruction, nullptr);
7027 }
7028 
7029 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
7030                                                        HBasicBlock* successor) {
7031   SuspendCheckSlowPathX86* slow_path =
7032       down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
7033   if (slow_path == nullptr) {
7034     slow_path =
7035         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
7036     instruction->SetSlowPath(slow_path);
7037     codegen_->AddSlowPath(slow_path);
7038     if (successor != nullptr) {
7039       DCHECK(successor->IsLoopHeader());
7040     }
7041   } else {
7042     DCHECK_EQ(slow_path->GetSuccessor(), successor);
7043   }
7044 
7045   __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
7046                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
7047   if (successor == nullptr) {
7048     __ j(kNotZero, slow_path->GetEntryLabel());
7049     __ Bind(slow_path->GetReturnLabel());
7050   } else {
7051     __ j(kZero, codegen_->GetLabelOf(successor));
7052     __ jmp(slow_path->GetEntryLabel());
7053   }
7054 }
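// Illustrative sketch (added for exposition, not part of the upstream file): the suspend
// check tests the thread flags word through the fs segment (thread-local storage on x86)
// and branches to SuspendCheckSlowPathX86 when a suspend or checkpoint request is pending:
//   testl fs:[thread_flags_offset], SuspendOrCheckpointRequestFlags
//   jnz   slow_path                    // no successor: fall through to the return label
// or, on a back edge with a known successor,
//   jz    successor_label
//   jmp   slow_path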
7055 
7056 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
7057   return codegen_->GetAssembler();
7058 }
7059 
7060 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
7061   ScratchRegisterScope ensure_scratch(
7062       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7063   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7064   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7065 
7066   // Now that temp register is available (possibly spilled), move blocks of memory.
7067   for (int i = 0; i < number_of_words; i++) {
7068     __ movl(temp_reg, Address(ESP, src + stack_offset));
7069     __ movl(Address(ESP, dst + stack_offset), temp_reg);
7070     stack_offset += kX86WordSize;
7071   }
7072 }
7073 
7074 void ParallelMoveResolverX86::EmitMove(size_t index) {
7075   MoveOperands* move = moves_[index];
7076   Location source = move->GetSource();
7077   Location destination = move->GetDestination();
7078 
7079   if (source.IsRegister()) {
7080     if (destination.IsRegister()) {
7081       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7082     } else if (destination.IsFpuRegister()) {
7083       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
7084     } else {
7085       DCHECK(destination.IsStackSlot());
7086       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
7087     }
7088   } else if (source.IsRegisterPair()) {
7089     if (destination.IsRegisterPair()) {
7090       __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
7091       DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
7092       __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
7093     } else if (destination.IsFpuRegister()) {
7094       size_t elem_size = DataType::Size(DataType::Type::kInt32);
7095       // Push the 2 source registers to the stack.
7096       __ pushl(source.AsRegisterPairHigh<Register>());
7097       __ cfi().AdjustCFAOffset(elem_size);
7098       __ pushl(source.AsRegisterPairLow<Register>());
7099       __ cfi().AdjustCFAOffset(elem_size);
7100       // Load the destination register.
7101       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
7102       // And remove the temporary stack space we allocated.
7103       codegen_->DecreaseFrame(2 * elem_size);
7104     } else {
7105       DCHECK(destination.IsDoubleStackSlot());
7106       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
7107       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
7108               source.AsRegisterPairHigh<Register>());
7109     }
7110   } else if (source.IsFpuRegister()) {
7111     if (destination.IsRegister()) {
7112       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
7113     } else if (destination.IsFpuRegister()) {
7114       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7115     } else if (destination.IsRegisterPair()) {
7116       size_t elem_size = DataType::Size(DataType::Type::kInt32);
7117       // Create stack space for 2 elements.
7118       codegen_->IncreaseFrame(2 * elem_size);
7119       // Store the source register.
7120       __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
7121       // And pop the values into destination registers.
7122       __ popl(destination.AsRegisterPairLow<Register>());
7123       __ cfi().AdjustCFAOffset(-elem_size);
7124       __ popl(destination.AsRegisterPairHigh<Register>());
7125       __ cfi().AdjustCFAOffset(-elem_size);
7126     } else if (destination.IsStackSlot()) {
7127       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7128     } else if (destination.IsDoubleStackSlot()) {
7129       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7130     } else {
7131       DCHECK(destination.IsSIMDStackSlot());
7132       __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7133     }
7134   } else if (source.IsStackSlot()) {
7135     if (destination.IsRegister()) {
7136       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
7137     } else if (destination.IsFpuRegister()) {
7138       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7139     } else {
7140       DCHECK(destination.IsStackSlot());
7141       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7142     }
7143   } else if (source.IsDoubleStackSlot()) {
7144     if (destination.IsRegisterPair()) {
7145       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
7146       __ movl(destination.AsRegisterPairHigh<Register>(),
7147               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
7148     } else if (destination.IsFpuRegister()) {
7149       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7150     } else {
7151       DCHECK(destination.IsDoubleStackSlot()) << destination;
7152       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7153     }
7154   } else if (source.IsSIMDStackSlot()) {
7155     if (destination.IsFpuRegister()) {
7156       __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7157     } else {
7158       DCHECK(destination.IsSIMDStackSlot());
7159       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7160     }
7161   } else if (source.IsConstant()) {
7162     HConstant* constant = source.GetConstant();
7163     if (constant->IsIntConstant() || constant->IsNullConstant()) {
7164       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7165       if (destination.IsRegister()) {
7166         if (value == 0) {
7167           __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
7168         } else {
7169           __ movl(destination.AsRegister<Register>(), Immediate(value));
7170         }
7171       } else {
7172         DCHECK(destination.IsStackSlot()) << destination;
7173         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
7174       }
7175     } else if (constant->IsFloatConstant()) {
7176       float fp_value = constant->AsFloatConstant()->GetValue();
7177       int32_t value = bit_cast<int32_t, float>(fp_value);
7178       Immediate imm(value);
7179       if (destination.IsFpuRegister()) {
7180         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
7181         if (value == 0) {
7182           // Easy handling of 0.0.
7183           __ xorps(dest, dest);
7184         } else {
7185           ScratchRegisterScope ensure_scratch(
7186               this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7187           Register temp = static_cast<Register>(ensure_scratch.GetRegister());
7188           __ movl(temp, Immediate(value));
7189           __ movd(dest, temp);
7190         }
7191       } else {
7192         DCHECK(destination.IsStackSlot()) << destination;
7193         __ movl(Address(ESP, destination.GetStackIndex()), imm);
7194       }
7195     } else if (constant->IsLongConstant()) {
7196       int64_t value = constant->AsLongConstant()->GetValue();
7197       int32_t low_value = Low32Bits(value);
7198       int32_t high_value = High32Bits(value);
7199       Immediate low(low_value);
7200       Immediate high(high_value);
7201       if (destination.IsDoubleStackSlot()) {
7202         __ movl(Address(ESP, destination.GetStackIndex()), low);
7203         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
7204       } else {
7205         __ movl(destination.AsRegisterPairLow<Register>(), low);
7206         __ movl(destination.AsRegisterPairHigh<Register>(), high);
7207       }
7208     } else {
7209       DCHECK(constant->IsDoubleConstant());
7210       double dbl_value = constant->AsDoubleConstant()->GetValue();
7211       int64_t value = bit_cast<int64_t, double>(dbl_value);
7212       int32_t low_value = Low32Bits(value);
7213       int32_t high_value = High32Bits(value);
7214       Immediate low(low_value);
7215       Immediate high(high_value);
7216       if (destination.IsFpuRegister()) {
7217         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
7218         if (value == 0) {
7219           // Easy handling of 0.0.
7220           __ xorpd(dest, dest);
7221         } else {
7222           __ pushl(high);
7223           __ cfi().AdjustCFAOffset(4);
7224           __ pushl(low);
7225           __ cfi().AdjustCFAOffset(4);
7226           __ movsd(dest, Address(ESP, 0));
7227           codegen_->DecreaseFrame(8);
7228         }
7229       } else {
7230         DCHECK(destination.IsDoubleStackSlot()) << destination;
7231         __ movl(Address(ESP, destination.GetStackIndex()), low);
7232         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
7233       }
7234     }
7235   } else {
7236     LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
7237   }
7238 }
7239 
7240 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
7241   Register suggested_scratch = reg == EAX ? EBX : EAX;
7242   ScratchRegisterScope ensure_scratch(
7243       this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7244 
7245   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7246   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
7247   __ movl(Address(ESP, mem + stack_offset), reg);
7248   __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
7249 }
7250 
7251 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
7252   ScratchRegisterScope ensure_scratch(
7253       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7254 
7255   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7256   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7257   __ movl(temp_reg, Address(ESP, mem + stack_offset));
7258   __ movss(Address(ESP, mem + stack_offset), reg);
7259   __ movd(reg, temp_reg);
7260 }
7261 
7262 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
7263   size_t extra_slot = 4 * kX86WordSize;
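  // Four words (16 bytes) of extra stack give the 128-bit vector a temporary home while the
  // two memory blocks are swapped.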
7264   codegen_->IncreaseFrame(extra_slot);
7265   __ movups(Address(ESP, 0), XmmRegister(reg));
7266   ExchangeMemory(0, mem + extra_slot, 4);
7267   __ movups(XmmRegister(reg), Address(ESP, 0));
7268   codegen_->DecreaseFrame(extra_slot);
7269 }
7270 
7271 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
7272   ScratchRegisterScope ensure_scratch1(
7273       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7274 
7275   Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
7276   ScratchRegisterScope ensure_scratch2(
7277       this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7278 
7279   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
7280   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
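  // Each spilled scratch register was pushed onto the stack, so the ESP-relative offsets of
  // both memory operands shift by one word per spill.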
7281 
7282   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
7283   for (int i = 0; i < number_of_words; i++) {
7284     __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
7285     __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
7286     __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
7287     __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
7288     stack_offset += kX86WordSize;
7289   }
7290 }
7291 
7292 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7293   MoveOperands* move = moves_[index];
7294   Location source = move->GetSource();
7295   Location destination = move->GetDestination();
7296 
7297   if (source.IsRegister() && destination.IsRegister()) {
7298     // Use the XOR swap algorithm to avoid the serializing XCHG instruction or a temporary.
7299     DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
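    // Note: XOR-swapping a register with itself would zero it, hence the DCHECK above.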
7300     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7301     __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7302     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7303   } else if (source.IsRegister() && destination.IsStackSlot()) {
7304     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7305   } else if (source.IsStackSlot() && destination.IsRegister()) {
7306     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7307   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7308     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7309   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7310     // Use the XOR swap algorithm to avoid a temporary register.
7311     DCHECK_NE(source.reg(), destination.reg());
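    // xorpd is a full-width bitwise XOR of the XMM registers, so this swap is correct for both
    // single- and double-precision values.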
7312     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7313     __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7314     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7315   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7316     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7317   } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7318     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7319   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7320     // Take advantage of the 16 bytes in the XMM register.
7321     XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7322     Address stack(ESP, destination.GetStackIndex());
7323     // Load the double into the high doubleword.
7324     __ movhpd(reg, stack);
7325 
7326     // Store the low double into the destination.
7327     __ movsd(stack, reg);
7328 
7329     // Move the high double to the low double.
7330     __ psrldq(reg, Immediate(8));
7331   } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7332     // Take advantage of the 16 bytes in the XMM register.
7333     XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7334     Address stack(ESP, source.GetStackIndex());
7335     // Load the double into the high doubleword.
7336     __ movhpd(reg, stack);
7337 
7338     // Store the low double into the destination.
7339     __ movsd(stack, reg);
7340 
7341     // Move the high double to the low double.
7342     __ psrldq(reg, Immediate(8));
7343   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7344     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7345   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7346     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7347   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7348     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7349   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7350     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7351   } else {
7352     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7353   }
7354 }
7355 
7356 void ParallelMoveResolverX86::SpillScratch(int reg) {
7357   __ pushl(static_cast<Register>(reg));
7358 }
7359 
7360 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7361   __ popl(static_cast<Register>(reg));
7362 }
7363 
7364 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7365     HLoadClass::LoadKind desired_class_load_kind) {
7366   switch (desired_class_load_kind) {
7367     case HLoadClass::LoadKind::kInvalid:
7368       LOG(FATAL) << "UNREACHABLE";
7369       UNREACHABLE();
7370     case HLoadClass::LoadKind::kReferrersClass:
7371       break;
7372     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7373     case HLoadClass::LoadKind::kBootImageRelRo:
7374     case HLoadClass::LoadKind::kAppImageRelRo:
7375     case HLoadClass::LoadKind::kBssEntry:
7376     case HLoadClass::LoadKind::kBssEntryPublic:
7377     case HLoadClass::LoadKind::kBssEntryPackage:
7378       DCHECK(!GetCompilerOptions().IsJitCompiler());
7379       break;
7380     case HLoadClass::LoadKind::kJitBootImageAddress:
7381     case HLoadClass::LoadKind::kJitTableAddress:
7382       DCHECK(GetCompilerOptions().IsJitCompiler());
7383       break;
7384     case HLoadClass::LoadKind::kRuntimeCall:
7385       break;
7386   }
7387   return desired_class_load_kind;
7388 }
7389 
7390 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7391   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7392   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7393     InvokeRuntimeCallingConvention calling_convention;
7394     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7395         cls,
7396         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7397         Location::RegisterLocation(EAX));
7398     DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7399     return;
7400   }
7401   DCHECK_EQ(cls->NeedsAccessCheck(),
7402             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7403                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7404 
7405   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7406   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7407       ? LocationSummary::kCallOnSlowPath
7408       : LocationSummary::kNoCall;
7409   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7410   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7411     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7412   }
7413 
7414   if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7415     locations->SetInAt(0, Location::RequiresRegister());
7416   }
7417   locations->SetOut(Location::RequiresRegister());
7418   if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7419     if (codegen_->EmitNonBakerReadBarrier()) {
7420       // For non-Baker read barrier we have a temp-clobbering call.
7421     } else {
7422       // Rely on the type resolution and/or initialization to save everything.
7423       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7424     }
7425   }
7426 }
7427 
7428 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7429                                               dex::TypeIndex type_index,
7430                                               Handle<mirror::Class> handle) {
7431   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7432   // Add a patch entry and return the label.
7433   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7434   PatchInfo<Label>* info = &jit_class_patches_.back();
7435   return &info->label;
7436 }
7437 
7438 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7439 // move.
7440 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7441   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7442   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7443     codegen_->GenerateLoadClassRuntimeCall(cls);
7444     return;
7445   }
7446   DCHECK_EQ(cls->NeedsAccessCheck(),
7447             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7448                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7449 
7450   LocationSummary* locations = cls->GetLocations();
7451   Location out_loc = locations->Out();
7452   Register out = out_loc.AsRegister<Register>();
7453 
7454   bool generate_null_check = false;
7455   const ReadBarrierOption read_barrier_option =
7456       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7457   switch (load_kind) {
7458     case HLoadClass::LoadKind::kReferrersClass: {
7459       DCHECK(!cls->CanCallRuntime());
7460       DCHECK(!cls->MustGenerateClinitCheck());
7461       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7462       Register current_method = locations->InAt(0).AsRegister<Register>();
7463       GenerateGcRootFieldLoad(
7464           cls,
7465           out_loc,
7466           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7467           /* fixup_label= */ nullptr,
7468           read_barrier_option);
7469       break;
7470     }
7471     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7472       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7473              codegen_->GetCompilerOptions().IsBootImageExtension());
7474       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7475       Register method_address = locations->InAt(0).AsRegister<Register>();
7476       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7477       codegen_->RecordBootImageTypePatch(cls);
7478       break;
7479     }
7480     case HLoadClass::LoadKind::kBootImageRelRo: {
7481       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7482       Register method_address = locations->InAt(0).AsRegister<Register>();
7483       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7484       codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7485                                           CodeGenerator::GetBootImageOffset(cls));
7486       break;
7487     }
7488     case HLoadClass::LoadKind::kAppImageRelRo: {
7489       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7490       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7491       Register method_address = locations->InAt(0).AsRegister<Register>();
7492       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7493       codegen_->RecordAppImageTypePatch(cls);
7494       break;
7495     }
7496     case HLoadClass::LoadKind::kBssEntry:
7497     case HLoadClass::LoadKind::kBssEntryPublic:
7498     case HLoadClass::LoadKind::kBssEntryPackage: {
7499       Register method_address = locations->InAt(0).AsRegister<Register>();
7500       Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7501       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7502       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7503       // No need for memory fence, thanks to the x86 memory model.
7504       generate_null_check = true;
7505       break;
7506     }
7507     case HLoadClass::LoadKind::kJitBootImageAddress: {
7508       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7509       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7510       DCHECK_NE(address, 0u);
7511       __ movl(out, Immediate(address));
7512       break;
7513     }
7514     case HLoadClass::LoadKind::kJitTableAddress: {
7515       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7516       Label* fixup_label = codegen_->NewJitRootClassPatch(
7517           cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7518       // /* GcRoot<mirror::Class> */ out = *address
7519       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7520       break;
7521     }
7522     case HLoadClass::LoadKind::kRuntimeCall:
7523     case HLoadClass::LoadKind::kInvalid:
7524       LOG(FATAL) << "UNREACHABLE";
7525       UNREACHABLE();
7526   }
7527 
7528   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7529     DCHECK(cls->CanCallRuntime());
7530     SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7531     codegen_->AddSlowPath(slow_path);
7532 
7533     if (generate_null_check) {
7534       __ testl(out, out);
7535       __ j(kEqual, slow_path->GetEntryLabel());
7536     }
7537 
7538     if (cls->MustGenerateClinitCheck()) {
7539       GenerateClassInitializationCheck(slow_path, out);
7540     } else {
7541       __ Bind(slow_path->GetExitLabel());
7542     }
7543   }
7544 }
7545 
7546 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7547   InvokeRuntimeCallingConvention calling_convention;
7548   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7549   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7550 }
7551 
7552 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7553   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7554 }
7555 
7556 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7557   InvokeRuntimeCallingConvention calling_convention;
7558   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7559   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7560 }
7561 
7562 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7563   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7564 }
7565 
7566 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7567   LocationSummary* locations =
7568       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7569   locations->SetInAt(0, Location::RequiresRegister());
7570   if (check->HasUses()) {
7571     locations->SetOut(Location::SameAsFirstInput());
7572   }
7573   // Rely on the type initialization to save everything we need.
7574   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7575 }
7576 
7577 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7578   // We assume the class is not null.
7579   SlowPathCode* slow_path =
7580       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7581   codegen_->AddSlowPath(slow_path);
7582   GenerateClassInitializationCheck(slow_path,
7583                                    check->GetLocations()->InAt(0).AsRegister<Register>());
7584 }
7585 
7586 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7587     SlowPathCode* slow_path, Register class_reg) {
7588   __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
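  // Class status values are ordered, so a single unsigned compare against the
  // visibly-initialized marker catches every not-yet-initialized state.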
7589   __ j(kBelow, slow_path->GetEntryLabel());
7590   __ Bind(slow_path->GetExitLabel());
7591 }
7592 
7593 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7594                                                                     Register temp) {
7595   uint32_t path_to_root = check->GetBitstringPathToRoot();
7596   uint32_t mask = check->GetBitstringMask();
7597   DCHECK(IsPowerOfTwo(mask + 1));
7598   size_t mask_bits = WhichPowerOf2(mask + 1);
7599 
7600   if (mask_bits == 16u) {
7601     // Compare the bitstring in memory.
7602     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7603   } else {
7604     // /* uint32_t */ temp = temp->status_
7605     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7606     // Compare the bitstring bits using SUB.
7607     __ subl(temp, Immediate(path_to_root));
7608     // Shift out bits that do not contribute to the comparison.
7609     __ shll(temp, Immediate(32u - mask_bits));
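    // The shift leaves zero (setting ZF) iff the masked status bits equal `path_to_root`,
    // so callers can branch on kEqual/kNotEqual.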
7610   }
7611 }
7612 
7613 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7614     HLoadString::LoadKind desired_string_load_kind) {
7615   switch (desired_string_load_kind) {
7616     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7617     case HLoadString::LoadKind::kBootImageRelRo:
7618     case HLoadString::LoadKind::kBssEntry:
7619       DCHECK(!GetCompilerOptions().IsJitCompiler());
7620       break;
7621     case HLoadString::LoadKind::kJitBootImageAddress:
7622     case HLoadString::LoadKind::kJitTableAddress:
7623       DCHECK(GetCompilerOptions().IsJitCompiler());
7624       break;
7625     case HLoadString::LoadKind::kRuntimeCall:
7626       break;
7627   }
7628   return desired_string_load_kind;
7629 }
7630 
7631 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7632   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7633   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7634   HLoadString::LoadKind load_kind = load->GetLoadKind();
7635   if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7636       load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7637       load_kind == HLoadString::LoadKind::kBssEntry) {
7638     locations->SetInAt(0, Location::RequiresRegister());
7639   }
7640   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7641     locations->SetOut(Location::RegisterLocation(EAX));
7642   } else {
7643     locations->SetOut(Location::RequiresRegister());
7644     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7645       if (codegen_->EmitNonBakerReadBarrier()) {
7646         // For non-Baker read barrier we have a temp-clobbering call.
7647       } else {
7648         // Rely on the pResolveString to save everything.
7649         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7650       }
7651     }
7652   }
7653 }
7654 
7655 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7656                                                dex::StringIndex string_index,
7657                                                Handle<mirror::String> handle) {
7658   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7659   // Add a patch entry and return the label.
7660   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7661   PatchInfo<Label>* info = &jit_string_patches_.back();
7662   return &info->label;
7663 }
7664 
7665 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7666 // move.
7667 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7668   LocationSummary* locations = load->GetLocations();
7669   Location out_loc = locations->Out();
7670   Register out = out_loc.AsRegister<Register>();
7671 
7672   switch (load->GetLoadKind()) {
7673     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7674       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7675              codegen_->GetCompilerOptions().IsBootImageExtension());
7676       Register method_address = locations->InAt(0).AsRegister<Register>();
7677       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7678       codegen_->RecordBootImageStringPatch(load);
7679       return;
7680     }
7681     case HLoadString::LoadKind::kBootImageRelRo: {
7682       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7683       Register method_address = locations->InAt(0).AsRegister<Register>();
7684       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7685       codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7686                                           CodeGenerator::GetBootImageOffset(load));
7687       return;
7688     }
7689     case HLoadString::LoadKind::kBssEntry: {
7690       Register method_address = locations->InAt(0).AsRegister<Register>();
7691       Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7692       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7693       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
7694       GenerateGcRootFieldLoad(
7695           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7696       // No need for memory fence, thanks to the x86 memory model.
7697       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7698       codegen_->AddSlowPath(slow_path);
7699       __ testl(out, out);
7700       __ j(kEqual, slow_path->GetEntryLabel());
7701       __ Bind(slow_path->GetExitLabel());
7702       return;
7703     }
7704     case HLoadString::LoadKind::kJitBootImageAddress: {
7705       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7706       DCHECK_NE(address, 0u);
7707       __ movl(out, Immediate(address));
7708       return;
7709     }
7710     case HLoadString::LoadKind::kJitTableAddress: {
7711       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7712       Label* fixup_label = codegen_->NewJitRootStringPatch(
7713           load->GetDexFile(), load->GetStringIndex(), load->GetString());
7714       // /* GcRoot<mirror::String> */ out = *address
7715       GenerateGcRootFieldLoad(
7716           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7717       return;
7718     }
7719     default:
7720       break;
7721   }
7722 
7723   InvokeRuntimeCallingConvention calling_convention;
7724   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7725   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7726   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7727   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7728 }
7729 
7730 static Address GetExceptionTlsAddress() {
7731   return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7732 }
7733 
7734 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7735   LocationSummary* locations =
7736       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7737   locations->SetOut(Location::RequiresRegister());
7738 }
7739 
7740 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7741   __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7742 }
7743 
7744 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7745   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7746 }
7747 
7748 void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) {
7749   __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7750 }
7751 
7752 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7753   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7754       instruction, LocationSummary::kCallOnMainOnly);
7755   InvokeRuntimeCallingConvention calling_convention;
7756   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7757 }
7758 
7759 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7760   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7761   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7762 }
7763 
7764 // Temp is used for read barrier.
7765 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7766   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
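    // The interface fast path always needs one temp to walk the `IfTable`.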
7767     return 1;
7768   }
7769   if (emit_read_barrier &&
7770       !kUseBakerReadBarrier &&
7771       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7772        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7773        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7774     return 1;
7775   }
7776   return 0;
7777 }
7778 
7779 // The interface case has 2 temps: one to hold the number of interfaces, one for the current
7780 // interface pointer; the current interface is compared in memory.
7781 // The other checks have one temp for loading the object's class.
7782 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7783   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7784 }
7785 
7786 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7787   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7788   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7789   bool baker_read_barrier_slow_path = false;
7790   switch (type_check_kind) {
7791     case TypeCheckKind::kExactCheck:
7792     case TypeCheckKind::kAbstractClassCheck:
7793     case TypeCheckKind::kClassHierarchyCheck:
7794     case TypeCheckKind::kArrayObjectCheck:
7795     case TypeCheckKind::kInterfaceCheck: {
7796       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7797       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7798       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7799                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
7800       break;
7801     }
7802     case TypeCheckKind::kArrayCheck:
7803     case TypeCheckKind::kUnresolvedCheck:
7804       call_kind = LocationSummary::kCallOnSlowPath;
7805       break;
7806     case TypeCheckKind::kBitstringCheck:
7807       break;
7808   }
7809 
7810   LocationSummary* locations =
7811       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7812   if (baker_read_barrier_slow_path) {
7813     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7814   }
7815   locations->SetInAt(0, Location::RequiresRegister());
7816   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7817     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7818     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7819     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7820   } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7821     locations->SetInAt(1, Location::RequiresRegister());
7822   } else {
7823     locations->SetInAt(1, Location::Any());
7824   }
7825   // Note that TypeCheckSlowPathX86 uses this "out" register too.
7826   locations->SetOut(Location::RequiresRegister());
7827   // When read barriers are enabled, we need a temporary register for some cases.
7828   locations->AddRegisterTemps(
7829       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7830 }
7831 
7832 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7833   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7834   LocationSummary* locations = instruction->GetLocations();
7835   Location obj_loc = locations->InAt(0);
7836   Register obj = obj_loc.AsRegister<Register>();
7837   Location cls = locations->InAt(1);
7838   Location out_loc = locations->Out();
7839   Register out = out_loc.AsRegister<Register>();
7840   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7841   DCHECK_LE(num_temps, 1u);
7842   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7843   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7844   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7845   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7846   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7847   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7848   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7849   const uint32_t object_array_data_offset =
7850       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7851   SlowPathCode* slow_path = nullptr;
7852   NearLabel done, zero;
7853 
7854   // Return 0 if `obj` is null.
7855   // Avoid null check if we know obj is not null.
7856   if (instruction->MustDoNullCheck()) {
7857     __ testl(obj, obj);
7858     __ j(kEqual, &zero);
7859   }
7860 
7861   switch (type_check_kind) {
7862     case TypeCheckKind::kExactCheck: {
7863       ReadBarrierOption read_barrier_option =
7864           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7865       // /* HeapReference<Class> */ out = obj->klass_
7866       GenerateReferenceLoadTwoRegisters(instruction,
7867                                         out_loc,
7868                                         obj_loc,
7869                                         class_offset,
7870                                         read_barrier_option);
7871       if (cls.IsRegister()) {
7872         __ cmpl(out, cls.AsRegister<Register>());
7873       } else {
7874         DCHECK(cls.IsStackSlot()) << cls;
7875         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7876       }
7877 
7878       // Classes must be equal for the instanceof to succeed.
7879       __ j(kNotEqual, &zero);
7880       __ movl(out, Immediate(1));
7881       __ jmp(&done);
7882       break;
7883     }
7884 
7885     case TypeCheckKind::kAbstractClassCheck: {
7886       ReadBarrierOption read_barrier_option =
7887           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7888       // /* HeapReference<Class> */ out = obj->klass_
7889       GenerateReferenceLoadTwoRegisters(instruction,
7890                                         out_loc,
7891                                         obj_loc,
7892                                         class_offset,
7893                                         read_barrier_option);
7894       // If the class is abstract, we eagerly fetch the super class of the
7895       // object to avoid doing a comparison we know will fail.
7896       NearLabel loop;
7897       __ Bind(&loop);
7898       // /* HeapReference<Class> */ out = out->super_class_
7899       GenerateReferenceLoadOneRegister(instruction,
7900                                        out_loc,
7901                                        super_offset,
7902                                        maybe_temp_loc,
7903                                        read_barrier_option);
7904       __ testl(out, out);
7905       // If `out` is null, we use it for the result, and jump to `done`.
7906       __ j(kEqual, &done);
7907       if (cls.IsRegister()) {
7908         __ cmpl(out, cls.AsRegister<Register>());
7909       } else {
7910         DCHECK(cls.IsStackSlot()) << cls;
7911         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7912       }
7913       __ j(kNotEqual, &loop);
7914       __ movl(out, Immediate(1));
7915       if (zero.IsLinked()) {
7916         __ jmp(&done);
7917       }
7918       break;
7919     }
7920 
7921     case TypeCheckKind::kClassHierarchyCheck: {
7922       ReadBarrierOption read_barrier_option =
7923           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7924       // /* HeapReference<Class> */ out = obj->klass_
7925       GenerateReferenceLoadTwoRegisters(instruction,
7926                                         out_loc,
7927                                         obj_loc,
7928                                         class_offset,
7929                                         read_barrier_option);
7930       // Walk over the class hierarchy to find a match.
7931       NearLabel loop, success;
7932       __ Bind(&loop);
7933       if (cls.IsRegister()) {
7934         __ cmpl(out, cls.AsRegister<Register>());
7935       } else {
7936         DCHECK(cls.IsStackSlot()) << cls;
7937         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7938       }
7939       __ j(kEqual, &success);
7940       // /* HeapReference<Class> */ out = out->super_class_
7941       GenerateReferenceLoadOneRegister(instruction,
7942                                        out_loc,
7943                                        super_offset,
7944                                        maybe_temp_loc,
7945                                        read_barrier_option);
7946       __ testl(out, out);
7947       __ j(kNotEqual, &loop);
7948       // If `out` is null, we use it for the result, and jump to `done`.
7949       __ jmp(&done);
7950       __ Bind(&success);
7951       __ movl(out, Immediate(1));
7952       if (zero.IsLinked()) {
7953         __ jmp(&done);
7954       }
7955       break;
7956     }
7957 
7958     case TypeCheckKind::kArrayObjectCheck: {
7959       ReadBarrierOption read_barrier_option =
7960           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7961       // /* HeapReference<Class> */ out = obj->klass_
7962       GenerateReferenceLoadTwoRegisters(instruction,
7963                                         out_loc,
7964                                         obj_loc,
7965                                         class_offset,
7966                                         read_barrier_option);
7967       // Do an exact check.
7968       NearLabel exact_check;
7969       if (cls.IsRegister()) {
7970         __ cmpl(out, cls.AsRegister<Register>());
7971       } else {
7972         DCHECK(cls.IsStackSlot()) << cls;
7973         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7974       }
7975       __ j(kEqual, &exact_check);
7976       // Otherwise, we need to check that the object's class is a non-primitive array.
7977       // /* HeapReference<Class> */ out = out->component_type_
7978       GenerateReferenceLoadOneRegister(instruction,
7979                                        out_loc,
7980                                        component_offset,
7981                                        maybe_temp_loc,
7982                                        read_barrier_option);
7983       __ testl(out, out);
7984       // If `out` is null, we use it for the result, and jump to `done`.
7985       __ j(kEqual, &done);
7986       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7987       __ j(kNotEqual, &zero);
7988       __ Bind(&exact_check);
7989       __ movl(out, Immediate(1));
7990       __ jmp(&done);
7991       break;
7992     }
7993 
7994     case TypeCheckKind::kArrayCheck: {
7995       // No read barrier since the slow path will retry upon failure.
7996       // /* HeapReference<Class> */ out = obj->klass_
7997       GenerateReferenceLoadTwoRegisters(instruction,
7998                                         out_loc,
7999                                         obj_loc,
8000                                         class_offset,
8001                                         kWithoutReadBarrier);
8002       if (cls.IsRegister()) {
8003         __ cmpl(out, cls.AsRegister<Register>());
8004       } else {
8005         DCHECK(cls.IsStackSlot()) << cls;
8006         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
8007       }
8008       DCHECK(locations->OnlyCallsOnSlowPath());
8009       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8010           instruction, /* is_fatal= */ false);
8011       codegen_->AddSlowPath(slow_path);
8012       __ j(kNotEqual, slow_path->GetEntryLabel());
8013       __ movl(out, Immediate(1));
8014       if (zero.IsLinked()) {
8015         __ jmp(&done);
8016       }
8017       break;
8018     }
8019 
8020     case TypeCheckKind::kInterfaceCheck: {
8021       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8022         DCHECK(locations->OnlyCallsOnSlowPath());
8023         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8024             instruction, /* is_fatal= */ false);
8025         codegen_->AddSlowPath(slow_path);
8026         if (codegen_->EmitNonBakerReadBarrier()) {
8027           __ jmp(slow_path->GetEntryLabel());
8028           break;
8029         }
8030         // For Baker read barrier, take the slow path while marking.
8031         __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()),
8032                       Immediate(0));
8033         __ j(kNotEqual, slow_path->GetEntryLabel());
8034       }
8035 
8036       // Fast-path without read barriers.
8037       Register temp = maybe_temp_loc.AsRegister<Register>();
8038       // /* HeapReference<Class> */ temp = obj->klass_
8039       __ movl(temp, Address(obj, class_offset));
8040       __ MaybeUnpoisonHeapReference(temp);
8041       // /* HeapReference<Class> */ temp = temp->iftable_
8042       __ movl(temp, Address(temp, iftable_offset));
8043       __ MaybeUnpoisonHeapReference(temp);
8044       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8045       __ movl(out, Address(temp, array_length_offset));
8046       // Maybe poison the `cls` for direct comparison with memory.
8047       __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8048       // Loop through the iftable and check if any class matches.
8049       NearLabel loop, end;
8050       __ Bind(&loop);
8051       // Check if we still have an entry to compare.
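      // Each `IfTable` entry holds two references (the interface class and its method array),
      // hence the stride of 2.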
8052       __ subl(out, Immediate(2));
8053       __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
8054       // Go to next interface if the classes do not match.
8055       __ cmpl(cls.AsRegister<Register>(),
8056               CodeGeneratorX86::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
8057       __ j(kNotEqual, &loop);
8058       if (zero.IsLinked()) {
8059         __ movl(out, Immediate(1));
8060         // If `cls` was poisoned above, unpoison it.
8061         __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8062         __ jmp(&done);
8063         if (kPoisonHeapReferences) {
8064           // The false case needs to unpoison the class before jumping to `zero`.
8065           __ Bind(&end);
8066           __ UnpoisonHeapReference(cls.AsRegister<Register>());
8067           __ jmp(&zero);
8068         }
8069       } else {
8070         // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
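        // Success falls through and sets `out` to -1, so the add below yields 1; failure
        // reaches `end` with `out` == -2, yielding 0.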
8071         __ movl(out, Immediate(-1));
8072         __ Bind(&end);
8073         __ addl(out, Immediate(2));
8074         // If `cls` was poisoned above, unpoison it.
8075         __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8076       }
8077       break;
8078     }
8079 
8080     case TypeCheckKind::kUnresolvedCheck: {
8081       // Note that we indeed only call on slow path, but we always go
8082       // into the slow path for the unresolved check case.
8083       //
8084       // We cannot directly call the InstanceofNonTrivial runtime
8085       // entry point without resorting to a type checking slow path
8086       // here (i.e. by calling InvokeRuntime directly), as it would
8087       // require to assign fixed registers for the inputs of this
8088       // HInstanceOf instruction (following the runtime calling
8089       // convention), which might be cluttered by the potential first
8090       // read barrier emission at the beginning of this method.
8091       //
8092       // TODO: Introduce a new runtime entry point taking the object
8093       // to test (instead of its class) as argument, and let it deal
8094       // with the read barrier issues. This will let us refactor this
8095       // case of the `switch` code as it was previously (with a direct
8096       // call to the runtime not using a type checking slow path).
8097       // This should also be beneficial for the other cases above.
8098       DCHECK(locations->OnlyCallsOnSlowPath());
8099       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8100           instruction, /* is_fatal= */ false);
8101       codegen_->AddSlowPath(slow_path);
8102       __ jmp(slow_path->GetEntryLabel());
8103       break;
8104     }
8105 
8106     case TypeCheckKind::kBitstringCheck: {
8107       // /* HeapReference<Class> */ temp = obj->klass_
8108       GenerateReferenceLoadTwoRegisters(instruction,
8109                                         out_loc,
8110                                         obj_loc,
8111                                         class_offset,
8112                                         kWithoutReadBarrier);
8113 
8114       GenerateBitstringTypeCheckCompare(instruction, out);
8115       __ j(kNotEqual, &zero);
8116       __ movl(out, Immediate(1));
8117       __ jmp(&done);
8118       break;
8119     }
8120   }
8121 
8122   if (zero.IsLinked()) {
8123     __ Bind(&zero);
8124     __ xorl(out, out);
8125   }
8126 
8127   if (done.IsLinked()) {
8128     __ Bind(&done);
8129   }
8130 
8131   if (slow_path != nullptr) {
8132     __ Bind(slow_path->GetExitLabel());
8133   }
8134 }
8135 
8136 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
8137   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8138   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8139   LocationSummary* locations =
8140       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8141   locations->SetInAt(0, Location::RequiresRegister());
8142   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8143     // Require a register for the interface check since there is a loop that compares the class to
8144     // a memory address.
8145     locations->SetInAt(1, Location::RequiresRegister());
8146   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8147     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8148     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8149     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8150   } else {
8151     locations->SetInAt(1, Location::Any());
8152   }
8153   locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8154 }
8155 
8156 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
8157   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8158   LocationSummary* locations = instruction->GetLocations();
8159   Location obj_loc = locations->InAt(0);
8160   Register obj = obj_loc.AsRegister<Register>();
8161   Location cls = locations->InAt(1);
8162   Location temp_loc = locations->GetTemp(0);
8163   Register temp = temp_loc.AsRegister<Register>();
8164   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8165   DCHECK_GE(num_temps, 1u);
8166   DCHECK_LE(num_temps, 2u);
8167   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8168   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8169   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8170   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8171   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8172   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8173   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8174   const uint32_t object_array_data_offset =
8175       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8176 
8177   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8178   SlowPathCode* type_check_slow_path =
8179       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8180           instruction, is_type_check_slow_path_fatal);
8181   codegen_->AddSlowPath(type_check_slow_path);
8182 
8183   NearLabel done;
8184   // Avoid null check if we know obj is not null.
8185   if (instruction->MustDoNullCheck()) {
8186     __ testl(obj, obj);
8187     __ j(kEqual, &done);
8188   }
8189 
8190   switch (type_check_kind) {
8191     case TypeCheckKind::kExactCheck:
8192     case TypeCheckKind::kArrayCheck: {
8193       // /* HeapReference<Class> */ temp = obj->klass_
8194       GenerateReferenceLoadTwoRegisters(instruction,
8195                                         temp_loc,
8196                                         obj_loc,
8197                                         class_offset,
8198                                         kWithoutReadBarrier);
8199 
8200       if (cls.IsRegister()) {
8201         __ cmpl(temp, cls.AsRegister<Register>());
8202       } else {
8203         DCHECK(cls.IsStackSlot()) << cls;
8204         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8205       }
8206       // Jump to slow path for throwing the exception or doing a
8207       // more involved array check.
8208       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8209       break;
8210     }
8211 
8212     case TypeCheckKind::kAbstractClassCheck: {
8213       // /* HeapReference<Class> */ temp = obj->klass_
8214       GenerateReferenceLoadTwoRegisters(instruction,
8215                                         temp_loc,
8216                                         obj_loc,
8217                                         class_offset,
8218                                         kWithoutReadBarrier);
8219 
8220       // If the class is abstract, we eagerly fetch the super class of the
8221       // object to avoid doing a comparison we know will fail.
8222       NearLabel loop;
8223       __ Bind(&loop);
8224       // /* HeapReference<Class> */ temp = temp->super_class_
8225       GenerateReferenceLoadOneRegister(instruction,
8226                                        temp_loc,
8227                                        super_offset,
8228                                        maybe_temp2_loc,
8229                                        kWithoutReadBarrier);
8230 
8231       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8232       // exception.
8233       __ testl(temp, temp);
8234       __ j(kZero, type_check_slow_path->GetEntryLabel());
8235 
8236       // Otherwise, compare the classes.
8237       if (cls.IsRegister()) {
8238         __ cmpl(temp, cls.AsRegister<Register>());
8239       } else {
8240         DCHECK(cls.IsStackSlot()) << cls;
8241         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8242       }
8243       __ j(kNotEqual, &loop);
8244       break;
8245     }
8246 
8247     case TypeCheckKind::kClassHierarchyCheck: {
8248       // /* HeapReference<Class> */ temp = obj->klass_
8249       GenerateReferenceLoadTwoRegisters(instruction,
8250                                         temp_loc,
8251                                         obj_loc,
8252                                         class_offset,
8253                                         kWithoutReadBarrier);
8254 
8255       // Walk over the class hierarchy to find a match.
8256       NearLabel loop;
8257       __ Bind(&loop);
8258       if (cls.IsRegister()) {
8259         __ cmpl(temp, cls.AsRegister<Register>());
8260       } else {
8261         DCHECK(cls.IsStackSlot()) << cls;
8262         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8263       }
8264       __ j(kEqual, &done);
8265 
8266       // /* HeapReference<Class> */ temp = temp->super_class_
8267       GenerateReferenceLoadOneRegister(instruction,
8268                                        temp_loc,
8269                                        super_offset,
8270                                        maybe_temp2_loc,
8271                                        kWithoutReadBarrier);
8272 
8273       // If the class reference currently in `temp` is not null, jump
8274       // back to the beginning of the loop.
8275       __ testl(temp, temp);
8276       __ j(kNotZero, &loop);
8277       // Otherwise, jump to the slow path to throw the exception.
8278       __ jmp(type_check_slow_path->GetEntryLabel());
8279       break;
8280     }
8281 
8282     case TypeCheckKind::kArrayObjectCheck: {
8283       // /* HeapReference<Class> */ temp = obj->klass_
8284       GenerateReferenceLoadTwoRegisters(instruction,
8285                                         temp_loc,
8286                                         obj_loc,
8287                                         class_offset,
8288                                         kWithoutReadBarrier);
8289 
8290       // Do an exact check.
8291       if (cls.IsRegister()) {
8292         __ cmpl(temp, cls.AsRegister<Register>());
8293       } else {
8294         DCHECK(cls.IsStackSlot()) << cls;
8295         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8296       }
8297       __ j(kEqual, &done);
8298 
8299       // Otherwise, we need to check that the object's class is a non-primitive array.
8300       // /* HeapReference<Class> */ temp = temp->component_type_
8301       GenerateReferenceLoadOneRegister(instruction,
8302                                        temp_loc,
8303                                        component_offset,
8304                                        maybe_temp2_loc,
8305                                        kWithoutReadBarrier);
8306 
8307       // If the component type is null (i.e. the object is not an array), jump to the slow path to
8308       // throw the exception. Otherwise proceed with the check.
8309       __ testl(temp, temp);
8310       __ j(kZero, type_check_slow_path->GetEntryLabel());
8311 
8312       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
8313       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8314       break;
8315     }
8316 
8317     case TypeCheckKind::kUnresolvedCheck:
8318       // We always go into the type check slow path for the unresolved check case.
8319       // We cannot directly call the CheckCast runtime entry point
8320       // without resorting to a type checking slow path here (i.e. by
8321       // calling InvokeRuntime directly), as it would require to
8322       // assign fixed registers for the inputs of this HInstanceOf
8323       // instruction (following the runtime calling convention), which
8324       // might be cluttered by the potential first read barrier
8325       // emission at the beginning of this method.
8326       __ jmp(type_check_slow_path->GetEntryLabel());
8327       break;
8328 
8329     case TypeCheckKind::kInterfaceCheck: {
8330       // Fast path for the interface check, avoiding read barriers where possible;
8331       // doing so cannot produce false positives.
8332       // /* HeapReference<Class> */ temp = obj->klass_
8333       GenerateReferenceLoadTwoRegisters(instruction,
8334                                         temp_loc,
8335                                         obj_loc,
8336                                         class_offset,
8337                                         kWithoutReadBarrier);
8338 
8339       // /* HeapReference<Class> */ temp = temp->iftable_
8340       GenerateReferenceLoadOneRegister(instruction,
8341                                        temp_loc,
8342                                        iftable_offset,
8343                                        maybe_temp2_loc,
8344                                        kWithoutReadBarrier);
8345       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8346       __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
8347       // Maybe poison the `cls` for direct comparison with memory.
8348       __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8349       // Loop through the iftable and check if any class matches.
8350       NearLabel start_loop;
8351       __ Bind(&start_loop);
8352       // Check if we still have an entry to compare.
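      // As in the instanceof fast path, `IfTable` entries come in (interface, method array)
      // pairs, so step by 2.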
8353       __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
8354       __ j(kNegative, type_check_slow_path->GetEntryLabel());
8355       // Go to next interface if the classes do not match.
8356       __ cmpl(cls.AsRegister<Register>(),
8357               CodeGeneratorX86::ArrayAddress(temp,
8358                                              maybe_temp2_loc,
8359                                              TIMES_4,
8360                                              object_array_data_offset));
8361       __ j(kNotEqual, &start_loop);
8362       // If `cls` was poisoned above, unpoison it.
8363       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8364       break;
8365     }
8366 
8367     case TypeCheckKind::kBitstringCheck: {
8368       // /* HeapReference<Class> */ temp = obj->klass_
8369       GenerateReferenceLoadTwoRegisters(instruction,
8370                                         temp_loc,
8371                                         obj_loc,
8372                                         class_offset,
8373                                         kWithoutReadBarrier);
8374 
8375       GenerateBitstringTypeCheckCompare(instruction, temp);
8376       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8377       break;
8378     }
8379   }
8380   __ Bind(&done);
8381 
8382   __ Bind(type_check_slow_path->GetExitLabel());
8383 }
8384 
VisitMonitorOperation(HMonitorOperation * instruction)8385 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8386   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8387       instruction, LocationSummary::kCallOnMainOnly);
8388   InvokeRuntimeCallingConvention calling_convention;
8389   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8390 }
8391 
VisitMonitorOperation(HMonitorOperation * instruction)8392 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8393   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8394                                                  : kQuickUnlockObject,
8395                           instruction,
8396                           instruction->GetDexPc());
8397   if (instruction->IsEnter()) {
8398     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8399   } else {
8400     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8401   }
8402 }
8403 
VisitX86AndNot(HX86AndNot * instruction)8404 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8405   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8406   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8407   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8408   locations->SetInAt(0, Location::RequiresRegister());
8409   locations->SetInAt(1, Location::RequiresRegister());
8410   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8411 }
8412 
VisitX86AndNot(HX86AndNot * instruction)8413 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8414   LocationSummary* locations = instruction->GetLocations();
8415   Location first = locations->InAt(0);
8416   Location second = locations->InAt(1);
8417   Location dest = locations->Out();
8418   if (instruction->GetResultType() == DataType::Type::kInt32) {
8419     __ andn(dest.AsRegister<Register>(),
8420             first.AsRegister<Register>(),
8421             second.AsRegister<Register>());
8422   } else {
8423     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8424     __ andn(dest.AsRegisterPairLow<Register>(),
8425             first.AsRegisterPairLow<Register>(),
8426             second.AsRegisterPairLow<Register>());
8427     __ andn(dest.AsRegisterPairHigh<Register>(),
8428             first.AsRegisterPairHigh<Register>(),
8429             second.AsRegisterPairHigh<Register>());
8430   }
8431 }
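// For reference (an explanatory sketch, not generated code): the `andn dest, src1, src2`
// instruction used above computes `dest = ~src1 & src2`, so the 64-bit case is handled by
// applying the same operation pairwise to the low and high register halves.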
8432 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)8433 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8434   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8435   DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8436   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8437   locations->SetInAt(0, Location::RequiresRegister());
8438   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8439 }
8440 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)8441 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8442     HX86MaskOrResetLeastSetBit* instruction) {
8443   LocationSummary* locations = instruction->GetLocations();
8444   Location src = locations->InAt(0);
8445   Location dest = locations->Out();
8446   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
8447   switch (instruction->GetOpKind()) {
8448     case HInstruction::kAnd:
8449       __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8450       break;
8451     case HInstruction::kXor:
8452       __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8453       break;
8454     default:
8455       LOG(FATAL) << "Unreachable";
8456   }
8457 }
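// For reference (an explanatory sketch, not generated code): `blsr dest, src` computes
// `dest = src & (src - 1)`, clearing the lowest set bit, while `blsmsk dest, src` computes
// `dest = src ^ (src - 1)`, producing a mask of all bits up to and including the lowest
// set bit; this is why kAnd maps to `blsr` and kXor maps to `blsmsk` above.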
8458 
VisitAnd(HAnd * instruction)8459 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)8460 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)8461 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8462 
HandleBitwiseOperation(HBinaryOperation * instruction)8463 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8464   LocationSummary* locations =
8465       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8466   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8467          || instruction->GetResultType() == DataType::Type::kInt64);
8468   locations->SetInAt(0, Location::RequiresRegister());
8469   locations->SetInAt(1, Location::Any());
8470   locations->SetOut(Location::SameAsFirstInput());
8471 }
8472 
VisitAnd(HAnd * instruction)8473 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8474   HandleBitwiseOperation(instruction);
8475 }
8476 
VisitOr(HOr * instruction)8477 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8478   HandleBitwiseOperation(instruction);
8479 }
8480 
VisitXor(HXor * instruction)8481 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8482   HandleBitwiseOperation(instruction);
8483 }
8484 
HandleBitwiseOperation(HBinaryOperation * instruction)8485 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8486   LocationSummary* locations = instruction->GetLocations();
8487   Location first = locations->InAt(0);
8488   Location second = locations->InAt(1);
8489   DCHECK(first.Equals(locations->Out()));
8490 
8491   if (instruction->GetResultType() == DataType::Type::kInt32) {
8492     if (second.IsRegister()) {
8493       if (instruction->IsAnd()) {
8494         __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8495       } else if (instruction->IsOr()) {
8496         __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8497       } else {
8498         DCHECK(instruction->IsXor());
8499         __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8500       }
8501     } else if (second.IsConstant()) {
8502       if (instruction->IsAnd()) {
8503         __ andl(first.AsRegister<Register>(),
8504                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8505       } else if (instruction->IsOr()) {
8506         __ orl(first.AsRegister<Register>(),
8507                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8508       } else {
8509         DCHECK(instruction->IsXor());
8510         __ xorl(first.AsRegister<Register>(),
8511                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8512       }
8513     } else {
8514       if (instruction->IsAnd()) {
8515         __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8516       } else if (instruction->IsOr()) {
8517         __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8518       } else {
8519         DCHECK(instruction->IsXor());
8520         __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8521       }
8522     }
8523   } else {
8524     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8525     if (second.IsRegisterPair()) {
8526       if (instruction->IsAnd()) {
8527         __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8528         __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8529       } else if (instruction->IsOr()) {
8530         __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8531         __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8532       } else {
8533         DCHECK(instruction->IsXor());
8534         __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8535         __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8536       }
8537     } else if (second.IsDoubleStackSlot()) {
8538       if (instruction->IsAnd()) {
8539         __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8540         __ andl(first.AsRegisterPairHigh<Register>(),
8541                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8542       } else if (instruction->IsOr()) {
8543         __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8544         __ orl(first.AsRegisterPairHigh<Register>(),
8545                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8546       } else {
8547         DCHECK(instruction->IsXor());
8548         __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8549         __ xorl(first.AsRegisterPairHigh<Register>(),
8550                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8551       }
8552     } else {
8553       DCHECK(second.IsConstant()) << second;
8554       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8555       int32_t low_value = Low32Bits(value);
8556       int32_t high_value = High32Bits(value);
8557       Immediate low(low_value);
8558       Immediate high(high_value);
8559       Register first_low = first.AsRegisterPairLow<Register>();
8560       Register first_high = first.AsRegisterPairHigh<Register>();
8561       if (instruction->IsAnd()) {
8562         if (low_value == 0) {
8563           __ xorl(first_low, first_low);
8564         } else if (low_value != -1) {
8565           __ andl(first_low, low);
8566         }
8567         if (high_value == 0) {
8568           __ xorl(first_high, first_high);
8569         } else if (high_value != -1) {
8570           __ andl(first_high, high);
8571         }
8572       } else if (instruction->IsOr()) {
8573         if (low_value != 0) {
8574           __ orl(first_low, low);
8575         }
8576         if (high_value != 0) {
8577           __ orl(first_high, high);
8578         }
8579       } else {
8580         DCHECK(instruction->IsXor());
8581         if (low_value != 0) {
8582           __ xorl(first_low, low);
8583         }
8584         if (high_value != 0) {
8585           __ xorl(first_high, high);
8586         }
8587       }
8588     }
8589   }
8590 }
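// Worked example for the 64-bit constant path above (illustrative only, not generated
// code): for `x & 0x00000000FFFFFFFF`, low_value is -1 so the low half is left untouched,
// and high_value is 0 so the high half is cleared with a single `xorl high, high`; no
// `andl` instruction is emitted at all.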
8591 
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)8592 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8593     HInstruction* instruction,
8594     Location out,
8595     uint32_t offset,
8596     Location maybe_temp,
8597     ReadBarrierOption read_barrier_option) {
8598   Register out_reg = out.AsRegister<Register>();
8599   if (read_barrier_option == kWithReadBarrier) {
8600     DCHECK(codegen_->EmitReadBarrier());
8601     if (kUseBakerReadBarrier) {
8602       // Load with fast path based Baker's read barrier.
8603       // /* HeapReference<Object> */ out = *(out + offset)
8604       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8605           instruction, out, out_reg, offset, /* needs_null_check= */ false);
8606     } else {
8607       // Load with slow path based read barrier.
8608       // Save the value of `out` into `maybe_temp` before overwriting it
8609       // in the following move operation, as we will need it for the
8610       // read barrier below.
8611       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8612       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8613       // /* HeapReference<Object> */ out = *(out + offset)
8614       __ movl(out_reg, Address(out_reg, offset));
8615       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8616     }
8617   } else {
8618     // Plain load with no read barrier.
8619     // /* HeapReference<Object> */ out = *(out + offset)
8620     __ movl(out_reg, Address(out_reg, offset));
8621     __ MaybeUnpoisonHeapReference(out_reg);
8622   }
8623 }
8624 
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)8625 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8626     HInstruction* instruction,
8627     Location out,
8628     Location obj,
8629     uint32_t offset,
8630     ReadBarrierOption read_barrier_option) {
8631   Register out_reg = out.AsRegister<Register>();
8632   Register obj_reg = obj.AsRegister<Register>();
8633   if (read_barrier_option == kWithReadBarrier) {
8634     DCHECK(codegen_->EmitReadBarrier());
8635     if (kUseBakerReadBarrier) {
8636       // Load with fast path based Baker's read barrier.
8637       // /* HeapReference<Object> */ out = *(obj + offset)
8638       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8639           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8640     } else {
8641       // Load with slow path based read barrier.
8642       // /* HeapReference<Object> */ out = *(obj + offset)
8643       __ movl(out_reg, Address(obj_reg, offset));
8644       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8645     }
8646   } else {
8647     // Plain load with no read barrier.
8648     // /* HeapReference<Object> */ out = *(obj + offset)
8649     __ movl(out_reg, Address(obj_reg, offset));
8650     __ MaybeUnpoisonHeapReference(out_reg);
8651   }
8652 }
8653 
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)8654 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8655     HInstruction* instruction,
8656     Location root,
8657     const Address& address,
8658     Label* fixup_label,
8659     ReadBarrierOption read_barrier_option) {
8660   Register root_reg = root.AsRegister<Register>();
8661   if (read_barrier_option == kWithReadBarrier) {
8662     DCHECK(codegen_->EmitReadBarrier());
8663     if (kUseBakerReadBarrier) {
8664       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8665       // Baker's read barriers are used:
8666       //
8667       //   root = obj.field;
8668       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8669       //   if (temp != null) {
8670       //     root = temp(root)
8671       //   }
8672 
8673       // /* GcRoot<mirror::Object> */ root = *address
8674       __ movl(root_reg, address);
8675       if (fixup_label != nullptr) {
8676         __ Bind(fixup_label);
8677       }
8678       static_assert(
8679           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8680           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8681           "have different sizes.");
8682       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8683                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
8684                     "have different sizes.");
8685 
8686       // Slow path marking the GC root `root`.
8687       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8688           instruction, root, /* unpoison_ref_before_marking= */ false);
8689       codegen_->AddSlowPath(slow_path);
8690 
8691       // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8692       const int32_t entry_point_offset =
8693           Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8694       __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8695       // The entrypoint is null when the GC is not marking.
8696       __ j(kNotEqual, slow_path->GetEntryLabel());
8697       __ Bind(slow_path->GetExitLabel());
8698     } else {
8699       // GC root loaded through a slow path for read barriers other
8700       // than Baker's.
8701       // /* GcRoot<mirror::Object>* */ root = address
8702       __ leal(root_reg, address);
8703       if (fixup_label != nullptr) {
8704         __ Bind(fixup_label);
8705       }
8706       // /* mirror::Object* */ root = root->Read()
8707       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8708     }
8709   } else {
8710     // Plain GC root load with no read barrier.
8711     // /* GcRoot<mirror::Object> */ root = *address
8712     __ movl(root_reg, address);
8713     if (fixup_label != nullptr) {
8714       __ Bind(fixup_label);
8715     }
8716     // Note that GC roots are not affected by heap poisoning, thus we
8717     // do not have to unpoison `root_reg` here.
8718   }
8719 }
8720 
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)8721 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8722                                                              Location ref,
8723                                                              Register obj,
8724                                                              uint32_t offset,
8725                                                              bool needs_null_check) {
8726   DCHECK(EmitBakerReadBarrier());
8727 
8728   // /* HeapReference<Object> */ ref = *(obj + offset)
8729   Address src(obj, offset);
8730   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8731 }
8732 
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)8733 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8734                                                              Location ref,
8735                                                              Register obj,
8736                                                              uint32_t data_offset,
8737                                                              Location index,
8738                                                              bool needs_null_check) {
8739   DCHECK(EmitBakerReadBarrier());
8740 
8741   static_assert(
8742       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8743       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8744   // /* HeapReference<Object> */ ref =
8745   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
8746   Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8747   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8748 }
8749 
GenerateReferenceLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,const Address & src,bool needs_null_check,bool always_update_field,Register * temp)8750 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8751                                                                  Location ref,
8752                                                                  Register obj,
8753                                                                  const Address& src,
8754                                                                  bool needs_null_check,
8755                                                                  bool always_update_field,
8756                                                                  Register* temp) {
8757   DCHECK(EmitBakerReadBarrier());
8758 
8759   // In slow path based read barriers, the read barrier call is
8760   // inserted after the original load. However, in fast path based
8761   // Baker's read barriers, we need to perform the load of
8762   // mirror::Object::monitor_ *before* the original reference load.
8763   // This load-load ordering is required by the read barrier.
8764   // The fast path/slow path (for Baker's algorithm) should look like:
8765   //
8766   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8767   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
8768   //   HeapReference<Object> ref = *src;  // Original reference load.
8769   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
8770   //   if (is_gray) {
8771   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
8772   //   }
8773   //
8774   // Note: the original implementation in ReadBarrier::Barrier is
8775   // slightly more complex as:
8776   // - it implements the load-load fence using a data dependency on
8777   //   the high-bits of rb_state, which are expected to be all zeroes
8778   //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8779   //   which is a no-op thanks to the x86 memory model);
8780   // - it performs additional checks that we do not do here for
8781   //   performance reasons.
8782 
8783   Register ref_reg = ref.AsRegister<Register>();
8784   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8785 
8786   // Given the numeric representation, it's enough to check the low bit of the rb_state.
8787   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8788   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8789   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8790   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8791   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8792 
8793   // if (rb_state == ReadBarrier::GrayState())
8794   //   ref = ReadBarrier::Mark(ref);
8795   // At this point, just do the "if" and make sure that flags are preserved until the branch.
8796   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8797   if (needs_null_check) {
8798     MaybeRecordImplicitNullCheck(instruction);
8799   }
8800 
8801   // Load fence to prevent load-load reordering.
8802   // Note that this is a no-op, thanks to the x86 memory model.
8803   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8804 
8805   // The actual reference load.
8806   // /* HeapReference<Object> */ ref = *src
8807   __ movl(ref_reg, src);  // Flags are unaffected.
8808 
8809   // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
8810   // Slow path marking the object `ref` when it is gray.
8811   SlowPathCode* slow_path;
8812   if (always_update_field) {
8813     DCHECK(temp != nullptr);
8814     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8815         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8816   } else {
8817     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8818         instruction, ref, /* unpoison_ref_before_marking= */ true);
8819   }
8820   AddSlowPath(slow_path);
8821 
8822   // We have done the "if" of the gray bit check above, now branch based on the flags.
8823   __ j(kNotZero, slow_path->GetEntryLabel());
8824 
8825   // Object* ref = ref_addr->AsMirrorPtr()
8826   __ MaybeUnpoisonHeapReference(ref_reg);
8827 
8828   __ Bind(slow_path->GetExitLabel());
8829 }
8830 
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8831 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8832                                                Location out,
8833                                                Location ref,
8834                                                Location obj,
8835                                                uint32_t offset,
8836                                                Location index) {
8837   DCHECK(EmitReadBarrier());
8838 
8839   // Insert a slow path based read barrier *after* the reference load.
8840   //
8841   // If heap poisoning is enabled, the unpoisoning of the loaded
8842   // reference will be carried out by the runtime within the slow
8843   // path.
8844   //
8845   // Note that `ref` currently does not get unpoisoned (when heap
8846   // poisoning is enabled), which is alright as the `ref` argument is
8847   // not used by the artReadBarrierSlow entry point.
8848   //
8849   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8850   SlowPathCode* slow_path = new (GetScopedAllocator())
8851       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8852   AddSlowPath(slow_path);
8853 
8854   __ jmp(slow_path->GetEntryLabel());
8855   __ Bind(slow_path->GetExitLabel());
8856 }
8857 
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8858 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8859                                                     Location out,
8860                                                     Location ref,
8861                                                     Location obj,
8862                                                     uint32_t offset,
8863                                                     Location index) {
8864   if (EmitReadBarrier()) {
8865     // Baker's read barriers shall be handled by the fast path
8866     // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8867     DCHECK(!kUseBakerReadBarrier);
8868     // If heap poisoning is enabled, unpoisoning will be taken care of
8869     // by the runtime within the slow path.
8870     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8871   } else if (kPoisonHeapReferences) {
8872     __ UnpoisonHeapReference(out.AsRegister<Register>());
8873   }
8874 }
8875 
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8876 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8877                                                       Location out,
8878                                                       Location root) {
8879   DCHECK(EmitReadBarrier());
8880 
8881   // Insert a slow path based read barrier *after* the GC root load.
8882   //
8883   // Note that GC roots are not affected by heap poisoning, so we do
8884   // not need to do anything special for this here.
8885   SlowPathCode* slow_path =
8886       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8887   AddSlowPath(slow_path);
8888 
8889   __ jmp(slow_path->GetEntryLabel());
8890   __ Bind(slow_path->GetExitLabel());
8891 }
8892 
VisitBoundType(HBoundType * instruction)8893 void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8894   // Nothing to do, this should be removed during prepare for register allocator.
8895   LOG(FATAL) << "Unreachable";
8896 }
8897 
VisitBoundType(HBoundType * instruction)8898 void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8899   // Nothing to do, this should be removed during prepare for register allocator.
8900   LOG(FATAL) << "Unreachable";
8901 }
8902 
8903 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8904 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8905   LocationSummary* locations =
8906       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8907   locations->SetInAt(0, Location::RequiresRegister());
8908 }
8909 
GenPackedSwitchWithCompares(Register value_reg,int32_t lower_bound,uint32_t num_entries,HBasicBlock * switch_block,HBasicBlock * default_block)8910 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8911                                                               int32_t lower_bound,
8912                                                               uint32_t num_entries,
8913                                                               HBasicBlock* switch_block,
8914                                                               HBasicBlock* default_block) {
8915   // Figure out the correct compare values and jump conditions.
8916   // Handle the first compare/branch as a special case because it might
8917   // jump to the default case.
8918   DCHECK_GT(num_entries, 2u);
8919   Condition first_condition;
8920   uint32_t index;
8921   const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8922   if (lower_bound != 0) {
8923     first_condition = kLess;
8924     __ cmpl(value_reg, Immediate(lower_bound));
8925     __ j(first_condition, codegen_->GetLabelOf(default_block));
8926     __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8927 
8928     index = 1;
8929   } else {
8930     // Handle all the compare/jumps below.
8931     first_condition = kBelow;
8932     index = 0;
8933   }
8934 
8935   // Handle the rest of the compare/jumps.
8936   for (; index + 1 < num_entries; index += 2) {
8937     int32_t compare_to_value = lower_bound + index + 1;
8938     __ cmpl(value_reg, Immediate(compare_to_value));
8939     // Jump to successors[index] if value < case_value[index].
8940     __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8941     // Jump to successors[index + 1] if value == case_value[index + 1].
8942     __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8943   }
8944 
8945   if (index != num_entries) {
8946     // There are an odd number of entries. Handle the last one.
8947     DCHECK_EQ(index + 1, num_entries);
8948     __ cmpl(value_reg, Immediate(lower_bound + index));
8949     __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8950   }
8951 
8952   // And the default for any other value.
8953   if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8954     __ jmp(codegen_->GetLabelOf(default_block));
8955   }
8956 }
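// Illustration of the cascade generated above, assuming lower_bound == 10 and four
// entries (hypothetical labels, not generated code):
//   cmpl value, 10    ; jl default_case ; je case_10
//   cmpl value, 12    ; jl case_11      ; je case_12
//   cmpl value, 13    ; je case_13
//   jmp default_case  ; omitted when the default block is the fall-through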
8957 
VisitPackedSwitch(HPackedSwitch * switch_instr)8958 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8959   int32_t lower_bound = switch_instr->GetStartValue();
8960   uint32_t num_entries = switch_instr->GetNumEntries();
8961   LocationSummary* locations = switch_instr->GetLocations();
8962   Register value_reg = locations->InAt(0).AsRegister<Register>();
8963 
8964   GenPackedSwitchWithCompares(value_reg,
8965                               lower_bound,
8966                               num_entries,
8967                               switch_instr->GetBlock(),
8968                               switch_instr->GetDefaultBlock());
8969 }
8970 
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8971 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8972   LocationSummary* locations =
8973       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8974   locations->SetInAt(0, Location::RequiresRegister());
8975 
8976   // Constant area pointer.
8977   locations->SetInAt(1, Location::RequiresRegister());
8978 
8979   // And the temporary we need.
8980   locations->AddTemp(Location::RequiresRegister());
8981 }
8982 
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8983 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8984   int32_t lower_bound = switch_instr->GetStartValue();
8985   uint32_t num_entries = switch_instr->GetNumEntries();
8986   LocationSummary* locations = switch_instr->GetLocations();
8987   Register value_reg = locations->InAt(0).AsRegister<Register>();
8988   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8989 
8990   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8991     GenPackedSwitchWithCompares(value_reg,
8992                                 lower_bound,
8993                                 num_entries,
8994                                 switch_instr->GetBlock(),
8995                                 default_block);
8996     return;
8997   }
8998 
8999   // Optimizing uses a jump table stored in the constant area.
9000   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
9001   Register constant_area = locations->InAt(1).AsRegister<Register>();
9002 
9003   // Remove the bias, if needed.
9004   if (lower_bound != 0) {
9005     __ leal(temp_reg, Address(value_reg, -lower_bound));
9006     value_reg = temp_reg;
9007   }
9008 
9009   // Is the value in range?
9010   DCHECK_GE(num_entries, 1u);
9011   __ cmpl(value_reg, Immediate(num_entries - 1));
9012   __ j(kAbove, codegen_->GetLabelOf(default_block));
9013 
9014   // We are in the range of the table.
9015   // Load (target-constant_area) from the jump table, indexing by the value.
9016   __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
9017 
9018   // Compute the actual target address by adding in constant_area.
9019   __ addl(temp_reg, constant_area);
9020 
9021   // And jump.
9022   __ jmp(temp_reg);
9023 }
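// Shape of the emitted dispatch sequence above (a sketch, not generated code; `table_disp`
// stands for the patched displacement from the constant-area base register to the jump
// table):
//   leal temp, [value - lower_bound]             ; only when lower_bound != 0
//   cmpl temp, num_entries - 1
//   ja   default_block
//   movl temp, [constant_area + table_disp + temp * 4]
//   addl temp, constant_area
//   jmp  temp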
9024 
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)9025 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
9026     HX86ComputeBaseMethodAddress* insn) {
9027   LocationSummary* locations =
9028       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
9029   locations->SetOut(Location::RequiresRegister());
9030 }
9031 
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)9032 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
9033     HX86ComputeBaseMethodAddress* insn) {
9034   LocationSummary* locations = insn->GetLocations();
9035   Register reg = locations->Out().AsRegister<Register>();
9036 
9037   // Generate call to next instruction.
9038   Label next_instruction;
9039   __ call(&next_instruction);
9040   __ Bind(&next_instruction);
9041 
9042   // Remember this offset for later use with the constant area.
9043   codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
9044 
9045   // Grab the return address off the stack.
9046   __ popl(reg);
9047 }
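// For reference, this is the classic 32-bit get-PC idiom (a sketch, not generated code):
//   call next        ; pushes the address of `next` as the return address
// next:
//   popl reg         ; `reg` now holds the address of `next`
// The code size recorded at `next` is the base that the RIP fixups below subtract when
// patching constant-area displacements.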
9048 
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)9049 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
9050     HX86LoadFromConstantTable* insn) {
9051   LocationSummary* locations =
9052       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
9053 
9054   locations->SetInAt(0, Location::RequiresRegister());
9055   locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
9056 
9057   // If the load doesn't need to be materialized, we only need the inputs to be set.
9058   if (insn->IsEmittedAtUseSite()) {
9059     return;
9060   }
9061 
9062   switch (insn->GetType()) {
9063     case DataType::Type::kFloat32:
9064     case DataType::Type::kFloat64:
9065       locations->SetOut(Location::RequiresFpuRegister());
9066       break;
9067 
9068     case DataType::Type::kInt32:
9069       locations->SetOut(Location::RequiresRegister());
9070       break;
9071 
9072     default:
9073       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9074   }
9075 }
9076 
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)9077 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
9078   if (insn->IsEmittedAtUseSite()) {
9079     return;
9080   }
9081 
9082   LocationSummary* locations = insn->GetLocations();
9083   Location out = locations->Out();
9084   Register const_area = locations->InAt(0).AsRegister<Register>();
9085   HConstant *value = insn->GetConstant();
9086 
9087   switch (insn->GetType()) {
9088     case DataType::Type::kFloat32:
9089       __ movss(out.AsFpuRegister<XmmRegister>(),
9090                codegen_->LiteralFloatAddress(
9091                    value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9092       break;
9093 
9094     case DataType::Type::kFloat64:
9095       __ movsd(out.AsFpuRegister<XmmRegister>(),
9096                codegen_->LiteralDoubleAddress(
9097                    value->AsDoubleConstant()->GetValue(),
9098                    insn->GetBaseMethodAddress(),
9099                    const_area));
9100       break;
9101 
9102     case DataType::Type::kInt32:
9103       __ movl(out.AsRegister<Register>(),
9104               codegen_->LiteralInt32Address(
9105                   value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9106       break;
9107 
9108     default:
9109       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9110   }
9111 }
9112 
9113 /**
9114  * Class to handle late fixup of offsets into constant area.
9115  */
9116 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
9117  public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)9118   RIPFixup(CodeGeneratorX86& codegen,
9119            HX86ComputeBaseMethodAddress* base_method_address,
9120            size_t offset)
9121       : codegen_(&codegen),
9122         base_method_address_(base_method_address),
9123         offset_into_constant_area_(offset) {}
9124 
9125  protected:
SetOffset(size_t offset)9126   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
9127 
9128   CodeGeneratorX86* codegen_;
9129   HX86ComputeBaseMethodAddress* base_method_address_;
9130 
9131  private:
Process(const MemoryRegion & region,int pos)9132   void Process(const MemoryRegion& region, int pos) override {
9133     // Patch the correct offset for the instruction.  The place to patch is the
9134     // last 4 bytes of the instruction.
9135     // The value to patch is the distance of the offset in the constant area
9136     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
9137     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
9138     int32_t relative_position =
9139         constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
9140 
9141     // Patch in the right value.
9142     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
9143   }
9144 
9145   // Location in constant area that the fixup refers to.
9146   int32_t offset_into_constant_area_;
9147 };
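// Worked example of the fixup arithmetic above (hypothetical numbers, for illustration
// only): if the constant area starts at code offset 0x400, the constant sits 0x10 bytes
// into it, and the base method address was recorded at offset 0x20, the patched
// displacement is 0x400 + 0x10 - 0x20 = 0x3F0, i.e. the distance from the register
// holding the base method address to the constant.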
9148 
9149 /**
9150  * Class to handle late fixup of offsets to a jump table that will be created in the
9151  * constant area.
9152  */
9153 class JumpTableRIPFixup : public RIPFixup {
9154  public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)9155   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
9156       : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
9157         switch_instr_(switch_instr) {}
9158 
CreateJumpTable()9159   void CreateJumpTable() {
9160     X86Assembler* assembler = codegen_->GetAssembler();
9161 
9162     // Ensure that the reference to the jump table has the correct offset.
9163     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
9164     SetOffset(offset_in_constant_table);
9165 
9166     // The label values in the jump table are computed relative to the
9167     // instruction addressing the constant area.
9168     const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
9169 
9170     // Populate the jump table with the correct values for the jump table.
9171     int32_t num_entries = switch_instr_->GetNumEntries();
9172     HBasicBlock* block = switch_instr_->GetBlock();
9173     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
9174     // The value that we want is the target offset - the base method address offset.
9175     for (int32_t i = 0; i < num_entries; i++) {
9176       HBasicBlock* b = successors[i];
9177       Label* l = codegen_->GetLabelOf(b);
9178       DCHECK(l->IsBound());
9179       int32_t offset_to_block = l->Position() - relative_offset;
9180       assembler->AppendInt32(offset_to_block);
9181     }
9182   }
9183 
9184  private:
9185   const HX86PackedSwitch* switch_instr_;
9186 };
9187 
Finalize()9188 void CodeGeneratorX86::Finalize() {
9189   // Generate the constant area if needed.
9190   X86Assembler* assembler = GetAssembler();
9191 
9192   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
9193     // Align to a 4-byte boundary to reduce cache misses, as the data consists of
9194     // 4- and 8-byte values.
9195     assembler->Align(4, 0);
9196     constant_area_start_ = assembler->CodeSize();
9197 
9198     // Populate any jump tables.
9199     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
9200       jump_table->CreateJumpTable();
9201     }
9202 
9203     // And now add the constant area to the generated code.
9204     assembler->AddConstantArea();
9205   }
9206 
9207   // And finish up.
9208   CodeGenerator::Finalize();
9209 }
9210 
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)9211 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
9212                                                HX86ComputeBaseMethodAddress* method_base,
9213                                                Register reg) {
9214   AssemblerFixup* fixup =
9215       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
9216   return Address(reg, kPlaceholder32BitOffset, fixup);
9217 }
9218 
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)9219 Address CodeGeneratorX86::LiteralFloatAddress(float v,
9220                                               HX86ComputeBaseMethodAddress* method_base,
9221                                               Register reg) {
9222   AssemblerFixup* fixup =
9223       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
9224   return Address(reg, kPlaceholder32BitOffset, fixup);
9225 }
9226 
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)9227 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
9228                                               HX86ComputeBaseMethodAddress* method_base,
9229                                               Register reg) {
9230   AssemblerFixup* fixup =
9231       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
9232   return Address(reg, kPlaceholder32BitOffset, fixup);
9233 }
9234 
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)9235 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
9236                                               HX86ComputeBaseMethodAddress* method_base,
9237                                               Register reg) {
9238   AssemblerFixup* fixup =
9239       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
9240   return Address(reg, kPlaceholder32BitOffset, fixup);
9241 }
9242 
Load32BitValue(Register dest,int32_t value)9243 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
9244   if (value == 0) {
9245     __ xorl(dest, dest);
9246   } else {
9247     __ movl(dest, Immediate(value));
9248   }
9249 }
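// Design note: `xorl reg, reg` is preferred over `movl reg, 0` because it has a shorter
// encoding and is a recognized zeroing idiom; similarly, the `testl reg, reg` used in
// Compare32BitValue below avoids the longer `cmpl reg, 0` encoding while setting the
// same flags for a comparison against zero.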
9250 
Compare32BitValue(Register dest,int32_t value)9251 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
9252   if (value == 0) {
9253     __ testl(dest, dest);
9254   } else {
9255     __ cmpl(dest, Immediate(value));
9256   }
9257 }
9258 
GenerateIntCompare(Location lhs,Location rhs)9259 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
9260   Register lhs_reg = lhs.AsRegister<Register>();
9261   GenerateIntCompare(lhs_reg, rhs);
9262 }
9263 
GenerateIntCompare(Register lhs,Location rhs)9264 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
9265   if (rhs.IsConstant()) {
9266     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
9267     Compare32BitValue(lhs, value);
9268   } else if (rhs.IsStackSlot()) {
9269     __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
9270   } else {
9271     __ cmpl(lhs, rhs.AsRegister<Register>());
9272   }
9273 }
9274 
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)9275 Address CodeGeneratorX86::ArrayAddress(Register obj,
9276                                        Location index,
9277                                        ScaleFactor scale,
9278                                        uint32_t data_offset) {
9279   return index.IsConstant()
9280       ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
9281       : Address(obj, index.AsRegister<Register>(), scale, data_offset);
9282 }
9283 
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)9284 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
9285                                            Register reg,
9286                                            Register value) {
9287   // Create a fixup to be used to create and address the jump table.
9288   JumpTableRIPFixup* table_fixup =
9289       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
9290 
9291   // We have to populate the jump tables.
9292   fixups_to_jump_tables_.push_back(table_fixup);
9293 
9294   // We want a scaled address, as we are extracting the correct offset from the table.
9295   return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
9296 }
9297 
9298 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)9299 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
9300   if (!target.IsValid()) {
9301     DCHECK_EQ(type, DataType::Type::kVoid);
9302     return;
9303   }
9304 
9305   DCHECK_NE(type, DataType::Type::kVoid);
9306 
9307   Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
9308   if (target.Equals(return_loc)) {
9309     return;
9310   }
9311 
9312   // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9313   //       with the else branch.
9314   if (type == DataType::Type::kInt64) {
9315     HParallelMove parallel_move(GetGraph()->GetAllocator());
9316     parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
9317     parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
9318     GetMoveResolver()->EmitNativeCode(&parallel_move);
9319   } else {
9320     // Let the parallel move resolver take care of all of this.
9321     HParallelMove parallel_move(GetGraph()->GetAllocator());
9322     parallel_move.AddMove(return_loc, target, type, nullptr);
9323     GetMoveResolver()->EmitNativeCode(&parallel_move);
9324   }
9325 }
9326 
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const9327 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
9328                                        const uint8_t* roots_data,
9329                                        const PatchInfo<Label>& info,
9330                                        uint64_t index_in_table) const {
9331   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
9332   uintptr_t address =
9333       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9334   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
9335   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
9336       dchecked_integral_cast<uint32_t>(address);
9337 }
9338 
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)9339 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9340   for (const PatchInfo<Label>& info : jit_string_patches_) {
9341     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
9342     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9343     PatchJitRootUse(code, roots_data, info, index_in_table);
9344   }
9345 
9346   for (const PatchInfo<Label>& info : jit_class_patches_) {
9347     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
9348     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9349     PatchJitRootUse(code, roots_data, info, index_in_table);
9350   }
9351 }
9352 
VisitIntermediateAddress(HIntermediateAddress * instruction)9353 void LocationsBuilderX86::VisitIntermediateAddress(
9354     [[maybe_unused]] HIntermediateAddress* instruction) {
9355   LOG(FATAL) << "Unreachable";
9356 }
9357 
VisitIntermediateAddress(HIntermediateAddress * instruction)9358 void InstructionCodeGeneratorX86::VisitIntermediateAddress(
9359     [[maybe_unused]] HIntermediateAddress* instruction) {
9360   LOG(FATAL) << "Unreachable";
9361 }
9362 
CpuHasAvxFeatureFlag()9363 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
9364   return codegen_->GetInstructionSetFeatures().HasAVX();
9365 }
CpuHasAvx2FeatureFlag()9366 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
9367   return codegen_->GetInstructionSetFeatures().HasAVX2();
9368 }
CpuHasAvxFeatureFlag()9369 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
9370   return codegen_->GetInstructionSetFeatures().HasAVX();
9371 }
CpuHasAvx2FeatureFlag()9372 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
9373   return codegen_->GetInstructionSetFeatures().HasAVX2();
9374 }
9375 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)9376 void LocationsBuilderX86::VisitBitwiseNegatedRight(
9377     [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9378   LOG(FATAL) << "Unimplemented";
9379 }
9380 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)9381 void InstructionCodeGeneratorX86::VisitBitwiseNegatedRight(
9382     [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9383   LOG(FATAL) << "Unimplemented";
9384 }
9385 
9386 #undef __
9387 
9388 }  // namespace x86
9389 }  // namespace art
9390