1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/var_handle.h"
39 #include "optimizing/nodes.h"
40 #include "profiling_info_builder.h"
41 #include "scoped_thread_state_change-inl.h"
42 #include "thread.h"
43 #include "trace.h"
44 #include "utils/assembler.h"
45 #include "utils/stack_checks.h"
46 #include "utils/x86/assembler_x86.h"
47 #include "utils/x86/constants_x86.h"
48 #include "utils/x86/managed_register_x86.h"
49
50 namespace art HIDDEN {
51
52 template<class MirrorType>
53 class GcRoot;
54
55 namespace x86 {
56
57 static constexpr int kCurrentMethodStackOffset = 0;
58 static constexpr Register kMethodRegisterArgument = EAX;
59 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
60
61 static constexpr int kC2ConditionMask = 0x400;
62
63 static constexpr int kFakeReturnRegister = Register(8);
64
65 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
66 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
67
OneRegInReferenceOutSaveEverythingCallerSaves()68 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
69 InvokeRuntimeCallingConvention calling_convention;
70 RegisterSet caller_saves = RegisterSet::Empty();
71 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
72 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
73 // that the kPrimNot result register is the same as the first argument register.
74 return caller_saves;
75 }
76
// Assembler shorthand: `__ foo(...)` expands to a call of `foo(...)` on the X86Assembler of
// `codegen`. A variable named `codegen` must therefore be in scope wherever `__` is used.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
// 32-bit value of the offset of quick entrypoint `x`, computed for the x86 pointer size.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
80
81 class NullCheckSlowPathX86 : public SlowPathCode {
82 public:
NullCheckSlowPathX86(HNullCheck * instruction)83 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
84
EmitNativeCode(CodeGenerator * codegen)85 void EmitNativeCode(CodeGenerator* codegen) override {
86 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
87 __ Bind(GetEntryLabel());
88 if (instruction_->CanThrowIntoCatchBlock()) {
89 // Live registers will be restored in the catch block if caught.
90 SaveLiveRegisters(codegen, instruction_->GetLocations());
91 }
92 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
93 instruction_,
94 instruction_->GetDexPc(),
95 this);
96 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97 }
98
IsFatal() const99 bool IsFatal() const override { return true; }
100
GetDescription() const101 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
102
103 private:
104 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
105 };
106
107 class DivZeroCheckSlowPathX86 : public SlowPathCode {
108 public:
DivZeroCheckSlowPathX86(HDivZeroCheck * instruction)109 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110
EmitNativeCode(CodeGenerator * codegen)111 void EmitNativeCode(CodeGenerator* codegen) override {
112 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
113 __ Bind(GetEntryLabel());
114 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116 }
117
IsFatal() const118 bool IsFatal() const override { return true; }
119
GetDescription() const120 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
121
122 private:
123 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
124 };
125
126 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
127 public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)128 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
129 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
130
EmitNativeCode(CodeGenerator * codegen)131 void EmitNativeCode(CodeGenerator* codegen) override {
132 __ Bind(GetEntryLabel());
133 if (is_div_) {
134 __ negl(reg_);
135 } else {
136 __ movl(reg_, Immediate(0));
137 }
138 __ jmp(GetExitLabel());
139 }
140
GetDescription() const141 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
142
143 private:
144 Register reg_;
145 bool is_div_;
146 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
147 };
148
149 class BoundsCheckSlowPathX86 : public SlowPathCode {
150 public:
BoundsCheckSlowPathX86(HBoundsCheck * instruction)151 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
152
EmitNativeCode(CodeGenerator * codegen)153 void EmitNativeCode(CodeGenerator* codegen) override {
154 LocationSummary* locations = instruction_->GetLocations();
155 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
156 __ Bind(GetEntryLabel());
157 if (instruction_->CanThrowIntoCatchBlock()) {
158 // Live registers will be restored in the catch block if caught.
159 SaveLiveRegisters(codegen, locations);
160 }
161
162 Location index_loc = locations->InAt(0);
163 Location length_loc = locations->InAt(1);
164 InvokeRuntimeCallingConvention calling_convention;
165 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
166 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
167
168 // Are we using an array length from memory?
169 if (!length_loc.IsValid()) {
170 DCHECK(instruction_->InputAt(1)->IsArrayLength());
171 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
172 DCHECK(array_length->IsEmittedAtUseSite());
173 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
174 Location array_loc = array_length->GetLocations()->InAt(0);
175 if (!index_loc.Equals(length_arg)) {
176 // The index is not clobbered by loading the length directly to `length_arg`.
177 __ movl(length_arg.AsRegister<Register>(),
178 Address(array_loc.AsRegister<Register>(), len_offset));
179 x86_codegen->Move32(index_arg, index_loc);
180 } else if (!array_loc.Equals(index_arg)) {
181 // The array reference is not clobbered by the index move.
182 x86_codegen->Move32(index_arg, index_loc);
183 __ movl(length_arg.AsRegister<Register>(),
184 Address(array_loc.AsRegister<Register>(), len_offset));
185 } else {
186 // We do not have a temporary we could use, so swap the registers using the
187 // parallel move resolver and replace the array with the length afterwards.
188 codegen->EmitParallelMoves(
189 index_loc,
190 index_arg,
191 DataType::Type::kInt32,
192 array_loc,
193 length_arg,
194 DataType::Type::kReference);
195 __ movl(length_arg.AsRegister<Register>(),
196 Address(length_arg.AsRegister<Register>(), len_offset));
197 }
198 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
199 __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
200 }
201 } else {
202 // We're moving two locations to locations that could overlap,
203 // so we need a parallel move resolver.
204 codegen->EmitParallelMoves(
205 index_loc,
206 index_arg,
207 DataType::Type::kInt32,
208 length_loc,
209 length_arg,
210 DataType::Type::kInt32);
211 }
212
213 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
214 ? kQuickThrowStringBounds
215 : kQuickThrowArrayBounds;
216 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
217 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
218 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
219 }
220
IsFatal() const221 bool IsFatal() const override { return true; }
222
GetDescription() const223 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
224
225 private:
226 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
227 };
228
229 class SuspendCheckSlowPathX86 : public SlowPathCode {
230 public:
SuspendCheckSlowPathX86(HSuspendCheck * instruction,HBasicBlock * successor)231 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
232 : SlowPathCode(instruction), successor_(successor) {}
233
EmitNativeCode(CodeGenerator * codegen)234 void EmitNativeCode(CodeGenerator* codegen) override {
235 LocationSummary* locations = instruction_->GetLocations();
236 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
237 __ Bind(GetEntryLabel());
238 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
239 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
240 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
241 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
242 if (successor_ == nullptr) {
243 __ jmp(GetReturnLabel());
244 } else {
245 __ jmp(x86_codegen->GetLabelOf(successor_));
246 }
247 }
248
GetReturnLabel()249 Label* GetReturnLabel() {
250 DCHECK(successor_ == nullptr);
251 return &return_label_;
252 }
253
GetSuccessor() const254 HBasicBlock* GetSuccessor() const {
255 return successor_;
256 }
257
GetDescription() const258 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
259
260 private:
261 HBasicBlock* const successor_;
262 Label return_label_;
263
264 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
265 };
266
267 class LoadStringSlowPathX86 : public SlowPathCode {
268 public:
LoadStringSlowPathX86(HLoadString * instruction)269 explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
270
EmitNativeCode(CodeGenerator * codegen)271 void EmitNativeCode(CodeGenerator* codegen) override {
272 LocationSummary* locations = instruction_->GetLocations();
273 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
274
275 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
276 __ Bind(GetEntryLabel());
277 SaveLiveRegisters(codegen, locations);
278
279 InvokeRuntimeCallingConvention calling_convention;
280 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
281 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
282 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
283 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
284 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
285 RestoreLiveRegisters(codegen, locations);
286
287 __ jmp(GetExitLabel());
288 }
289
GetDescription() const290 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
291
292 private:
293 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
294 };
295
296 class LoadClassSlowPathX86 : public SlowPathCode {
297 public:
LoadClassSlowPathX86(HLoadClass * cls,HInstruction * at)298 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
299 : SlowPathCode(at), cls_(cls) {
300 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
301 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
302 }
303
EmitNativeCode(CodeGenerator * codegen)304 void EmitNativeCode(CodeGenerator* codegen) override {
305 LocationSummary* locations = instruction_->GetLocations();
306 Location out = locations->Out();
307 const uint32_t dex_pc = instruction_->GetDexPc();
308 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
309 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
310
311 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
312 __ Bind(GetEntryLabel());
313 SaveLiveRegisters(codegen, locations);
314
315 InvokeRuntimeCallingConvention calling_convention;
316 if (must_resolve_type) {
317 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
318 x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
319 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
320 &cls_->GetDexFile()));
321 dex::TypeIndex type_index = cls_->GetTypeIndex();
322 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
323 if (cls_->NeedsAccessCheck()) {
324 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
325 x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
326 } else {
327 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
328 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
329 }
330 // If we also must_do_clinit, the resolved type is now in the correct register.
331 } else {
332 DCHECK(must_do_clinit);
333 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
334 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
335 }
336 if (must_do_clinit) {
337 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
338 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
339 }
340
341 // Move the class to the desired location.
342 if (out.IsValid()) {
343 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
344 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
345 }
346 RestoreLiveRegisters(codegen, locations);
347 __ jmp(GetExitLabel());
348 }
349
GetDescription() const350 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
351
352 private:
353 // The class this slow path will load.
354 HLoadClass* const cls_;
355
356 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
357 };
358
359 class TypeCheckSlowPathX86 : public SlowPathCode {
360 public:
TypeCheckSlowPathX86(HInstruction * instruction,bool is_fatal)361 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
362 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
363
EmitNativeCode(CodeGenerator * codegen)364 void EmitNativeCode(CodeGenerator* codegen) override {
365 LocationSummary* locations = instruction_->GetLocations();
366 DCHECK(instruction_->IsCheckCast()
367 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
368
369 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
370 __ Bind(GetEntryLabel());
371
372 if (kPoisonHeapReferences &&
373 instruction_->IsCheckCast() &&
374 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
375 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
376 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
377 }
378
379 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
380 SaveLiveRegisters(codegen, locations);
381 }
382
383 // We're moving two locations to locations that could overlap, so we need a parallel
384 // move resolver.
385 InvokeRuntimeCallingConvention calling_convention;
386 x86_codegen->EmitParallelMoves(locations->InAt(0),
387 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
388 DataType::Type::kReference,
389 locations->InAt(1),
390 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
391 DataType::Type::kReference);
392 if (instruction_->IsInstanceOf()) {
393 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
394 instruction_,
395 instruction_->GetDexPc(),
396 this);
397 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
398 } else {
399 DCHECK(instruction_->IsCheckCast());
400 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
401 instruction_,
402 instruction_->GetDexPc(),
403 this);
404 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
405 }
406
407 if (!is_fatal_) {
408 if (instruction_->IsInstanceOf()) {
409 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
410 }
411 RestoreLiveRegisters(codegen, locations);
412
413 __ jmp(GetExitLabel());
414 }
415 }
416
GetDescription() const417 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
IsFatal() const418 bool IsFatal() const override { return is_fatal_; }
419
420 private:
421 const bool is_fatal_;
422
423 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
424 };
425
426 class DeoptimizationSlowPathX86 : public SlowPathCode {
427 public:
DeoptimizationSlowPathX86(HDeoptimize * instruction)428 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
429 : SlowPathCode(instruction) {}
430
EmitNativeCode(CodeGenerator * codegen)431 void EmitNativeCode(CodeGenerator* codegen) override {
432 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
433 __ Bind(GetEntryLabel());
434 LocationSummary* locations = instruction_->GetLocations();
435 SaveLiveRegisters(codegen, locations);
436 InvokeRuntimeCallingConvention calling_convention;
437 x86_codegen->Load32BitValue(
438 calling_convention.GetRegisterAt(0),
439 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
440 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
441 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
442 }
443
GetDescription() const444 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
445
446 private:
447 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
448 };
449
450 class ArraySetSlowPathX86 : public SlowPathCode {
451 public:
ArraySetSlowPathX86(HInstruction * instruction)452 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
453
EmitNativeCode(CodeGenerator * codegen)454 void EmitNativeCode(CodeGenerator* codegen) override {
455 LocationSummary* locations = instruction_->GetLocations();
456 __ Bind(GetEntryLabel());
457 SaveLiveRegisters(codegen, locations);
458
459 InvokeRuntimeCallingConvention calling_convention;
460 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
461 parallel_move.AddMove(
462 locations->InAt(0),
463 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
464 DataType::Type::kReference,
465 nullptr);
466 parallel_move.AddMove(
467 locations->InAt(1),
468 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
469 DataType::Type::kInt32,
470 nullptr);
471 parallel_move.AddMove(
472 locations->InAt(2),
473 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
474 DataType::Type::kReference,
475 nullptr);
476 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
477
478 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
479 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
480 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
481 RestoreLiveRegisters(codegen, locations);
482 __ jmp(GetExitLabel());
483 }
484
GetDescription() const485 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
486
487 private:
488 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
489 };
490
491 // Slow path marking an object reference `ref` during a read
492 // barrier. The field `obj.field` in the object `obj` holding this
493 // reference does not get updated by this slow path after marking (see
494 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
495 //
496 // This means that after the execution of this slow path, `ref` will
497 // always be up-to-date, but `obj.field` may not; i.e., after the
498 // flip, `ref` will be a to-space reference, but `obj.field` will
499 // probably still be a from-space reference (unless it gets updated by
500 // another thread, or if another thread installed another object
501 // reference (different from `ref`) in `obj.field`).
502 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
503 public:
ReadBarrierMarkSlowPathX86(HInstruction * instruction,Location ref,bool unpoison_ref_before_marking)504 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
505 Location ref,
506 bool unpoison_ref_before_marking)
507 : SlowPathCode(instruction),
508 ref_(ref),
509 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
510 }
511
GetDescription() const512 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
513
EmitNativeCode(CodeGenerator * codegen)514 void EmitNativeCode(CodeGenerator* codegen) override {
515 DCHECK(codegen->EmitReadBarrier());
516 LocationSummary* locations = instruction_->GetLocations();
517 Register ref_reg = ref_.AsRegister<Register>();
518 DCHECK(locations->CanCall());
519 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
520 DCHECK(instruction_->IsInstanceFieldGet() ||
521 instruction_->IsStaticFieldGet() ||
522 instruction_->IsArrayGet() ||
523 instruction_->IsArraySet() ||
524 instruction_->IsLoadClass() ||
525 instruction_->IsLoadString() ||
526 instruction_->IsInstanceOf() ||
527 instruction_->IsCheckCast() ||
528 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
529 << "Unexpected instruction in read barrier marking slow path: "
530 << instruction_->DebugName();
531
532 __ Bind(GetEntryLabel());
533 if (unpoison_ref_before_marking_) {
534 // Object* ref = ref_addr->AsMirrorPtr()
535 __ MaybeUnpoisonHeapReference(ref_reg);
536 }
537 // No need to save live registers; it's taken care of by the
538 // entrypoint. Also, there is no need to update the stack mask,
539 // as this runtime call will not trigger a garbage collection.
540 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
541 DCHECK_NE(ref_reg, ESP);
542 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
543 // "Compact" slow path, saving two moves.
544 //
545 // Instead of using the standard runtime calling convention (input
546 // and output in EAX):
547 //
548 // EAX <- ref
549 // EAX <- ReadBarrierMark(EAX)
550 // ref <- EAX
551 //
552 // we just use rX (the register containing `ref`) as input and output
553 // of a dedicated entrypoint:
554 //
555 // rX <- ReadBarrierMarkRegX(rX)
556 //
557 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
558 // This runtime call does not require a stack map.
559 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
560 __ jmp(GetExitLabel());
561 }
562
563 private:
564 // The location (register) of the marked object reference.
565 const Location ref_;
566 // Should the reference in `ref_` be unpoisoned prior to marking it?
567 const bool unpoison_ref_before_marking_;
568
569 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
570 };
571
572 // Slow path marking an object reference `ref` during a read barrier,
573 // and if needed, atomically updating the field `obj.field` in the
574 // object `obj` holding this reference after marking (contrary to
575 // ReadBarrierMarkSlowPathX86 above, which never tries to update
576 // `obj.field`).
577 //
578 // This means that after the execution of this slow path, both `ref`
579 // and `obj.field` will be up-to-date; i.e., after the flip, both will
580 // hold the same to-space reference (unless another thread installed
581 // another object reference (different from `ref`) in `obj.field`).
582 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
583 public:
ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction * instruction,Location ref,Register obj,const Address & field_addr,bool unpoison_ref_before_marking,Register temp)584 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
585 Location ref,
586 Register obj,
587 const Address& field_addr,
588 bool unpoison_ref_before_marking,
589 Register temp)
590 : SlowPathCode(instruction),
591 ref_(ref),
592 obj_(obj),
593 field_addr_(field_addr),
594 unpoison_ref_before_marking_(unpoison_ref_before_marking),
595 temp_(temp) {
596 }
597
GetDescription() const598 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
599
EmitNativeCode(CodeGenerator * codegen)600 void EmitNativeCode(CodeGenerator* codegen) override {
601 DCHECK(codegen->EmitReadBarrier());
602 LocationSummary* locations = instruction_->GetLocations();
603 Register ref_reg = ref_.AsRegister<Register>();
604 DCHECK(locations->CanCall());
605 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
606 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
607 << "Unexpected instruction in read barrier marking and field updating slow path: "
608 << instruction_->DebugName();
609 HInvoke* invoke = instruction_->AsInvoke();
610 DCHECK(IsUnsafeCASReference(invoke) ||
611 IsUnsafeGetAndSetReference(invoke) ||
612 IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
613
614 __ Bind(GetEntryLabel());
615 if (unpoison_ref_before_marking_) {
616 // Object* ref = ref_addr->AsMirrorPtr()
617 __ MaybeUnpoisonHeapReference(ref_reg);
618 }
619
620 // Save the old (unpoisoned) reference.
621 __ movl(temp_, ref_reg);
622
623 // No need to save live registers; it's taken care of by the
624 // entrypoint. Also, there is no need to update the stack mask,
625 // as this runtime call will not trigger a garbage collection.
626 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
627 DCHECK_NE(ref_reg, ESP);
628 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
629 // "Compact" slow path, saving two moves.
630 //
631 // Instead of using the standard runtime calling convention (input
632 // and output in EAX):
633 //
634 // EAX <- ref
635 // EAX <- ReadBarrierMark(EAX)
636 // ref <- EAX
637 //
638 // we just use rX (the register containing `ref`) as input and output
639 // of a dedicated entrypoint:
640 //
641 // rX <- ReadBarrierMarkRegX(rX)
642 //
643 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
644 // This runtime call does not require a stack map.
645 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
646
647 // If the new reference is different from the old reference,
648 // update the field in the holder (`*field_addr`).
649 //
650 // Note that this field could also hold a different object, if
651 // another thread had concurrently changed it. In that case, the
652 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
653 // operation below would abort the CAS, leaving the field as-is.
654 NearLabel done;
655 __ cmpl(temp_, ref_reg);
656 __ j(kEqual, &done);
657
658 // Update the holder's field atomically. This may fail if
659 // mutator updates before us, but it's OK. This is achieved
660 // using a strong compare-and-set (CAS) operation with relaxed
661 // memory synchronization ordering, where the expected value is
662 // the old reference and the desired value is the new reference.
663 // This operation is implemented with a 32-bit LOCK CMPXLCHG
664 // instruction, which requires the expected value (the old
665 // reference) to be in EAX. Save EAX beforehand, and move the
666 // expected value (stored in `temp_`) into EAX.
667 __ pushl(EAX);
668 __ movl(EAX, temp_);
669
670 // Convenience aliases.
671 Register base = obj_;
672 Register expected = EAX;
673 Register value = ref_reg;
674
675 bool base_equals_value = (base == value);
676 if (kPoisonHeapReferences) {
677 if (base_equals_value) {
678 // If `base` and `value` are the same register location, move
679 // `value` to a temporary register. This way, poisoning
680 // `value` won't invalidate `base`.
681 value = temp_;
682 __ movl(value, base);
683 }
684
685 // Check that the register allocator did not assign the location
686 // of `expected` (EAX) to `value` nor to `base`, so that heap
687 // poisoning (when enabled) works as intended below.
688 // - If `value` were equal to `expected`, both references would
689 // be poisoned twice, meaning they would not be poisoned at
690 // all, as heap poisoning uses address negation.
691 // - If `base` were equal to `expected`, poisoning `expected`
692 // would invalidate `base`.
693 DCHECK_NE(value, expected);
694 DCHECK_NE(base, expected);
695
696 __ PoisonHeapReference(expected);
697 __ PoisonHeapReference(value);
698 }
699
700 __ LockCmpxchgl(field_addr_, value);
701
702 // If heap poisoning is enabled, we need to unpoison the values
703 // that were poisoned earlier.
704 if (kPoisonHeapReferences) {
705 if (base_equals_value) {
706 // `value` has been moved to a temporary register, no need
707 // to unpoison it.
708 } else {
709 __ UnpoisonHeapReference(value);
710 }
711 // No need to unpoison `expected` (EAX), as it is be overwritten below.
712 }
713
714 // Restore EAX.
715 __ popl(EAX);
716
717 __ Bind(&done);
718 __ jmp(GetExitLabel());
719 }
720
721 private:
722 // The location (register) of the marked object reference.
723 const Location ref_;
724 // The register containing the object holding the marked object reference field.
725 const Register obj_;
726 // The address of the marked reference field. The base of this address must be `obj_`.
727 const Address field_addr_;
728
729 // Should the reference in `ref_` be unpoisoned prior to marking it?
730 const bool unpoison_ref_before_marking_;
731
732 const Register temp_;
733
734 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
735 };
736
// Slow path generating a read barrier for a heap reference.
//
// The emitted code saves the live registers, calls the kQuickReadBarrierSlow
// entrypoint with (`ref`, `obj`, offset) in the first three runtime-call
// argument registers, copies the result from EAX into `out`, restores the live
// registers and jumps back to the exit label.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 public:
  // `out`:    register location receiving the value returned by the entrypoint.
  // `ref`:    location of the reference passed as first entrypoint argument.
  // `obj`:    location of the holder object passed as second entrypoint argument.
  // `offset`: constant offset; passed as-is when `index` is invalid, or added to
  //           the scaled index for HArrayGet.
  // `index`:  optional (may be invalid) array index or Unsafe offset location.
  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
                                         Location out,
                                         Location ref,
                                         Location obj,
                                         uint32_t offset,
                                         Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  // Emits the slow path. The third entrypoint argument is one of:
  //  - (index << 2) + offset_ for HArrayGet,
  //  - the low half of the `index_` register pair for the Unsafe intrinsics,
  //  - the immediate `offset_` when `index_` is invalid.
  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    // The output register must not be live, as it is overwritten with the result below.
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = index_.AsRegister<Register>();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86::X86Assembler::shll and
          // art::x86::X86Assembler::AddImmediate below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ movl(free_reg, index_reg);
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(index_reg, Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(index_reg, Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
               (instruction_->AsInvoke()->GetIntrinsic() ==
                    Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() ==
                    Intrinsics::kJdkUnsafeGetReferenceAcquire))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      // The (possibly adjusted) index takes part in the parallel move as third argument.
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      // No index: resolve the two reference moves first, then load the constant
      // offset into the third argument register.
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
    }
    x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    // The entrypoint's result is read from EAX.
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }

 private:
  // Returns the lowest-numbered core register that is neither the `ref_` nor the
  // `obj_` register and is not a callee-save register. Aborts if none is found.
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<Register>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  // Location receiving the result of the runtime call.
  const Location out_;
  // Location of the reference passed as first argument to the entrypoint.
  const Location ref_;
  // Location of the holder object passed as second argument to the entrypoint.
  const Location obj_;
  // Constant offset; see EmitNativeCode() for how it combines with `index_`.
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
};
921
922 // Slow path generating a read barrier for a GC root.
923 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
924 public:
ReadBarrierForRootSlowPathX86(HInstruction * instruction,Location out,Location root)925 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
926 : SlowPathCode(instruction), out_(out), root_(root) {
927 }
928
EmitNativeCode(CodeGenerator * codegen)929 void EmitNativeCode(CodeGenerator* codegen) override {
930 DCHECK(codegen->EmitReadBarrier());
931 LocationSummary* locations = instruction_->GetLocations();
932 Register reg_out = out_.AsRegister<Register>();
933 DCHECK(locations->CanCall());
934 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
935 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
936 << "Unexpected instruction in read barrier for GC root slow path: "
937 << instruction_->DebugName();
938
939 __ Bind(GetEntryLabel());
940 SaveLiveRegisters(codegen, locations);
941
942 InvokeRuntimeCallingConvention calling_convention;
943 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
944 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
945 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
946 instruction_,
947 instruction_->GetDexPc(),
948 this);
949 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
950 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
951
952 RestoreLiveRegisters(codegen, locations);
953 __ jmp(GetExitLabel());
954 }
955
GetDescription() const956 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
957
958 private:
959 const Location out_;
960 const Location root_;
961
962 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
963 };
964
965 class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
966 public:
MethodEntryExitHooksSlowPathX86(HInstruction * instruction)967 explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
968
EmitNativeCode(CodeGenerator * codegen)969 void EmitNativeCode(CodeGenerator* codegen) override {
970 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
971 LocationSummary* locations = instruction_->GetLocations();
972 QuickEntrypointEnum entry_point =
973 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
974 __ Bind(GetEntryLabel());
975 SaveLiveRegisters(codegen, locations);
976 if (instruction_->IsMethodExitHook()) {
977 __ movl(EBX, Immediate(codegen->GetFrameSize()));
978 }
979 x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
980 RestoreLiveRegisters(codegen, locations);
981 __ jmp(GetExitLabel());
982 }
983
GetDescription() const984 const char* GetDescription() const override {
985 return "MethodEntryExitHooksSlowPath";
986 }
987
988 private:
989 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
990 };
991
992 class CompileOptimizedSlowPathX86 : public SlowPathCode {
993 public:
CompileOptimizedSlowPathX86(HSuspendCheck * suspend_check,uint32_t counter_address)994 CompileOptimizedSlowPathX86(HSuspendCheck* suspend_check, uint32_t counter_address)
995 : SlowPathCode(suspend_check),
996 counter_address_(counter_address) {}
997
EmitNativeCode(CodeGenerator * codegen)998 void EmitNativeCode(CodeGenerator* codegen) override {
999 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
1000 __ Bind(GetEntryLabel());
1001 __ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1002 if (instruction_ != nullptr) {
1003 // Only saves full width XMM for SIMD.
1004 SaveLiveRegisters(codegen, instruction_->GetLocations());
1005 }
1006 x86_codegen->GenerateInvokeRuntime(
1007 GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1008 if (instruction_ != nullptr) {
1009 // Only restores full width XMM for SIMD.
1010 RestoreLiveRegisters(codegen, instruction_->GetLocations());
1011 }
1012 __ jmp(GetExitLabel());
1013 }
1014
GetDescription() const1015 const char* GetDescription() const override {
1016 return "CompileOptimizedSlowPath";
1017 }
1018
1019 private:
1020 uint32_t counter_address_;
1021
1022 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
1023 };
1024
// Drop the previous `__` shorthand (defined earlier in the file, outside this
// excerpt) and rebind it to the assembler returned by the enclosing object's
// own GetAssembler(), for use in the member functions that follow.
#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
1028
X86Condition(IfCondition cond)1029 inline Condition X86Condition(IfCondition cond) {
1030 switch (cond) {
1031 case kCondEQ: return kEqual;
1032 case kCondNE: return kNotEqual;
1033 case kCondLT: return kLess;
1034 case kCondLE: return kLessEqual;
1035 case kCondGT: return kGreater;
1036 case kCondGE: return kGreaterEqual;
1037 case kCondB: return kBelow;
1038 case kCondBE: return kBelowEqual;
1039 case kCondA: return kAbove;
1040 case kCondAE: return kAboveEqual;
1041 }
1042 LOG(FATAL) << "Unreachable";
1043 UNREACHABLE();
1044 }
1045
1046 // Maps signed condition to unsigned condition and FP condition to x86 name.
X86UnsignedOrFPCondition(IfCondition cond)1047 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
1048 switch (cond) {
1049 case kCondEQ: return kEqual;
1050 case kCondNE: return kNotEqual;
1051 // Signed to unsigned, and FP to x86 name.
1052 case kCondLT: return kBelow;
1053 case kCondLE: return kBelowEqual;
1054 case kCondGT: return kAbove;
1055 case kCondGE: return kAboveEqual;
1056 // Unsigned remain unchanged.
1057 case kCondB: return kBelow;
1058 case kCondBE: return kBelowEqual;
1059 case kCondA: return kAbove;
1060 case kCondAE: return kAboveEqual;
1061 }
1062 LOG(FATAL) << "Unreachable";
1063 UNREACHABLE();
1064 }
1065
DumpCoreRegister(std::ostream & stream,int reg) const1066 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
1067 stream << Register(reg);
1068 }
1069
DumpFloatingPointRegister(std::ostream & stream,int reg) const1070 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1071 stream << XmmRegister(reg);
1072 }
1073
GetInstructionSetFeatures() const1074 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
1075 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
1076 }
1077
SaveCoreRegister(size_t stack_index,uint32_t reg_id)1078 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1079 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
1080 return kX86WordSize;
1081 }
1082
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)1083 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1084 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
1085 return kX86WordSize;
1086 }
1087
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)1088 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1089 if (GetGraph()->HasSIMD()) {
1090 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
1091 } else {
1092 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
1093 }
1094 return GetSlowPathFPWidth();
1095 }
1096
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)1097 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1098 if (GetGraph()->HasSIMD()) {
1099 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
1100 } else {
1101 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1102 }
1103 return GetSlowPathFPWidth();
1104 }
1105
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1106 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1107 HInstruction* instruction,
1108 uint32_t dex_pc,
1109 SlowPathCode* slow_path) {
1110 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1111 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1112 if (EntrypointRequiresStackMap(entrypoint)) {
1113 RecordPcInfo(instruction, dex_pc, slow_path);
1114 }
1115 }
1116
// Emits a call to the runtime entrypoint located at `entry_point_offset` (an
// offset used as an absolute fs-relative address by GenerateInvokeRuntime())
// after validating the request. Unlike InvokeRuntime(), no PC info is recorded.
void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}
1123
GenerateInvokeRuntime(int32_t entry_point_offset)1124 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1125 __ fs()->call(Address::Absolute(entry_point_offset));
1126 }
1127
namespace detail {

// Mark which intrinsics we don't have handcrafted code for.
// Primary template: `is_unimplemented` is false unless a specialization
// generated below overrides it.
template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;
};

// Generates, for each name in UNIMPLEMENTED_INTRINSIC_LIST_X86, a specialization
// of IsUnimplemented whose `is_unimplemented` is true.
#define TRUE_OVERRIDE(Name)                     \
  template <>                                   \
  struct IsUnimplemented<Intrinsics::k##Name> { \
    bool is_unimplemented = true;               \
  };
UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

// Compile-time table of the flags above: one leading entry for kNone followed by
// one entry per element of ART_INTRINSICS_LIST, in list order.
// NOTE(review): presumably indexed by the `Intrinsics` enum value -- confirm
// against the consumer of this table.
static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
    IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
    ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};

}  // namespace detail
1153
// Constructs the x86 code generator for `graph`.
//
// The base CodeGenerator receives the x86 register counts, a core callee-save
// mask made of kCoreCalleeSaves plus the fake return register, an empty (0)
// floating-point callee-save mask, and the table of unimplemented intrinsics.
// All patch containers are created empty on the graph's arena allocator.
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    0,
                    compiler_options,
                    stats,
                    ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator(),
                 compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      // -1 marks the constant area start as not yet assigned.
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
1194
SetupBlockedRegisters() const1195 void CodeGeneratorX86::SetupBlockedRegisters() const {
1196 // Stack register is always reserved.
1197 blocked_core_registers_[ESP] = true;
1198 }
1199
// Constructs the instruction visitor for `graph`, caching `codegen` and the
// assembler it returns from GetAssembler().
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}
1204
DWARFReg(Register reg)1205 static dwarf::Reg DWARFReg(Register reg) {
1206 return dwarf::Reg::X86Core(static_cast<int>(reg));
1207 }
1208
SetInForReturnValue(HInstruction * ret,LocationSummary * locations)1209 void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
1210 switch (ret->InputAt(0)->GetType()) {
1211 case DataType::Type::kReference:
1212 case DataType::Type::kBool:
1213 case DataType::Type::kUint8:
1214 case DataType::Type::kInt8:
1215 case DataType::Type::kUint16:
1216 case DataType::Type::kInt16:
1217 case DataType::Type::kInt32:
1218 locations->SetInAt(0, Location::RegisterLocation(EAX));
1219 break;
1220
1221 case DataType::Type::kInt64:
1222 locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
1223 break;
1224
1225 case DataType::Type::kFloat32:
1226 case DataType::Type::kFloat64:
1227 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1228 break;
1229
1230 case DataType::Type::kVoid:
1231 locations->SetInAt(0, Location::NoLocation());
1232 break;
1233
1234 default:
1235 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
1236 }
1237 }
1238
VisitMethodExitHook(HMethodExitHook * method_hook)1239 void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
1240 LocationSummary* locations = new (GetGraph()->GetAllocator())
1241 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1242 SetInForReturnValue(method_hook, locations);
1243 // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
1244 locations->AddTemp(Location::RegisterLocation(EAX));
1245 locations->AddTemp(Location::RegisterLocation(EDX));
1246 // An additional temporary register to hold address to store the timestamp counter.
1247 locations->AddTemp(Location::RequiresRegister());
1248 }
1249
// Emits the code for a method entry or method exit hook.
//
// Emitted code, in order:
//  1. Exit hooks only: take the slow path if the frame's should-deoptimize
//     flag is non-zero.
//  2. Compare the instrumentation's "have method entry/exit listeners" byte
//     with kFastTraceListeners: lower means nothing to do, higher means slow
//     path, equal falls through to the inline fast path.
//  3. Fast path: reserve an entry in the thread's trace buffer (slow path if
//     there is no room), then record the method pointer (tagged with the trace
//     action) and the rdtsc timestamp in it.
// The slow path is a MethodEntryExitHooksSlowPathX86 registered with the codegen.
void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
  SlowPathCode* slow_path =
      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
  codegen_->AddSlowPath(slow_path);
  LocationSummary* locations = instruction->GetLocations();

  if (instruction->IsMethodExitHook()) {
    // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
    // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check
    // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
    // disabled in debuggable runtime. The other bit is used when this method itself requires a
    // deoptimization due to redefinition. So it is safe to just check for non-zero value here.
    __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // The address of the instrumentation object is embedded as an absolute address
  // in the generated code.
  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
  MemberOffset  offset = instruction->IsMethodExitHook() ?
      instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
      instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
  __ cmpb(Address::Absolute(address + offset.Int32Value()),
          Immediate(instrumentation::Instrumentation::kFastTraceListeners));
  // Check if there are any trace method entry / exit listeners. If no, continue.
  __ j(kLess, slow_path->GetExitLabel());
  // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
  // If yes, just take the slow path.
  __ j(kGreater, slow_path->GetEntryLabel());

  // For curr_entry use the register that isn't EAX or EDX. We need this after
  // rdtsc which returns values in EAX + EDX.
  Register curr_entry = locations->GetTemp(2).AsRegister<Register>();
  Register init_entry = locations->GetTemp(1).AsRegister<Register>();

  // Check if there is place in the buffer for a new entry, if no, take slow path.
  uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value();
  uint64_t trace_buffer_curr_entry_offset =
      Thread::TraceBufferCurrPtrOffset<kX86PointerSize>().Int32Value();

  // The current-entry pointer is moved down by the size of one entry and then
  // compared with the thread's trace buffer pointer.
  // NOTE(review): the comparison below is signed (kLess) although both operands
  // are pointers -- confirm the buffer can never straddle the 2 GiB boundary.
  __ fs()->movl(curr_entry, Address::Absolute(trace_buffer_curr_entry_offset));
  __ subl(curr_entry, Immediate(kNumEntriesForWallClock * sizeof(void*)));
  __ fs()->movl(init_entry, Address::Absolute(trace_buffer_ptr));
  __ cmpl(curr_entry, init_entry);
  __ j(kLess, slow_path->GetEntryLabel());

  // Update the current entry pointer in the `Thread`.
  __ fs()->movl(Address::Absolute(trace_buffer_curr_entry_offset), curr_entry);

  // Record method pointer and trace action.
  // `init_entry` is no longer needed, so its register is reused for the method.
  Register method = init_entry;
  __ movl(method, Address(ESP, kCurrentMethodStackOffset));
  // Use last two bits to encode trace method action. For MethodEntry it is 0
  // so no need to set the bits since they are 0 already.
  if (instruction->IsMethodExitHook()) {
    DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
    static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
    static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
    __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
  }
  __ movl(Address(curr_entry, kMethodOffsetInBytes), method);
  // Get the timestamp. rdtsc returns timestamp in EAX + EDX.
  __ rdtsc();
  __ movl(Address(curr_entry, kTimestampOffsetInBytes), EAX);
  __ movl(Address(curr_entry, kHighTimestampOffsetInBytes), EDX);
  // Both the "no listeners" branch above and the slow path resume here.
  __ Bind(slow_path->GetExitLabel());
}
1315
// Generates the code of a method exit hook. The DCHECKs assert that the hook is
// only compiled by the JIT for a debuggable graph and that the current method
// is required (the shared generator reads it from the frame).
void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
  DCHECK(codegen_->RequiresCurrentMethod());
  GenerateMethodEntryExitHook(instruction);
}
1321
VisitMethodEntryHook(HMethodEntryHook * method_hook)1322 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1323 LocationSummary* locations = new (GetGraph()->GetAllocator())
1324 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1325 // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
1326 locations->AddTemp(Location::RegisterLocation(EAX));
1327 locations->AddTemp(Location::RegisterLocation(EDX));
1328 // An additional temporary register to hold address to store the timestamp counter.
1329 locations->AddTemp(Location::RequiresRegister());
1330 }
1331
// Generates the code of a method entry hook. The DCHECKs assert that the hook is
// only compiled by the JIT for a debuggable graph and that the current method
// is required (the shared generator reads it from the frame).
void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
  DCHECK(codegen_->RequiresCurrentMethod());
  GenerateMethodEntryExitHook(instruction);
}
1337
MaybeIncrementHotness(HSuspendCheck * suspend_check,bool is_frame_entry)1338 void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1339 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1340 Register reg = EAX;
1341 if (is_frame_entry) {
1342 reg = kMethodRegisterArgument;
1343 } else {
1344 __ pushl(EAX);
1345 __ cfi().AdjustCFAOffset(4);
1346 __ movl(EAX, Address(ESP, kX86WordSize));
1347 }
1348 NearLabel overflow;
1349 __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1350 Immediate(interpreter::kNterpHotnessValue));
1351 __ j(kEqual, &overflow);
1352 __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1353 __ Bind(&overflow);
1354 if (!is_frame_entry) {
1355 __ popl(EAX);
1356 __ cfi().AdjustCFAOffset(-4);
1357 }
1358 }
1359
1360 if (GetGraph()->IsCompilingBaseline() &&
1361 GetGraph()->IsUsefulOptimizing() &&
1362 !Runtime::Current()->IsAotCompiler()) {
1363 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1364 DCHECK(info != nullptr);
1365 uint32_t address = reinterpret_cast32<uint32_t>(info) +
1366 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1367 DCHECK(!HasEmptyFrame());
1368 SlowPathCode* slow_path =
1369 new (GetScopedAllocator()) CompileOptimizedSlowPathX86(suspend_check, address);
1370 AddSlowPath(slow_path);
1371 // With multiple threads, this can overflow. This is OK, we will eventually get to see
1372 // it reaching 0. Also, at this point we have no register available to look
1373 // at the counter directly.
1374 __ addw(Address::Absolute(address), Immediate(-1));
1375 __ j(kEqual, slow_path->GetEntryLabel());
1376 __ Bind(slow_path->GetExitLabel());
1377 }
1378 }
1379
// Emits the method prologue, in order:
//  1. an optional class-initialization check that tail-jumps to the resolution
//     trampoline when the declaring class is not usable yet,
//  2. the frame entry label and, unless skipped, the stack overflow probe,
//  3. for non-empty frames: pushes of the allocated callee-save registers, the
//     frame size adjustment, the store of the current method and the
//     initialization of the should-deoptimize flag,
//  4. the hotness counter update.
// CFI information is kept in sync with every stack pointer change.
void CodeGeneratorX86::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address

  // Check if we need to generate the clinit check. We will jump to the
  // resolution stub if the class is not initialized and the executing thread is
  // not the thread initializing it.
  // We do this before constructing the frame to get the correct stack trace if
  // an exception is thrown.
  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
    NearLabel continue_execution, resolution;
    // We'll use EBP as temporary.
    __ pushl(EBP);
    __ cfi().AdjustCFAOffset(4);
    // Check if we're visibly initialized.

    // We don't emit a read barrier here to save on code size. We rely on the
    // resolution trampoline to do a suspend check before re-entering this code.
    __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
    __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
    __ j(kAboveEqual, &continue_execution);

    // Check if we're initializing and the thread initializing is the one
    // executing the code.
    __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedInitializingValue));
    __ j(kBelow, &resolution);

    // EBP now receives the id of the thread running the class initializer; it is
    // compared with the current thread's id.
    __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
    __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
    __ j(kEqual, &continue_execution);
    __ Bind(&resolution);

    // Restore EBP before leaving through the trampoline.
    __ popl(EBP);
    __ cfi().AdjustCFAOffset(-4);
    // Jump to the resolution stub.
    ThreadOffset32 entrypoint_offset =
        GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
    __ fs()->jmp(Address::Absolute(entrypoint_offset));

    __ Bind(&continue_execution);
    __ cfi().AdjustCFAOffset(4);  // Undo the `-4` adjustment above. We get here with EBP pushed.
    __ popl(EBP);
    __ cfi().AdjustCFAOffset(-4);
  }

  __ Bind(&frame_entry_label_);
  // Leaf methods whose frame size does not need a stack check skip the probe.
  bool skip_overflow_check =
      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
    // Implicit check: read the word `reserved_bytes` below ESP and record PC info
    // for that instruction.
    // NOTE(review): presumably a fault on this access is what the runtime turns
    // into a stack overflow error -- confirm against the fault handler.
    size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
    __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
    RecordPcInfo(nullptr, 0);
  }

  if (!HasEmptyFrame()) {
    // Make sure the frame size isn't unreasonably large.
    DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());

    // Push the allocated callee-save registers, last array element first.
    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ pushl(reg);
        __ cfi().AdjustCFAOffset(kX86WordSize);
        __ cfi().RelOffset(DWARFReg(reg), 0);
      }
    }

    // Allocate the rest of the frame (everything but the spills pushed so far).
    int adjust = GetFrameSize() - FrameEntrySpillSize();
    IncreaseFrame(adjust);
    // Save the current method if we need it. Note that we do not
    // do this in HCurrentMethod, as the instruction might have been removed
    // in the SSA graph.
    if (RequiresCurrentMethod()) {
      __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
    }

    if (GetGraph()->HasShouldDeoptimizeFlag()) {
      // Initialize should_deoptimize flag to 0.
      __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
    }
  }

  MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
}
1465
GenerateFrameExit()1466 void CodeGeneratorX86::GenerateFrameExit() {
1467 __ cfi().RememberState();
1468 if (!HasEmptyFrame()) {
1469 int adjust = GetFrameSize() - FrameEntrySpillSize();
1470 DecreaseFrame(adjust);
1471
1472 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1473 Register reg = kCoreCalleeSaves[i];
1474 if (allocated_registers_.ContainsCoreRegister(reg)) {
1475 __ popl(reg);
1476 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1477 __ cfi().Restore(DWARFReg(reg));
1478 }
1479 }
1480 }
1481 __ ret();
1482 __ cfi().RestoreState();
1483 __ cfi().DefCFAOffset(GetFrameSize());
1484 }
1485
Bind(HBasicBlock * block)1486 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1487 __ Bind(GetLabelOf(block));
1488 }
1489
GetReturnLocation(DataType::Type type) const1490 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1491 switch (type) {
1492 case DataType::Type::kReference:
1493 case DataType::Type::kBool:
1494 case DataType::Type::kUint8:
1495 case DataType::Type::kInt8:
1496 case DataType::Type::kUint16:
1497 case DataType::Type::kInt16:
1498 case DataType::Type::kUint32:
1499 case DataType::Type::kInt32:
1500 return Location::RegisterLocation(EAX);
1501
1502 case DataType::Type::kUint64:
1503 case DataType::Type::kInt64:
1504 return Location::RegisterPairLocation(EAX, EDX);
1505
1506 case DataType::Type::kVoid:
1507 return Location::NoLocation();
1508
1509 case DataType::Type::kFloat64:
1510 case DataType::Type::kFloat32:
1511 return Location::FpuRegisterLocation(XMM0);
1512 }
1513 }
1514
GetMethodLocation() const1515 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1516 return Location::RegisterLocation(kMethodRegisterArgument);
1517 }
1518
GetNextLocation(DataType::Type type)1519 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1520 switch (type) {
1521 case DataType::Type::kReference:
1522 case DataType::Type::kBool:
1523 case DataType::Type::kUint8:
1524 case DataType::Type::kInt8:
1525 case DataType::Type::kUint16:
1526 case DataType::Type::kInt16:
1527 case DataType::Type::kInt32: {
1528 uint32_t index = gp_index_++;
1529 stack_index_++;
1530 if (index < calling_convention.GetNumberOfRegisters()) {
1531 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1532 } else {
1533 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1534 }
1535 }
1536
1537 case DataType::Type::kInt64: {
1538 uint32_t index = gp_index_;
1539 gp_index_ += 2;
1540 stack_index_ += 2;
1541 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1542 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1543 calling_convention.GetRegisterPairAt(index));
1544 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1545 } else {
1546 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1547 }
1548 }
1549
1550 case DataType::Type::kFloat32: {
1551 uint32_t index = float_index_++;
1552 stack_index_++;
1553 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1554 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1555 } else {
1556 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1557 }
1558 }
1559
1560 case DataType::Type::kFloat64: {
1561 uint32_t index = float_index_++;
1562 stack_index_ += 2;
1563 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1564 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1565 } else {
1566 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1567 }
1568 }
1569
1570 case DataType::Type::kUint32:
1571 case DataType::Type::kUint64:
1572 case DataType::Type::kVoid:
1573 LOG(FATAL) << "Unexpected parameter type " << type;
1574 UNREACHABLE();
1575 }
1576 return Location::NoLocation();
1577 }
1578
GetNextLocation(DataType::Type type)1579 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1580 DCHECK_NE(type, DataType::Type::kReference);
1581
1582 Location location;
1583 if (DataType::Is64BitType(type)) {
1584 location = Location::DoubleStackSlot(stack_offset_);
1585 stack_offset_ += 2 * kFramePointerSize;
1586 } else {
1587 location = Location::StackSlot(stack_offset_);
1588 stack_offset_ += kFramePointerSize;
1589 }
1590 if (for_register_allocation_) {
1591 location = Location::Any();
1592 }
1593 return location;
1594 }
1595
GetReturnLocation(DataType::Type type) const1596 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1597 // We perform conversion to the managed ABI return register after the call if needed.
1598 InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1599 return dex_calling_convention.GetReturnLocation(type);
1600 }
1601
GetMethodLocation() const1602 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1603 // Pass the method in the hidden argument EAX.
1604 return Location::RegisterLocation(EAX);
1605 }
1606
Move32(Location destination,Location source)1607 void CodeGeneratorX86::Move32(Location destination, Location source) {
1608 if (source.Equals(destination)) {
1609 return;
1610 }
1611 if (destination.IsRegister()) {
1612 if (source.IsRegister()) {
1613 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1614 } else if (source.IsFpuRegister()) {
1615 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1616 } else if (source.IsConstant()) {
1617 int32_t value = GetInt32ValueOf(source.GetConstant());
1618 __ movl(destination.AsRegister<Register>(), Immediate(value));
1619 } else {
1620 DCHECK(source.IsStackSlot());
1621 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1622 }
1623 } else if (destination.IsFpuRegister()) {
1624 if (source.IsRegister()) {
1625 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1626 } else if (source.IsFpuRegister()) {
1627 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1628 } else {
1629 DCHECK(source.IsStackSlot());
1630 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1631 }
1632 } else {
1633 DCHECK(destination.IsStackSlot()) << destination;
1634 if (source.IsRegister()) {
1635 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1636 } else if (source.IsFpuRegister()) {
1637 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1638 } else if (source.IsConstant()) {
1639 HConstant* constant = source.GetConstant();
1640 int32_t value = GetInt32ValueOf(constant);
1641 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1642 } else {
1643 DCHECK(source.IsStackSlot());
1644 __ pushl(Address(ESP, source.GetStackIndex()));
1645 __ popl(Address(ESP, destination.GetStackIndex()));
1646 }
1647 }
1648 }
1649
Move64(Location destination,Location source)1650 void CodeGeneratorX86::Move64(Location destination, Location source) {
1651 if (source.Equals(destination)) {
1652 return;
1653 }
1654 if (destination.IsRegisterPair()) {
1655 if (source.IsRegisterPair()) {
1656 EmitParallelMoves(
1657 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1658 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1659 DataType::Type::kInt32,
1660 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1661 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1662 DataType::Type::kInt32);
1663 } else if (source.IsFpuRegister()) {
1664 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1665 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1666 __ psrlq(src_reg, Immediate(32));
1667 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1668 } else {
1669 // No conflict possible, so just do the moves.
1670 DCHECK(source.IsDoubleStackSlot());
1671 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1672 __ movl(destination.AsRegisterPairHigh<Register>(),
1673 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1674 }
1675 } else if (destination.IsFpuRegister()) {
1676 if (source.IsFpuRegister()) {
1677 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1678 } else if (source.IsDoubleStackSlot()) {
1679 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1680 } else if (source.IsRegisterPair()) {
1681 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1682 // Push the 2 source registers to the stack.
1683 __ pushl(source.AsRegisterPairHigh<Register>());
1684 __ cfi().AdjustCFAOffset(elem_size);
1685 __ pushl(source.AsRegisterPairLow<Register>());
1686 __ cfi().AdjustCFAOffset(elem_size);
1687 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1688 // And remove the temporary stack space we allocated.
1689 DecreaseFrame(2 * elem_size);
1690 } else {
1691 LOG(FATAL) << "Unimplemented";
1692 }
1693 } else {
1694 DCHECK(destination.IsDoubleStackSlot()) << destination;
1695 if (source.IsRegisterPair()) {
1696 // No conflict possible, so just do the moves.
1697 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1698 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1699 source.AsRegisterPairHigh<Register>());
1700 } else if (source.IsFpuRegister()) {
1701 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1702 } else if (source.IsConstant()) {
1703 HConstant* constant = source.GetConstant();
1704 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1705 int64_t value = GetInt64ValueOf(constant);
1706 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1707 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1708 Immediate(High32Bits(value)));
1709 } else {
1710 DCHECK(source.IsDoubleStackSlot()) << source;
1711 EmitParallelMoves(
1712 Location::StackSlot(source.GetStackIndex()),
1713 Location::StackSlot(destination.GetStackIndex()),
1714 DataType::Type::kInt32,
1715 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1716 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1717 DataType::Type::kInt32);
1718 }
1719 }
1720 }
1721
CreateAddress(Register base,Register index=Register::kNoRegister,ScaleFactor scale=TIMES_1,int32_t disp=0)1722 static Address CreateAddress(Register base,
1723 Register index = Register::kNoRegister,
1724 ScaleFactor scale = TIMES_1,
1725 int32_t disp = 0) {
1726 if (index == Register::kNoRegister) {
1727 return Address(base, disp);
1728 }
1729
1730 return Address(base, index, scale, disp);
1731 }
1732
LoadFromMemoryNoBarrier(DataType::Type dst_type,Location dst,Address src,HInstruction * instr,XmmRegister temp,bool is_atomic_load)1733 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1734 Location dst,
1735 Address src,
1736 HInstruction* instr,
1737 XmmRegister temp,
1738 bool is_atomic_load) {
1739 switch (dst_type) {
1740 case DataType::Type::kBool:
1741 case DataType::Type::kUint8:
1742 __ movzxb(dst.AsRegister<Register>(), src);
1743 break;
1744 case DataType::Type::kInt8:
1745 __ movsxb(dst.AsRegister<Register>(), src);
1746 break;
1747 case DataType::Type::kInt16:
1748 __ movsxw(dst.AsRegister<Register>(), src);
1749 break;
1750 case DataType::Type::kUint16:
1751 __ movzxw(dst.AsRegister<Register>(), src);
1752 break;
1753 case DataType::Type::kInt32:
1754 __ movl(dst.AsRegister<Register>(), src);
1755 break;
1756 case DataType::Type::kInt64: {
1757 if (is_atomic_load) {
1758 __ movsd(temp, src);
1759 if (instr != nullptr) {
1760 MaybeRecordImplicitNullCheck(instr);
1761 }
1762 __ movd(dst.AsRegisterPairLow<Register>(), temp);
1763 __ psrlq(temp, Immediate(32));
1764 __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1765 } else {
1766 DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1767 Address src_high = Address::displace(src, kX86WordSize);
1768 __ movl(dst.AsRegisterPairLow<Register>(), src);
1769 if (instr != nullptr) {
1770 MaybeRecordImplicitNullCheck(instr);
1771 }
1772 __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1773 }
1774 break;
1775 }
1776 case DataType::Type::kFloat32:
1777 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1778 break;
1779 case DataType::Type::kFloat64:
1780 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1781 break;
1782 case DataType::Type::kReference:
1783 DCHECK(!EmitReadBarrier());
1784 __ movl(dst.AsRegister<Register>(), src);
1785 __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1786 break;
1787 default:
1788 LOG(FATAL) << "Unreachable type " << dst_type;
1789 }
1790 if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1791 // kInt64 needs special handling that is done in the above switch.
1792 MaybeRecordImplicitNullCheck(instr);
1793 }
1794 }
1795
MoveToMemory(DataType::Type src_type,Location src,Register dst_base,Register dst_index,ScaleFactor dst_scale,int32_t dst_disp)1796 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1797 Location src,
1798 Register dst_base,
1799 Register dst_index,
1800 ScaleFactor dst_scale,
1801 int32_t dst_disp) {
1802 DCHECK(dst_base != Register::kNoRegister);
1803 Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1804
1805 switch (src_type) {
1806 case DataType::Type::kBool:
1807 case DataType::Type::kUint8:
1808 case DataType::Type::kInt8: {
1809 if (src.IsConstant()) {
1810 __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1811 } else {
1812 __ movb(dst, src.AsRegister<ByteRegister>());
1813 }
1814 break;
1815 }
1816 case DataType::Type::kUint16:
1817 case DataType::Type::kInt16: {
1818 if (src.IsConstant()) {
1819 __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1820 } else {
1821 __ movw(dst, src.AsRegister<Register>());
1822 }
1823 break;
1824 }
1825 case DataType::Type::kUint32:
1826 case DataType::Type::kInt32: {
1827 if (src.IsConstant()) {
1828 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1829 __ movl(dst, Immediate(v));
1830 } else {
1831 __ movl(dst, src.AsRegister<Register>());
1832 }
1833 break;
1834 }
1835 case DataType::Type::kUint64:
1836 case DataType::Type::kInt64: {
1837 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1838 if (src.IsConstant()) {
1839 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1840 __ movl(dst, Immediate(Low32Bits(v)));
1841 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1842 } else {
1843 __ movl(dst, src.AsRegisterPairLow<Register>());
1844 __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1845 }
1846 break;
1847 }
1848 case DataType::Type::kFloat32: {
1849 if (src.IsConstant()) {
1850 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1851 __ movl(dst, Immediate(v));
1852 } else {
1853 __ movss(dst, src.AsFpuRegister<XmmRegister>());
1854 }
1855 break;
1856 }
1857 case DataType::Type::kFloat64: {
1858 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1859 if (src.IsConstant()) {
1860 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1861 __ movl(dst, Immediate(Low32Bits(v)));
1862 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1863 } else {
1864 __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1865 }
1866 break;
1867 }
1868 case DataType::Type::kVoid:
1869 case DataType::Type::kReference:
1870 LOG(FATAL) << "Unreachable type " << src_type;
1871 }
1872 }
1873
MoveConstant(Location location,int32_t value)1874 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1875 DCHECK(location.IsRegister());
1876 __ movl(location.AsRegister<Register>(), Immediate(value));
1877 }
1878
MoveLocation(Location dst,Location src,DataType::Type dst_type)1879 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1880 HParallelMove move(GetGraph()->GetAllocator());
1881 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1882 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1883 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1884 } else {
1885 move.AddMove(src, dst, dst_type, nullptr);
1886 }
1887 GetMoveResolver()->EmitNativeCode(&move);
1888 }
1889
AddLocationAsTemp(Location location,LocationSummary * locations)1890 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1891 if (location.IsRegister()) {
1892 locations->AddTemp(location);
1893 } else if (location.IsRegisterPair()) {
1894 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1895 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1896 } else {
1897 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1898 }
1899 }
1900
HandleGoto(HInstruction * got,HBasicBlock * successor)1901 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1902 if (successor->IsExitBlock()) {
1903 DCHECK(got->GetPrevious()->AlwaysThrows());
1904 return; // no code needed
1905 }
1906
1907 HBasicBlock* block = got->GetBlock();
1908 HInstruction* previous = got->GetPrevious();
1909
1910 HLoopInformation* info = block->GetLoopInformation();
1911 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1912 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
1913 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1914 return;
1915 }
1916
1917 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1918 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1919 }
1920 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1921 __ jmp(codegen_->GetLabelOf(successor));
1922 }
1923 }
1924
VisitGoto(HGoto * got)1925 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1926 got->SetLocations(nullptr);
1927 }
1928
VisitGoto(HGoto * got)1929 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1930 HandleGoto(got, got->GetSuccessor());
1931 }
1932
VisitTryBoundary(HTryBoundary * try_boundary)1933 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1934 try_boundary->SetLocations(nullptr);
1935 }
1936
VisitTryBoundary(HTryBoundary * try_boundary)1937 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1938 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1939 if (!successor->IsExitBlock()) {
1940 HandleGoto(try_boundary, successor);
1941 }
1942 }
1943
VisitExit(HExit * exit)1944 void LocationsBuilderX86::VisitExit(HExit* exit) {
1945 exit->SetLocations(nullptr);
1946 }
1947
VisitExit(HExit * exit)1948 void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {}
1949
1950 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1951 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1952 LabelType* true_label,
1953 LabelType* false_label) {
1954 if (cond->IsFPConditionTrueIfNaN()) {
1955 __ j(kUnordered, true_label);
1956 } else if (cond->IsFPConditionFalseIfNaN()) {
1957 __ j(kUnordered, false_label);
1958 }
1959 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1960 }
1961
1962 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1963 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1964 LabelType* true_label,
1965 LabelType* false_label) {
1966 LocationSummary* locations = cond->GetLocations();
1967 Location left = locations->InAt(0);
1968 Location right = locations->InAt(1);
1969 IfCondition if_cond = cond->GetCondition();
1970
1971 Register left_high = left.AsRegisterPairHigh<Register>();
1972 Register left_low = left.AsRegisterPairLow<Register>();
1973 IfCondition true_high_cond = if_cond;
1974 IfCondition false_high_cond = cond->GetOppositeCondition();
1975 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1976
1977 // Set the conditions for the test, remembering that == needs to be
1978 // decided using the low words.
1979 switch (if_cond) {
1980 case kCondEQ:
1981 case kCondNE:
1982 // Nothing to do.
1983 break;
1984 case kCondLT:
1985 false_high_cond = kCondGT;
1986 break;
1987 case kCondLE:
1988 true_high_cond = kCondLT;
1989 break;
1990 case kCondGT:
1991 false_high_cond = kCondLT;
1992 break;
1993 case kCondGE:
1994 true_high_cond = kCondGT;
1995 break;
1996 case kCondB:
1997 false_high_cond = kCondA;
1998 break;
1999 case kCondBE:
2000 true_high_cond = kCondB;
2001 break;
2002 case kCondA:
2003 false_high_cond = kCondB;
2004 break;
2005 case kCondAE:
2006 true_high_cond = kCondA;
2007 break;
2008 }
2009
2010 if (right.IsConstant()) {
2011 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
2012 int32_t val_high = High32Bits(value);
2013 int32_t val_low = Low32Bits(value);
2014
2015 codegen_->Compare32BitValue(left_high, val_high);
2016 if (if_cond == kCondNE) {
2017 __ j(X86Condition(true_high_cond), true_label);
2018 } else if (if_cond == kCondEQ) {
2019 __ j(X86Condition(false_high_cond), false_label);
2020 } else {
2021 __ j(X86Condition(true_high_cond), true_label);
2022 __ j(X86Condition(false_high_cond), false_label);
2023 }
2024 // Must be equal high, so compare the lows.
2025 codegen_->Compare32BitValue(left_low, val_low);
2026 } else if (right.IsRegisterPair()) {
2027 Register right_high = right.AsRegisterPairHigh<Register>();
2028 Register right_low = right.AsRegisterPairLow<Register>();
2029
2030 __ cmpl(left_high, right_high);
2031 if (if_cond == kCondNE) {
2032 __ j(X86Condition(true_high_cond), true_label);
2033 } else if (if_cond == kCondEQ) {
2034 __ j(X86Condition(false_high_cond), false_label);
2035 } else {
2036 __ j(X86Condition(true_high_cond), true_label);
2037 __ j(X86Condition(false_high_cond), false_label);
2038 }
2039 // Must be equal high, so compare the lows.
2040 __ cmpl(left_low, right_low);
2041 } else {
2042 DCHECK(right.IsDoubleStackSlot());
2043 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
2044 if (if_cond == kCondNE) {
2045 __ j(X86Condition(true_high_cond), true_label);
2046 } else if (if_cond == kCondEQ) {
2047 __ j(X86Condition(false_high_cond), false_label);
2048 } else {
2049 __ j(X86Condition(true_high_cond), true_label);
2050 __ j(X86Condition(false_high_cond), false_label);
2051 }
2052 // Must be equal high, so compare the lows.
2053 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
2054 }
2055 // The last comparison might be unsigned.
2056 __ j(final_condition, true_label);
2057 }
2058
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)2059 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
2060 Location rhs,
2061 HInstruction* insn,
2062 bool is_double) {
2063 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull();
2064 if (is_double) {
2065 if (rhs.IsFpuRegister()) {
2066 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2067 } else if (const_area != nullptr) {
2068 DCHECK(const_area->IsEmittedAtUseSite());
2069 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
2070 codegen_->LiteralDoubleAddress(
2071 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
2072 const_area->GetBaseMethodAddress(),
2073 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2074 } else {
2075 DCHECK(rhs.IsDoubleStackSlot());
2076 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2077 }
2078 } else {
2079 if (rhs.IsFpuRegister()) {
2080 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2081 } else if (const_area != nullptr) {
2082 DCHECK(const_area->IsEmittedAtUseSite());
2083 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
2084 codegen_->LiteralFloatAddress(
2085 const_area->GetConstant()->AsFloatConstant()->GetValue(),
2086 const_area->GetBaseMethodAddress(),
2087 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2088 } else {
2089 DCHECK(rhs.IsStackSlot());
2090 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2091 }
2092 }
2093 }
2094
2095 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)2096 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
2097 LabelType* true_target_in,
2098 LabelType* false_target_in) {
2099 // Generated branching requires both targets to be explicit. If either of the
2100 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2101 LabelType fallthrough_target;
2102 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2103 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2104
2105 LocationSummary* locations = condition->GetLocations();
2106 Location left = locations->InAt(0);
2107 Location right = locations->InAt(1);
2108
2109 DataType::Type type = condition->InputAt(0)->GetType();
2110 switch (type) {
2111 case DataType::Type::kInt64:
2112 GenerateLongComparesAndJumps(condition, true_target, false_target);
2113 break;
2114 case DataType::Type::kFloat32:
2115 GenerateFPCompare(left, right, condition, false);
2116 GenerateFPJumps(condition, true_target, false_target);
2117 break;
2118 case DataType::Type::kFloat64:
2119 GenerateFPCompare(left, right, condition, true);
2120 GenerateFPJumps(condition, true_target, false_target);
2121 break;
2122 default:
2123 LOG(FATAL) << "Unexpected compare type " << type;
2124 }
2125
2126 if (false_target != &fallthrough_target) {
2127 __ jmp(false_target);
2128 }
2129
2130 if (fallthrough_target.IsLinked()) {
2131 __ Bind(&fallthrough_target);
2132 }
2133 }
2134
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch,const CompilerOptions & compiler_options)2135 static bool AreEflagsSetFrom(HInstruction* cond,
2136 HInstruction* branch,
2137 const CompilerOptions& compiler_options) {
2138 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2139 // are set only strictly before `branch`. We can't use the eflags on long/FP
2140 // conditions if they are materialized due to the complex branching.
2141 return cond->IsCondition() &&
2142 cond->GetNext() == branch &&
2143 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
2144 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2145 !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2146 compiler_options.ProfileBranches());
2147 }
2148
2149 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)2150 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
2151 size_t condition_input_index,
2152 LabelType* true_target,
2153 LabelType* false_target) {
2154 HInstruction* cond = instruction->InputAt(condition_input_index);
2155
2156 if (true_target == nullptr && false_target == nullptr) {
2157 // Nothing to do. The code always falls through.
2158 return;
2159 } else if (cond->IsIntConstant()) {
2160 // Constant condition, statically compared against "true" (integer value 1).
2161 if (cond->AsIntConstant()->IsTrue()) {
2162 if (true_target != nullptr) {
2163 __ jmp(true_target);
2164 }
2165 } else {
2166 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2167 if (false_target != nullptr) {
2168 __ jmp(false_target);
2169 }
2170 }
2171 return;
2172 }
2173
2174 // The following code generates these patterns:
2175 // (1) true_target == nullptr && false_target != nullptr
2176 // - opposite condition true => branch to false_target
2177 // (2) true_target != nullptr && false_target == nullptr
2178 // - condition true => branch to true_target
2179 // (3) true_target != nullptr && false_target != nullptr
2180 // - condition true => branch to true_target
2181 // - branch to false_target
2182 if (IsBooleanValueOrMaterializedCondition(cond)) {
2183 if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2184 if (true_target == nullptr) {
2185 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2186 } else {
2187 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2188 }
2189 } else {
2190 // Materialized condition, compare against 0.
2191 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2192 if (lhs.IsRegister()) {
2193 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2194 } else {
2195 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2196 }
2197 if (true_target == nullptr) {
2198 __ j(kEqual, false_target);
2199 } else {
2200 __ j(kNotEqual, true_target);
2201 }
2202 }
2203 } else {
2204 // Condition has not been materialized, use its inputs as the comparison and
2205 // its condition as the branch condition.
2206 HCondition* condition = cond->AsCondition();
2207
2208 // If this is a long or FP comparison that has been folded into
2209 // the HCondition, generate the comparison directly.
2210 DataType::Type type = condition->InputAt(0)->GetType();
2211 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2212 GenerateCompareTestAndBranch(condition, true_target, false_target);
2213 return;
2214 }
2215
2216 Location lhs = condition->GetLocations()->InAt(0);
2217 Location rhs = condition->GetLocations()->InAt(1);
2218 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2219 codegen_->GenerateIntCompare(lhs, rhs);
2220 if (true_target == nullptr) {
2221 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2222 } else {
2223 __ j(X86Condition(condition->GetCondition()), true_target);
2224 }
2225 }
2226
2227 // If neither branch falls through (case 3), the conditional branch to `true_target`
2228 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2229 if (true_target != nullptr && false_target != nullptr) {
2230 __ jmp(false_target);
2231 }
2232 }
2233
VisitIf(HIf * if_instr)2234 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2235 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2236 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2237 if (GetGraph()->IsCompilingBaseline() &&
2238 codegen_->GetCompilerOptions().ProfileBranches() &&
2239 !Runtime::Current()->IsAotCompiler()) {
2240 locations->SetInAt(0, Location::RequiresRegister());
2241 locations->AddRegisterTemps(2);
2242 } else {
2243 locations->SetInAt(0, Location::Any());
2244 }
2245 }
2246 }
2247
VisitIf(HIf * if_instr)2248 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2249 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2250 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2251 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2252 nullptr : codegen_->GetLabelOf(true_successor);
2253 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2254 nullptr : codegen_->GetLabelOf(false_successor);
2255 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2256 if (GetGraph()->IsCompilingBaseline() &&
2257 codegen_->GetCompilerOptions().ProfileBranches() &&
2258 !Runtime::Current()->IsAotCompiler()) {
2259 DCHECK(if_instr->InputAt(0)->IsCondition());
2260 Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>();
2261 Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>();
2262 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2263 DCHECK(info != nullptr);
2264 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2265 // Currently, not all If branches are profiled.
2266 if (cache != nullptr) {
2267 uint64_t address =
2268 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2269 static_assert(
2270 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2271 "Unexpected offsets for BranchCache");
2272 NearLabel done;
2273 Location lhs = if_instr->GetLocations()->InAt(0);
2274 __ movl(temp, Immediate(address));
2275 __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0));
2276 __ addw(counter, Immediate(1));
2277 __ j(kEqual, &done);
2278 __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter);
2279 __ Bind(&done);
2280 }
2281 }
2282 }
2283 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2284 }
2285
VisitDeoptimize(HDeoptimize * deoptimize)2286 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2287 LocationSummary* locations = new (GetGraph()->GetAllocator())
2288 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2289 InvokeRuntimeCallingConvention calling_convention;
2290 RegisterSet caller_saves = RegisterSet::Empty();
2291 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2292 locations->SetCustomSlowPathCallerSaves(caller_saves);
2293 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2294 locations->SetInAt(0, Location::Any());
2295 }
2296 }
2297
VisitDeoptimize(HDeoptimize * deoptimize)2298 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2299 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2300 GenerateTestAndBranch<Label>(deoptimize,
2301 /* condition_input_index= */ 0,
2302 slow_path->GetEntryLabel(),
2303 /* false_target= */ nullptr);
2304 }
2305
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2306 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2307 LocationSummary* locations = new (GetGraph()->GetAllocator())
2308 LocationSummary(flag, LocationSummary::kNoCall);
2309 locations->SetOut(Location::RequiresRegister());
2310 }
2311
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2312 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2313 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2314 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2315 }
2316
SelectCanUseCMOV(HSelect * select)2317 static bool SelectCanUseCMOV(HSelect* select) {
2318 // There are no conditional move instructions for XMMs.
2319 if (DataType::IsFloatingPointType(select->GetType())) {
2320 return false;
2321 }
2322
2323 // A FP condition doesn't generate the single CC that we need.
2324 // In 32 bit mode, a long condition doesn't generate a single CC either.
2325 HInstruction* condition = select->GetCondition();
2326 if (condition->IsCondition()) {
2327 DataType::Type compare_type = condition->InputAt(0)->GetType();
2328 if (compare_type == DataType::Type::kInt64 ||
2329 DataType::IsFloatingPointType(compare_type)) {
2330 return false;
2331 }
2332 }
2333
2334 // We can generate a CMOV for this Select.
2335 return true;
2336 }
2337
VisitSelect(HSelect * select)2338 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2339 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2340 if (DataType::IsFloatingPointType(select->GetType())) {
2341 locations->SetInAt(0, Location::RequiresFpuRegister());
2342 locations->SetInAt(1, Location::Any());
2343 } else {
2344 locations->SetInAt(0, Location::RequiresRegister());
2345 if (SelectCanUseCMOV(select)) {
2346 if (select->InputAt(1)->IsConstant()) {
2347 // Cmov can't handle a constant value.
2348 locations->SetInAt(1, Location::RequiresRegister());
2349 } else {
2350 locations->SetInAt(1, Location::Any());
2351 }
2352 } else {
2353 locations->SetInAt(1, Location::Any());
2354 }
2355 }
2356 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2357 locations->SetInAt(2, Location::RequiresRegister());
2358 }
2359 locations->SetOut(Location::SameAsFirstInput());
2360 }
2361
VisitSelect(HSelect * select)2362 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2363 LocationSummary* locations = select->GetLocations();
2364 DCHECK(locations->InAt(0).Equals(locations->Out()));
2365 if (SelectCanUseCMOV(select)) {
2366 // If both the condition and the source types are integer, we can generate
2367 // a CMOV to implement Select.
2368
2369 HInstruction* select_condition = select->GetCondition();
2370 Condition cond = kNotEqual;
2371
2372 // Figure out how to test the 'condition'.
2373 if (select_condition->IsCondition()) {
2374 HCondition* condition = select_condition->AsCondition();
2375 if (!condition->IsEmittedAtUseSite()) {
2376 // This was a previously materialized condition.
2377 // Can we use the existing condition code?
2378 if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2379 // Materialization was the previous instruction. Condition codes are right.
2380 cond = X86Condition(condition->GetCondition());
2381 } else {
2382 // No, we have to recreate the condition code.
2383 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2384 __ testl(cond_reg, cond_reg);
2385 }
2386 } else {
2387 // We can't handle FP or long here.
2388 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2389 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2390 LocationSummary* cond_locations = condition->GetLocations();
2391 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2392 cond = X86Condition(condition->GetCondition());
2393 }
2394 } else {
2395 // Must be a Boolean condition, which needs to be compared to 0.
2396 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2397 __ testl(cond_reg, cond_reg);
2398 }
2399
2400 // If the condition is true, overwrite the output, which already contains false.
2401 Location false_loc = locations->InAt(0);
2402 Location true_loc = locations->InAt(1);
2403 if (select->GetType() == DataType::Type::kInt64) {
2404 // 64 bit conditional move.
2405 Register false_high = false_loc.AsRegisterPairHigh<Register>();
2406 Register false_low = false_loc.AsRegisterPairLow<Register>();
2407 if (true_loc.IsRegisterPair()) {
2408 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2409 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2410 } else {
2411 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2412 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2413 }
2414 } else {
2415 // 32 bit conditional move.
2416 Register false_reg = false_loc.AsRegister<Register>();
2417 if (true_loc.IsRegister()) {
2418 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2419 } else {
2420 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2421 }
2422 }
2423 } else {
2424 NearLabel false_target;
2425 GenerateTestAndBranch<NearLabel>(
2426 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2427 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2428 __ Bind(&false_target);
2429 }
2430 }
2431
VisitNop(HNop * nop)2432 void LocationsBuilderX86::VisitNop(HNop* nop) {
2433 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2434 }
2435
VisitNop(HNop *)2436 void InstructionCodeGeneratorX86::VisitNop(HNop*) {
2437 // The environment recording already happened in CodeGenerator::Compile.
2438 }
2439
IncreaseFrame(size_t adjustment)2440 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2441 __ subl(ESP, Immediate(adjustment));
2442 __ cfi().AdjustCFAOffset(adjustment);
2443 }
2444
DecreaseFrame(size_t adjustment)2445 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2446 __ addl(ESP, Immediate(adjustment));
2447 __ cfi().AdjustCFAOffset(-adjustment);
2448 }
2449
GenerateNop()2450 void CodeGeneratorX86::GenerateNop() {
2451 __ nop();
2452 }
2453
HandleCondition(HCondition * cond)2454 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2455 LocationSummary* locations =
2456 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2457 // Handle the long/FP comparisons made in instruction simplification.
2458 switch (cond->InputAt(0)->GetType()) {
2459 case DataType::Type::kInt64: {
2460 locations->SetInAt(0, Location::RequiresRegister());
2461 locations->SetInAt(1, Location::Any());
2462 if (!cond->IsEmittedAtUseSite()) {
2463 locations->SetOut(Location::RequiresRegister());
2464 }
2465 break;
2466 }
2467 case DataType::Type::kFloat32:
2468 case DataType::Type::kFloat64: {
2469 locations->SetInAt(0, Location::RequiresFpuRegister());
2470 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2471 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2472 } else if (cond->InputAt(1)->IsConstant()) {
2473 locations->SetInAt(1, Location::RequiresFpuRegister());
2474 } else {
2475 locations->SetInAt(1, Location::Any());
2476 }
2477 if (!cond->IsEmittedAtUseSite()) {
2478 locations->SetOut(Location::RequiresRegister());
2479 }
2480 break;
2481 }
2482 default:
2483 locations->SetInAt(0, Location::RequiresRegister());
2484 locations->SetInAt(1, Location::Any());
2485 if (!cond->IsEmittedAtUseSite()) {
2486 // We need a byte register.
2487 locations->SetOut(Location::RegisterLocation(ECX));
2488 }
2489 break;
2490 }
2491 }
2492
HandleCondition(HCondition * cond)2493 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2494 if (cond->IsEmittedAtUseSite()) {
2495 return;
2496 }
2497
2498 LocationSummary* locations = cond->GetLocations();
2499 Location lhs = locations->InAt(0);
2500 Location rhs = locations->InAt(1);
2501 Register reg = locations->Out().AsRegister<Register>();
2502 NearLabel true_label, false_label;
2503
2504 switch (cond->InputAt(0)->GetType()) {
2505 default: {
2506 // Integer case.
2507
2508 // Clear output register: setb only sets the low byte.
2509 __ xorl(reg, reg);
2510 codegen_->GenerateIntCompare(lhs, rhs);
2511 __ setb(X86Condition(cond->GetCondition()), reg);
2512 return;
2513 }
2514 case DataType::Type::kInt64:
2515 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2516 break;
2517 case DataType::Type::kFloat32:
2518 GenerateFPCompare(lhs, rhs, cond, false);
2519 GenerateFPJumps(cond, &true_label, &false_label);
2520 break;
2521 case DataType::Type::kFloat64:
2522 GenerateFPCompare(lhs, rhs, cond, true);
2523 GenerateFPJumps(cond, &true_label, &false_label);
2524 break;
2525 }
2526
2527 // Convert the jumps into the result.
2528 NearLabel done_label;
2529
2530 // False case: result = 0.
2531 __ Bind(&false_label);
2532 __ xorl(reg, reg);
2533 __ jmp(&done_label);
2534
2535 // True case: result = 1.
2536 __ Bind(&true_label);
2537 __ movl(reg, Immediate(1));
2538 __ Bind(&done_label);
2539 }
2540
VisitEqual(HEqual * comp)2541 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2542 HandleCondition(comp);
2543 }
2544
VisitEqual(HEqual * comp)2545 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2546 HandleCondition(comp);
2547 }
2548
VisitNotEqual(HNotEqual * comp)2549 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2550 HandleCondition(comp);
2551 }
2552
VisitNotEqual(HNotEqual * comp)2553 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2554 HandleCondition(comp);
2555 }
2556
VisitLessThan(HLessThan * comp)2557 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2558 HandleCondition(comp);
2559 }
2560
VisitLessThan(HLessThan * comp)2561 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2562 HandleCondition(comp);
2563 }
2564
VisitLessThanOrEqual(HLessThanOrEqual * comp)2565 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2566 HandleCondition(comp);
2567 }
2568
VisitLessThanOrEqual(HLessThanOrEqual * comp)2569 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2570 HandleCondition(comp);
2571 }
2572
VisitGreaterThan(HGreaterThan * comp)2573 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2574 HandleCondition(comp);
2575 }
2576
VisitGreaterThan(HGreaterThan * comp)2577 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2578 HandleCondition(comp);
2579 }
2580
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2581 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2582 HandleCondition(comp);
2583 }
2584
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2585 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2586 HandleCondition(comp);
2587 }
2588
VisitBelow(HBelow * comp)2589 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2590 HandleCondition(comp);
2591 }
2592
VisitBelow(HBelow * comp)2593 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2594 HandleCondition(comp);
2595 }
2596
VisitBelowOrEqual(HBelowOrEqual * comp)2597 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2598 HandleCondition(comp);
2599 }
2600
VisitBelowOrEqual(HBelowOrEqual * comp)2601 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2602 HandleCondition(comp);
2603 }
2604
VisitAbove(HAbove * comp)2605 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2606 HandleCondition(comp);
2607 }
2608
VisitAbove(HAbove * comp)2609 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2610 HandleCondition(comp);
2611 }
2612
VisitAboveOrEqual(HAboveOrEqual * comp)2613 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2614 HandleCondition(comp);
2615 }
2616
VisitAboveOrEqual(HAboveOrEqual * comp)2617 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2618 HandleCondition(comp);
2619 }
2620
VisitIntConstant(HIntConstant * constant)2621 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2622 LocationSummary* locations =
2623 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2624 locations->SetOut(Location::ConstantLocation(constant));
2625 }
2626
VisitIntConstant(HIntConstant * constant)2627 void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2628 // Will be generated at use site.
2629 }
2630
VisitNullConstant(HNullConstant * constant)2631 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2632 LocationSummary* locations =
2633 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2634 locations->SetOut(Location::ConstantLocation(constant));
2635 }
2636
VisitNullConstant(HNullConstant * constant)2637 void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2638 // Will be generated at use site.
2639 }
2640
VisitLongConstant(HLongConstant * constant)2641 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2642 LocationSummary* locations =
2643 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2644 locations->SetOut(Location::ConstantLocation(constant));
2645 }
2646
VisitLongConstant(HLongConstant * constant)2647 void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2648 // Will be generated at use site.
2649 }
2650
VisitFloatConstant(HFloatConstant * constant)2651 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2652 LocationSummary* locations =
2653 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2654 locations->SetOut(Location::ConstantLocation(constant));
2655 }
2656
VisitFloatConstant(HFloatConstant * constant)2657 void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2658 // Will be generated at use site.
2659 }
2660
VisitDoubleConstant(HDoubleConstant * constant)2661 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2662 LocationSummary* locations =
2663 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2664 locations->SetOut(Location::ConstantLocation(constant));
2665 }
2666
VisitDoubleConstant(HDoubleConstant * constant)2667 void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) {
2668 // Will be generated at use site.
2669 }
2670
VisitConstructorFence(HConstructorFence * constructor_fence)2671 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2672 constructor_fence->SetLocations(nullptr);
2673 }
2674
VisitConstructorFence(HConstructorFence * constructor_fence)2675 void InstructionCodeGeneratorX86::VisitConstructorFence(
2676 [[maybe_unused]] HConstructorFence* constructor_fence) {
2677 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2678 }
2679
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2680 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2681 memory_barrier->SetLocations(nullptr);
2682 }
2683
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2684 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2685 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2686 }
2687
VisitReturnVoid(HReturnVoid * ret)2688 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2689 ret->SetLocations(nullptr);
2690 }
2691
VisitReturnVoid(HReturnVoid * ret)2692 void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2693 codegen_->GenerateFrameExit();
2694 }
2695
VisitReturn(HReturn * ret)2696 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2697 LocationSummary* locations =
2698 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2699 SetInForReturnValue(ret, locations);
2700 }
2701
VisitReturn(HReturn * ret)2702 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2703 switch (ret->InputAt(0)->GetType()) {
2704 case DataType::Type::kReference:
2705 case DataType::Type::kBool:
2706 case DataType::Type::kUint8:
2707 case DataType::Type::kInt8:
2708 case DataType::Type::kUint16:
2709 case DataType::Type::kInt16:
2710 case DataType::Type::kInt32:
2711 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2712 break;
2713
2714 case DataType::Type::kInt64:
2715 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2716 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2717 break;
2718
2719 case DataType::Type::kFloat32:
2720 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2721 if (GetGraph()->IsCompilingOsr()) {
2722 // To simplify callers of an OSR method, we put the return value in both
2723 // floating point and core registers.
2724 __ movd(EAX, XMM0);
2725 }
2726 break;
2727
2728 case DataType::Type::kFloat64:
2729 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2730 if (GetGraph()->IsCompilingOsr()) {
2731 // To simplify callers of an OSR method, we put the return value in both
2732 // floating point and core registers.
2733 __ movd(EAX, XMM0);
2734 // Use XMM1 as temporary register to not clobber XMM0.
2735 __ movaps(XMM1, XMM0);
2736 __ psrlq(XMM1, Immediate(32));
2737 __ movd(EDX, XMM1);
2738 }
2739 break;
2740
2741 default:
2742 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2743 }
2744 codegen_->GenerateFrameExit();
2745 }
2746
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2747 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2748 // The trampoline uses the same calling convention as dex calling conventions,
2749 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2750 // the method_idx.
2751 HandleInvoke(invoke);
2752 }
2753
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2754 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2755 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2756 }
2757
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2758 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2759 // Explicit clinit checks triggered by static invokes must have been pruned by
2760 // art::PrepareForRegisterAllocation.
2761 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2762
2763 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2764 if (intrinsic.TryDispatch(invoke)) {
2765 if (invoke->GetLocations()->CanCall() &&
2766 invoke->HasPcRelativeMethodLoadKind() &&
2767 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2768 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2769 }
2770 return;
2771 }
2772
2773 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2774 CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2775 /*for_register_allocation=*/ true);
2776 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2777 } else {
2778 HandleInvoke(invoke);
2779 }
2780
2781 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2782 if (invoke->HasPcRelativeMethodLoadKind()) {
2783 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2784 }
2785 }
2786
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86 * codegen)2787 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2788 if (invoke->GetLocations()->Intrinsified()) {
2789 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2790 intrinsic.Dispatch(invoke);
2791 return true;
2792 }
2793 return false;
2794 }
2795
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2796 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2797 // Explicit clinit checks triggered by static invokes must have been pruned by
2798 // art::PrepareForRegisterAllocation.
2799 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2800
2801 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2802 return;
2803 }
2804
2805 LocationSummary* locations = invoke->GetLocations();
2806 codegen_->GenerateStaticOrDirectCall(
2807 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2808 }
2809
VisitInvokeVirtual(HInvokeVirtual * invoke)2810 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2811 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2812 if (intrinsic.TryDispatch(invoke)) {
2813 return;
2814 }
2815
2816 HandleInvoke(invoke);
2817
2818 if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2819 // Add one temporary for inline cache update.
2820 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2821 }
2822 }
2823
HandleInvoke(HInvoke * invoke)2824 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2825 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2826 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2827 }
2828
VisitInvokeVirtual(HInvokeVirtual * invoke)2829 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2830 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2831 return;
2832 }
2833
2834 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2835 DCHECK(!codegen_->IsLeafMethod());
2836 }
2837
VisitInvokeInterface(HInvokeInterface * invoke)2838 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2839 // This call to HandleInvoke allocates a temporary (core) register
2840 // which is also used to transfer the hidden argument from FP to
2841 // core register.
2842 HandleInvoke(invoke);
2843 // Add the hidden argument.
2844 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2845
2846 if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2847 // Add one temporary for inline cache update.
2848 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2849 }
2850
2851 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2852 if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2853 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2854 }
2855
2856 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2857 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2858 Location::RequiresRegister());
2859 }
2860 }
2861
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)2862 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2863 DCHECK_EQ(EAX, klass);
2864 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
2865 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2866 DCHECK(info != nullptr);
2867 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
2868 info, GetCompilerOptions(), instruction->AsInvoke());
2869 if (cache != nullptr) {
2870 uint32_t address = reinterpret_cast32<uint32_t>(cache);
2871 if (kIsDebugBuild) {
2872 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2873 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2874 }
2875 Register temp = EBP;
2876 NearLabel done;
2877 __ movl(temp, Immediate(address));
2878 // Fast path for a monomorphic cache.
2879 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2880 __ j(kEqual, &done);
2881 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2882 __ Bind(&done);
2883 } else {
2884 // This is unexpected, but we don't guarantee stable compilation across
2885 // JIT runs so just warn about it.
2886 ScopedObjectAccess soa(Thread::Current());
2887 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
2888 }
2889 }
2890 }
2891
VisitInvokeInterface(HInvokeInterface * invoke)2892 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2893 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2894 LocationSummary* locations = invoke->GetLocations();
2895 Register temp = locations->GetTemp(0).AsRegister<Register>();
2896 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2897 Location receiver = locations->InAt(0);
2898 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2899
2900 // Set the hidden argument. This is safe to do this here, as XMM7
2901 // won't be modified thereafter, before the `call` instruction.
2902 DCHECK_EQ(XMM7, hidden_reg);
2903 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2904 __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2905 } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2906 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2907 __ movd(hidden_reg, temp);
2908 }
2909
2910 if (receiver.IsStackSlot()) {
2911 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2912 // /* HeapReference<Class> */ temp = temp->klass_
2913 __ movl(temp, Address(temp, class_offset));
2914 } else {
2915 // /* HeapReference<Class> */ temp = receiver->klass_
2916 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2917 }
2918 codegen_->MaybeRecordImplicitNullCheck(invoke);
2919 // Instead of simply (possibly) unpoisoning `temp` here, we should
2920 // emit a read barrier for the previous class reference load.
2921 // However this is not required in practice, as this is an
2922 // intermediate/temporary reference and because the current
2923 // concurrent copying collector keeps the from-space memory
2924 // intact/accessible until the end of the marking phase (the
2925 // concurrent copying collector may not in the future).
2926 __ MaybeUnpoisonHeapReference(temp);
2927
2928 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2929
2930 // temp = temp->GetAddressOfIMT()
2931 __ movl(temp,
2932 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2933 // temp = temp->GetImtEntryAt(method_offset);
2934 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2935 invoke->GetImtIndex(), kX86PointerSize));
2936 __ movl(temp, Address(temp, method_offset));
2937 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2938 // We pass the method from the IMT in case of a conflict. This will ensure
2939 // we go into the runtime to resolve the actual method.
2940 __ movd(hidden_reg, temp);
2941 }
2942 // call temp->GetEntryPoint();
2943 __ call(Address(temp,
2944 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2945
2946 DCHECK(!codegen_->IsLeafMethod());
2947 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2948 }
2949
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2950 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2951 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2952 if (intrinsic.TryDispatch(invoke)) {
2953 return;
2954 }
2955 HandleInvoke(invoke);
2956 }
2957
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2958 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2959 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2960 return;
2961 }
2962 codegen_->GenerateInvokePolymorphicCall(invoke);
2963 }
2964
VisitInvokeCustom(HInvokeCustom * invoke)2965 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2966 HandleInvoke(invoke);
2967 }
2968
VisitInvokeCustom(HInvokeCustom * invoke)2969 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2970 codegen_->GenerateInvokeCustomCall(invoke);
2971 }
2972
VisitNeg(HNeg * neg)2973 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2974 LocationSummary* locations =
2975 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2976 switch (neg->GetResultType()) {
2977 case DataType::Type::kInt32:
2978 case DataType::Type::kInt64:
2979 locations->SetInAt(0, Location::RequiresRegister());
2980 locations->SetOut(Location::SameAsFirstInput());
2981 break;
2982
2983 case DataType::Type::kFloat32:
2984 locations->SetInAt(0, Location::RequiresFpuRegister());
2985 locations->SetOut(Location::SameAsFirstInput());
2986 locations->AddTemp(Location::RequiresRegister());
2987 locations->AddTemp(Location::RequiresFpuRegister());
2988 break;
2989
2990 case DataType::Type::kFloat64:
2991 locations->SetInAt(0, Location::RequiresFpuRegister());
2992 locations->SetOut(Location::SameAsFirstInput());
2993 locations->AddTemp(Location::RequiresFpuRegister());
2994 break;
2995
2996 default:
2997 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2998 }
2999 }
3000
VisitNeg(HNeg * neg)3001 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
3002 LocationSummary* locations = neg->GetLocations();
3003 Location out = locations->Out();
3004 Location in = locations->InAt(0);
3005 switch (neg->GetResultType()) {
3006 case DataType::Type::kInt32:
3007 DCHECK(in.IsRegister());
3008 DCHECK(in.Equals(out));
3009 __ negl(out.AsRegister<Register>());
3010 break;
3011
3012 case DataType::Type::kInt64:
3013 DCHECK(in.IsRegisterPair());
3014 DCHECK(in.Equals(out));
3015 __ negl(out.AsRegisterPairLow<Register>());
3016 // Negation is similar to subtraction from zero. The least
3017 // significant byte triggers a borrow when it is different from
3018 // zero; to take it into account, add 1 to the most significant
3019 // byte if the carry flag (CF) is set to 1 after the first NEGL
3020 // operation.
3021 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
3022 __ negl(out.AsRegisterPairHigh<Register>());
3023 break;
3024
3025 case DataType::Type::kFloat32: {
3026 DCHECK(in.Equals(out));
3027 Register constant = locations->GetTemp(0).AsRegister<Register>();
3028 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
3029 // Implement float negation with an exclusive or with value
3030 // 0x80000000 (mask for bit 31, representing the sign of a
3031 // single-precision floating-point number).
3032 __ movl(constant, Immediate(INT32_C(0x80000000)));
3033 __ movd(mask, constant);
3034 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3035 break;
3036 }
3037
3038 case DataType::Type::kFloat64: {
3039 DCHECK(in.Equals(out));
3040 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3041 // Implement double negation with an exclusive or with value
3042 // 0x8000000000000000 (mask for bit 63, representing the sign of
3043 // a double-precision floating-point number).
3044 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
3045 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3046 break;
3047 }
3048
3049 default:
3050 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3051 }
3052 }
3053
VisitX86FPNeg(HX86FPNeg * neg)3054 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
3055 LocationSummary* locations =
3056 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3057 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
3058 locations->SetInAt(0, Location::RequiresFpuRegister());
3059 locations->SetInAt(1, Location::RequiresRegister());
3060 locations->SetOut(Location::SameAsFirstInput());
3061 locations->AddTemp(Location::RequiresFpuRegister());
3062 }
3063
VisitX86FPNeg(HX86FPNeg * neg)3064 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
3065 LocationSummary* locations = neg->GetLocations();
3066 Location out = locations->Out();
3067 DCHECK(locations->InAt(0).Equals(out));
3068
3069 Register constant_area = locations->InAt(1).AsRegister<Register>();
3070 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3071 if (neg->GetType() == DataType::Type::kFloat32) {
3072 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
3073 neg->GetBaseMethodAddress(),
3074 constant_area));
3075 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3076 } else {
3077 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
3078 neg->GetBaseMethodAddress(),
3079 constant_area));
3080 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3081 }
3082 }
3083
VisitTypeConversion(HTypeConversion * conversion)3084 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
3085 DataType::Type result_type = conversion->GetResultType();
3086 DataType::Type input_type = conversion->GetInputType();
3087 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3088 << input_type << " -> " << result_type;
3089
3090 // The float-to-long and double-to-long type conversions rely on a
3091 // call to the runtime.
3092 LocationSummary::CallKind call_kind =
3093 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3094 && result_type == DataType::Type::kInt64)
3095 ? LocationSummary::kCallOnMainOnly
3096 : LocationSummary::kNoCall;
3097 LocationSummary* locations =
3098 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3099
3100 switch (result_type) {
3101 case DataType::Type::kUint8:
3102 case DataType::Type::kInt8:
3103 switch (input_type) {
3104 case DataType::Type::kUint8:
3105 case DataType::Type::kInt8:
3106 case DataType::Type::kUint16:
3107 case DataType::Type::kInt16:
3108 case DataType::Type::kInt32:
3109 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
3110 // Make the output overlap to please the register allocator. This greatly simplifies
3111 // the validation of the linear scan implementation
3112 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3113 break;
3114 case DataType::Type::kInt64: {
3115 HInstruction* input = conversion->InputAt(0);
3116 Location input_location = input->IsConstant()
3117 ? Location::ConstantLocation(input)
3118 : Location::RegisterPairLocation(EAX, EDX);
3119 locations->SetInAt(0, input_location);
3120 // Make the output overlap to please the register allocator. This greatly simplifies
3121 // the validation of the linear scan implementation
3122 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3123 break;
3124 }
3125
3126 default:
3127 LOG(FATAL) << "Unexpected type conversion from " << input_type
3128 << " to " << result_type;
3129 }
3130 break;
3131
3132 case DataType::Type::kUint16:
3133 case DataType::Type::kInt16:
3134 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3135 locations->SetInAt(0, Location::Any());
3136 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3137 break;
3138
3139 case DataType::Type::kInt32:
3140 switch (input_type) {
3141 case DataType::Type::kInt64:
3142 locations->SetInAt(0, Location::Any());
3143 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3144 break;
3145
3146 case DataType::Type::kFloat32:
3147 locations->SetInAt(0, Location::RequiresFpuRegister());
3148 locations->SetOut(Location::RequiresRegister());
3149 locations->AddTemp(Location::RequiresFpuRegister());
3150 break;
3151
3152 case DataType::Type::kFloat64:
3153 locations->SetInAt(0, Location::RequiresFpuRegister());
3154 locations->SetOut(Location::RequiresRegister());
3155 locations->AddTemp(Location::RequiresFpuRegister());
3156 break;
3157
3158 default:
3159 LOG(FATAL) << "Unexpected type conversion from " << input_type
3160 << " to " << result_type;
3161 }
3162 break;
3163
3164 case DataType::Type::kInt64:
3165 switch (input_type) {
3166 case DataType::Type::kBool:
3167 case DataType::Type::kUint8:
3168 case DataType::Type::kInt8:
3169 case DataType::Type::kUint16:
3170 case DataType::Type::kInt16:
3171 case DataType::Type::kInt32:
3172 locations->SetInAt(0, Location::RegisterLocation(EAX));
3173 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3174 break;
3175
3176 case DataType::Type::kFloat32:
3177 case DataType::Type::kFloat64: {
3178 InvokeRuntimeCallingConvention calling_convention;
3179 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
3180 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
3181
3182 // The runtime helper puts the result in EAX, EDX.
3183 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3184 }
3185 break;
3186
3187 default:
3188 LOG(FATAL) << "Unexpected type conversion from " << input_type
3189 << " to " << result_type;
3190 }
3191 break;
3192
3193 case DataType::Type::kFloat32:
3194 switch (input_type) {
3195 case DataType::Type::kBool:
3196 case DataType::Type::kUint8:
3197 case DataType::Type::kInt8:
3198 case DataType::Type::kUint16:
3199 case DataType::Type::kInt16:
3200 case DataType::Type::kInt32:
3201 locations->SetInAt(0, Location::RequiresRegister());
3202 locations->SetOut(Location::RequiresFpuRegister());
3203 break;
3204
3205 case DataType::Type::kInt64:
3206 locations->SetInAt(0, Location::Any());
3207 locations->SetOut(Location::Any());
3208 break;
3209
3210 case DataType::Type::kFloat64:
3211 locations->SetInAt(0, Location::RequiresFpuRegister());
3212 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3213 break;
3214
3215 default:
3216 LOG(FATAL) << "Unexpected type conversion from " << input_type
3217 << " to " << result_type;
3218 }
3219 break;
3220
3221 case DataType::Type::kFloat64:
3222 switch (input_type) {
3223 case DataType::Type::kBool:
3224 case DataType::Type::kUint8:
3225 case DataType::Type::kInt8:
3226 case DataType::Type::kUint16:
3227 case DataType::Type::kInt16:
3228 case DataType::Type::kInt32:
3229 locations->SetInAt(0, Location::RequiresRegister());
3230 locations->SetOut(Location::RequiresFpuRegister());
3231 break;
3232
3233 case DataType::Type::kInt64:
3234 locations->SetInAt(0, Location::Any());
3235 locations->SetOut(Location::Any());
3236 break;
3237
3238 case DataType::Type::kFloat32:
3239 locations->SetInAt(0, Location::RequiresFpuRegister());
3240 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3241 break;
3242
3243 default:
3244 LOG(FATAL) << "Unexpected type conversion from " << input_type
3245 << " to " << result_type;
3246 }
3247 break;
3248
3249 default:
3250 LOG(FATAL) << "Unexpected type conversion from " << input_type
3251 << " to " << result_type;
3252 }
3253 }
3254
VisitTypeConversion(HTypeConversion * conversion)3255 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3256 LocationSummary* locations = conversion->GetLocations();
3257 Location out = locations->Out();
3258 Location in = locations->InAt(0);
3259 DataType::Type result_type = conversion->GetResultType();
3260 DataType::Type input_type = conversion->GetInputType();
3261 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3262 << input_type << " -> " << result_type;
3263 switch (result_type) {
3264 case DataType::Type::kUint8:
3265 switch (input_type) {
3266 case DataType::Type::kInt8:
3267 case DataType::Type::kUint16:
3268 case DataType::Type::kInt16:
3269 case DataType::Type::kInt32:
3270 if (in.IsRegister()) {
3271 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3272 } else {
3273 DCHECK(in.GetConstant()->IsIntConstant());
3274 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3275 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3276 }
3277 break;
3278 case DataType::Type::kInt64:
3279 if (in.IsRegisterPair()) {
3280 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3281 } else {
3282 DCHECK(in.GetConstant()->IsLongConstant());
3283 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3284 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3285 }
3286 break;
3287
3288 default:
3289 LOG(FATAL) << "Unexpected type conversion from " << input_type
3290 << " to " << result_type;
3291 }
3292 break;
3293
3294 case DataType::Type::kInt8:
3295 switch (input_type) {
3296 case DataType::Type::kUint8:
3297 case DataType::Type::kUint16:
3298 case DataType::Type::kInt16:
3299 case DataType::Type::kInt32:
3300 if (in.IsRegister()) {
3301 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3302 } else {
3303 DCHECK(in.GetConstant()->IsIntConstant());
3304 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3305 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3306 }
3307 break;
3308 case DataType::Type::kInt64:
3309 if (in.IsRegisterPair()) {
3310 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3311 } else {
3312 DCHECK(in.GetConstant()->IsLongConstant());
3313 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3314 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3315 }
3316 break;
3317
3318 default:
3319 LOG(FATAL) << "Unexpected type conversion from " << input_type
3320 << " to " << result_type;
3321 }
3322 break;
3323
3324 case DataType::Type::kUint16:
3325 switch (input_type) {
3326 case DataType::Type::kInt8:
3327 case DataType::Type::kInt16:
3328 case DataType::Type::kInt32:
3329 if (in.IsRegister()) {
3330 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3331 } else if (in.IsStackSlot()) {
3332 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3333 } else {
3334 DCHECK(in.GetConstant()->IsIntConstant());
3335 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3336 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3337 }
3338 break;
3339 case DataType::Type::kInt64:
3340 if (in.IsRegisterPair()) {
3341 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3342 } else if (in.IsDoubleStackSlot()) {
3343 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3344 } else {
3345 DCHECK(in.GetConstant()->IsLongConstant());
3346 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3347 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3348 }
3349 break;
3350
3351 default:
3352 LOG(FATAL) << "Unexpected type conversion from " << input_type
3353 << " to " << result_type;
3354 }
3355 break;
3356
3357 case DataType::Type::kInt16:
3358 switch (input_type) {
3359 case DataType::Type::kUint16:
3360 case DataType::Type::kInt32:
3361 if (in.IsRegister()) {
3362 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3363 } else if (in.IsStackSlot()) {
3364 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3365 } else {
3366 DCHECK(in.GetConstant()->IsIntConstant());
3367 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3368 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3369 }
3370 break;
3371 case DataType::Type::kInt64:
3372 if (in.IsRegisterPair()) {
3373 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3374 } else if (in.IsDoubleStackSlot()) {
3375 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3376 } else {
3377 DCHECK(in.GetConstant()->IsLongConstant());
3378 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3379 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3380 }
3381 break;
3382
3383 default:
3384 LOG(FATAL) << "Unexpected type conversion from " << input_type
3385 << " to " << result_type;
3386 }
3387 break;
3388
3389 case DataType::Type::kInt32:
3390 switch (input_type) {
3391 case DataType::Type::kInt64:
3392 if (in.IsRegisterPair()) {
3393 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3394 } else if (in.IsDoubleStackSlot()) {
3395 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3396 } else {
3397 DCHECK(in.IsConstant());
3398 DCHECK(in.GetConstant()->IsLongConstant());
3399 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3400 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3401 }
3402 break;
3403
3404 case DataType::Type::kFloat32: {
3405 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3406 Register output = out.AsRegister<Register>();
3407 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3408 NearLabel done, nan;
3409
3410 __ movl(output, Immediate(kPrimIntMax));
3411 // temp = int-to-float(output)
3412 __ cvtsi2ss(temp, output);
3413 // if input >= temp goto done
3414 __ comiss(input, temp);
3415 __ j(kAboveEqual, &done);
3416 // if input == NaN goto nan
3417 __ j(kUnordered, &nan);
3418 // output = float-to-int-truncate(input)
3419 __ cvttss2si(output, input);
3420 __ jmp(&done);
3421 __ Bind(&nan);
3422 // output = 0
3423 __ xorl(output, output);
3424 __ Bind(&done);
3425 break;
3426 }
3427
3428 case DataType::Type::kFloat64: {
3429 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3430 Register output = out.AsRegister<Register>();
3431 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3432 NearLabel done, nan;
3433
3434 __ movl(output, Immediate(kPrimIntMax));
3435 // temp = int-to-double(output)
3436 __ cvtsi2sd(temp, output);
3437 // if input >= temp goto done
3438 __ comisd(input, temp);
3439 __ j(kAboveEqual, &done);
3440 // if input == NaN goto nan
3441 __ j(kUnordered, &nan);
3442 // output = double-to-int-truncate(input)
3443 __ cvttsd2si(output, input);
3444 __ jmp(&done);
3445 __ Bind(&nan);
3446 // output = 0
3447 __ xorl(output, output);
3448 __ Bind(&done);
3449 break;
3450 }
3451
3452 default:
3453 LOG(FATAL) << "Unexpected type conversion from " << input_type
3454 << " to " << result_type;
3455 }
3456 break;
3457
3458 case DataType::Type::kInt64:
3459 switch (input_type) {
3460 case DataType::Type::kBool:
3461 case DataType::Type::kUint8:
3462 case DataType::Type::kInt8:
3463 case DataType::Type::kUint16:
3464 case DataType::Type::kInt16:
3465 case DataType::Type::kInt32:
3466 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3467 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3468 DCHECK_EQ(in.AsRegister<Register>(), EAX);
3469 __ cdq();
3470 break;
3471
3472 case DataType::Type::kFloat32:
3473 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3474 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3475 break;
3476
3477 case DataType::Type::kFloat64:
3478 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3479 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3480 break;
3481
3482 default:
3483 LOG(FATAL) << "Unexpected type conversion from " << input_type
3484 << " to " << result_type;
3485 }
3486 break;
3487
3488 case DataType::Type::kFloat32:
3489 switch (input_type) {
3490 case DataType::Type::kBool:
3491 case DataType::Type::kUint8:
3492 case DataType::Type::kInt8:
3493 case DataType::Type::kUint16:
3494 case DataType::Type::kInt16:
3495 case DataType::Type::kInt32:
3496 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3497 break;
3498
3499 case DataType::Type::kInt64: {
3500 size_t adjustment = 0;
3501
3502 // Create stack space for the call to
3503 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3504 // TODO: enhance register allocator to ask for stack temporaries.
3505 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3506 adjustment = DataType::Size(DataType::Type::kInt64);
3507 codegen_->IncreaseFrame(adjustment);
3508 }
3509
3510 // Load the value to the FP stack, using temporaries if needed.
3511 PushOntoFPStack(in, 0, adjustment, false, true);
3512
3513 if (out.IsStackSlot()) {
3514 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3515 } else {
3516 __ fstps(Address(ESP, 0));
3517 Location stack_temp = Location::StackSlot(0);
3518 codegen_->Move32(out, stack_temp);
3519 }
3520
3521 // Remove the temporary stack space we allocated.
3522 if (adjustment != 0) {
3523 codegen_->DecreaseFrame(adjustment);
3524 }
3525 break;
3526 }
3527
3528 case DataType::Type::kFloat64:
3529 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3530 break;
3531
3532 default:
3533 LOG(FATAL) << "Unexpected type conversion from " << input_type
3534 << " to " << result_type;
3535 }
3536 break;
3537
3538 case DataType::Type::kFloat64:
3539 switch (input_type) {
3540 case DataType::Type::kBool:
3541 case DataType::Type::kUint8:
3542 case DataType::Type::kInt8:
3543 case DataType::Type::kUint16:
3544 case DataType::Type::kInt16:
3545 case DataType::Type::kInt32:
3546 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3547 break;
3548
3549 case DataType::Type::kInt64: {
3550 size_t adjustment = 0;
3551
3552 // Create stack space for the call to
3553 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3554 // TODO: enhance register allocator to ask for stack temporaries.
3555 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3556 adjustment = DataType::Size(DataType::Type::kInt64);
3557 codegen_->IncreaseFrame(adjustment);
3558 }
3559
3560 // Load the value to the FP stack, using temporaries if needed.
3561 PushOntoFPStack(in, 0, adjustment, false, true);
3562
3563 if (out.IsDoubleStackSlot()) {
3564 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3565 } else {
3566 __ fstpl(Address(ESP, 0));
3567 Location stack_temp = Location::DoubleStackSlot(0);
3568 codegen_->Move64(out, stack_temp);
3569 }
3570
3571 // Remove the temporary stack space we allocated.
3572 if (adjustment != 0) {
3573 codegen_->DecreaseFrame(adjustment);
3574 }
3575 break;
3576 }
3577
3578 case DataType::Type::kFloat32:
3579 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3580 break;
3581
3582 default:
3583 LOG(FATAL) << "Unexpected type conversion from " << input_type
3584 << " to " << result_type;
3585 }
3586 break;
3587
3588 default:
3589 LOG(FATAL) << "Unexpected type conversion from " << input_type
3590 << " to " << result_type;
3591 }
3592 }
3593
VisitAdd(HAdd * add)3594 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3595 LocationSummary* locations =
3596 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3597 switch (add->GetResultType()) {
3598 case DataType::Type::kInt32: {
3599 locations->SetInAt(0, Location::RequiresRegister());
3600 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3601 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3602 break;
3603 }
3604
3605 case DataType::Type::kInt64: {
3606 locations->SetInAt(0, Location::RequiresRegister());
3607 locations->SetInAt(1, Location::Any());
3608 locations->SetOut(Location::SameAsFirstInput());
3609 break;
3610 }
3611
3612 case DataType::Type::kFloat32:
3613 case DataType::Type::kFloat64: {
3614 locations->SetInAt(0, Location::RequiresFpuRegister());
3615 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3616 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3617 } else if (add->InputAt(1)->IsConstant()) {
3618 locations->SetInAt(1, Location::RequiresFpuRegister());
3619 } else {
3620 locations->SetInAt(1, Location::Any());
3621 }
3622 locations->SetOut(Location::SameAsFirstInput());
3623 break;
3624 }
3625
3626 default:
3627 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3628 UNREACHABLE();
3629 }
3630 }
3631
VisitAdd(HAdd * add)3632 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3633 LocationSummary* locations = add->GetLocations();
3634 Location first = locations->InAt(0);
3635 Location second = locations->InAt(1);
3636 Location out = locations->Out();
3637
3638 switch (add->GetResultType()) {
3639 case DataType::Type::kInt32: {
3640 if (second.IsRegister()) {
3641 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3642 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3643 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3644 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3645 } else {
3646 __ leal(out.AsRegister<Register>(), Address(
3647 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3648 }
3649 } else if (second.IsConstant()) {
3650 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3651 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3652 __ addl(out.AsRegister<Register>(), Immediate(value));
3653 } else {
3654 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3655 }
3656 } else {
3657 DCHECK(first.Equals(locations->Out()));
3658 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3659 }
3660 break;
3661 }
3662
3663 case DataType::Type::kInt64: {
3664 if (second.IsRegisterPair()) {
3665 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3666 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3667 } else if (second.IsDoubleStackSlot()) {
3668 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3669 __ adcl(first.AsRegisterPairHigh<Register>(),
3670 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3671 } else {
3672 DCHECK(second.IsConstant()) << second;
3673 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3674 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3675 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3676 }
3677 break;
3678 }
3679
3680 case DataType::Type::kFloat32: {
3681 if (second.IsFpuRegister()) {
3682 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3683 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3684 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3685 DCHECK(const_area->IsEmittedAtUseSite());
3686 __ addss(first.AsFpuRegister<XmmRegister>(),
3687 codegen_->LiteralFloatAddress(
3688 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3689 const_area->GetBaseMethodAddress(),
3690 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3691 } else {
3692 DCHECK(second.IsStackSlot());
3693 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3694 }
3695 break;
3696 }
3697
3698 case DataType::Type::kFloat64: {
3699 if (second.IsFpuRegister()) {
3700 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3701 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3702 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3703 DCHECK(const_area->IsEmittedAtUseSite());
3704 __ addsd(first.AsFpuRegister<XmmRegister>(),
3705 codegen_->LiteralDoubleAddress(
3706 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3707 const_area->GetBaseMethodAddress(),
3708 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3709 } else {
3710 DCHECK(second.IsDoubleStackSlot());
3711 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3712 }
3713 break;
3714 }
3715
3716 default:
3717 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3718 }
3719 }
3720
VisitSub(HSub * sub)3721 void LocationsBuilderX86::VisitSub(HSub* sub) {
3722 LocationSummary* locations =
3723 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3724 switch (sub->GetResultType()) {
3725 case DataType::Type::kInt32:
3726 case DataType::Type::kInt64: {
3727 locations->SetInAt(0, Location::RequiresRegister());
3728 locations->SetInAt(1, Location::Any());
3729 locations->SetOut(Location::SameAsFirstInput());
3730 break;
3731 }
3732 case DataType::Type::kFloat32:
3733 case DataType::Type::kFloat64: {
3734 locations->SetInAt(0, Location::RequiresFpuRegister());
3735 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3736 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3737 } else if (sub->InputAt(1)->IsConstant()) {
3738 locations->SetInAt(1, Location::RequiresFpuRegister());
3739 } else {
3740 locations->SetInAt(1, Location::Any());
3741 }
3742 locations->SetOut(Location::SameAsFirstInput());
3743 break;
3744 }
3745
3746 default:
3747 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3748 }
3749 }
3750
VisitSub(HSub * sub)3751 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3752 LocationSummary* locations = sub->GetLocations();
3753 Location first = locations->InAt(0);
3754 Location second = locations->InAt(1);
3755 DCHECK(first.Equals(locations->Out()));
3756 switch (sub->GetResultType()) {
3757 case DataType::Type::kInt32: {
3758 if (second.IsRegister()) {
3759 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3760 } else if (second.IsConstant()) {
3761 __ subl(first.AsRegister<Register>(),
3762 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3763 } else {
3764 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3765 }
3766 break;
3767 }
3768
3769 case DataType::Type::kInt64: {
3770 if (second.IsRegisterPair()) {
3771 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3772 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3773 } else if (second.IsDoubleStackSlot()) {
3774 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3775 __ sbbl(first.AsRegisterPairHigh<Register>(),
3776 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3777 } else {
3778 DCHECK(second.IsConstant()) << second;
3779 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3780 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3781 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3782 }
3783 break;
3784 }
3785
3786 case DataType::Type::kFloat32: {
3787 if (second.IsFpuRegister()) {
3788 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3789 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3790 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3791 DCHECK(const_area->IsEmittedAtUseSite());
3792 __ subss(first.AsFpuRegister<XmmRegister>(),
3793 codegen_->LiteralFloatAddress(
3794 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3795 const_area->GetBaseMethodAddress(),
3796 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3797 } else {
3798 DCHECK(second.IsStackSlot());
3799 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3800 }
3801 break;
3802 }
3803
3804 case DataType::Type::kFloat64: {
3805 if (second.IsFpuRegister()) {
3806 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3807 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3808 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3809 DCHECK(const_area->IsEmittedAtUseSite());
3810 __ subsd(first.AsFpuRegister<XmmRegister>(),
3811 codegen_->LiteralDoubleAddress(
3812 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3813 const_area->GetBaseMethodAddress(),
3814 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3815 } else {
3816 DCHECK(second.IsDoubleStackSlot());
3817 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3818 }
3819 break;
3820 }
3821
3822 default:
3823 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3824 }
3825 }
3826
VisitMul(HMul * mul)3827 void LocationsBuilderX86::VisitMul(HMul* mul) {
3828 LocationSummary* locations =
3829 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3830 switch (mul->GetResultType()) {
3831 case DataType::Type::kInt32:
3832 locations->SetInAt(0, Location::RequiresRegister());
3833 locations->SetInAt(1, Location::Any());
3834 if (mul->InputAt(1)->IsIntConstant()) {
3835 // Can use 3 operand multiply.
3836 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3837 } else {
3838 locations->SetOut(Location::SameAsFirstInput());
3839 }
3840 break;
3841 case DataType::Type::kInt64: {
3842 locations->SetInAt(0, Location::RequiresRegister());
3843 locations->SetInAt(1, Location::Any());
3844 locations->SetOut(Location::SameAsFirstInput());
3845 // Needed for imul on 32bits with 64bits output.
3846 locations->AddTemp(Location::RegisterLocation(EAX));
3847 locations->AddTemp(Location::RegisterLocation(EDX));
3848 break;
3849 }
3850 case DataType::Type::kFloat32:
3851 case DataType::Type::kFloat64: {
3852 locations->SetInAt(0, Location::RequiresFpuRegister());
3853 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3854 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3855 } else if (mul->InputAt(1)->IsConstant()) {
3856 locations->SetInAt(1, Location::RequiresFpuRegister());
3857 } else {
3858 locations->SetInAt(1, Location::Any());
3859 }
3860 locations->SetOut(Location::SameAsFirstInput());
3861 break;
3862 }
3863
3864 default:
3865 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3866 }
3867 }
3868
VisitMul(HMul * mul)3869 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3870 LocationSummary* locations = mul->GetLocations();
3871 Location first = locations->InAt(0);
3872 Location second = locations->InAt(1);
3873 Location out = locations->Out();
3874
3875 switch (mul->GetResultType()) {
3876 case DataType::Type::kInt32:
3877 // The constant may have ended up in a register, so test explicitly to avoid
3878 // problems where the output may not be the same as the first operand.
3879 if (mul->InputAt(1)->IsIntConstant()) {
3880 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3881 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3882 } else if (second.IsRegister()) {
3883 DCHECK(first.Equals(out));
3884 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3885 } else {
3886 DCHECK(second.IsStackSlot());
3887 DCHECK(first.Equals(out));
3888 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3889 }
3890 break;
3891
3892 case DataType::Type::kInt64: {
3893 Register in1_hi = first.AsRegisterPairHigh<Register>();
3894 Register in1_lo = first.AsRegisterPairLow<Register>();
3895 Register eax = locations->GetTemp(0).AsRegister<Register>();
3896 Register edx = locations->GetTemp(1).AsRegister<Register>();
3897
3898 DCHECK_EQ(EAX, eax);
3899 DCHECK_EQ(EDX, edx);
3900
3901 // input: in1 - 64 bits, in2 - 64 bits.
3902 // output: in1
3903 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3904 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3905 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3906 if (second.IsConstant()) {
3907 DCHECK(second.GetConstant()->IsLongConstant());
3908
3909 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3910 int32_t low_value = Low32Bits(value);
3911 int32_t high_value = High32Bits(value);
3912 Immediate low(low_value);
3913 Immediate high(high_value);
3914
3915 __ movl(eax, high);
3916 // eax <- in1.lo * in2.hi
3917 __ imull(eax, in1_lo);
3918 // in1.hi <- in1.hi * in2.lo
3919 __ imull(in1_hi, low);
3920 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3921 __ addl(in1_hi, eax);
3922 // move in2_lo to eax to prepare for double precision
3923 __ movl(eax, low);
3924 // edx:eax <- in1.lo * in2.lo
3925 __ mull(in1_lo);
3926 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3927 __ addl(in1_hi, edx);
3928 // in1.lo <- (in1.lo * in2.lo)[31:0];
3929 __ movl(in1_lo, eax);
3930 } else if (second.IsRegisterPair()) {
3931 Register in2_hi = second.AsRegisterPairHigh<Register>();
3932 Register in2_lo = second.AsRegisterPairLow<Register>();
3933
3934 __ movl(eax, in2_hi);
3935 // eax <- in1.lo * in2.hi
3936 __ imull(eax, in1_lo);
3937 // in1.hi <- in1.hi * in2.lo
3938 __ imull(in1_hi, in2_lo);
3939 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3940 __ addl(in1_hi, eax);
3941 // move in1_lo to eax to prepare for double precision
3942 __ movl(eax, in1_lo);
3943 // edx:eax <- in1.lo * in2.lo
3944 __ mull(in2_lo);
3945 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3946 __ addl(in1_hi, edx);
3947 // in1.lo <- (in1.lo * in2.lo)[31:0];
3948 __ movl(in1_lo, eax);
3949 } else {
3950 DCHECK(second.IsDoubleStackSlot()) << second;
3951 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3952 Address in2_lo(ESP, second.GetStackIndex());
3953
3954 __ movl(eax, in2_hi);
3955 // eax <- in1.lo * in2.hi
3956 __ imull(eax, in1_lo);
3957 // in1.hi <- in1.hi * in2.lo
3958 __ imull(in1_hi, in2_lo);
3959 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3960 __ addl(in1_hi, eax);
3961 // move in1_lo to eax to prepare for double precision
3962 __ movl(eax, in1_lo);
3963 // edx:eax <- in1.lo * in2.lo
3964 __ mull(in2_lo);
3965 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3966 __ addl(in1_hi, edx);
3967 // in1.lo <- (in1.lo * in2.lo)[31:0];
3968 __ movl(in1_lo, eax);
3969 }
3970
3971 break;
3972 }
3973
3974 case DataType::Type::kFloat32: {
3975 DCHECK(first.Equals(locations->Out()));
3976 if (second.IsFpuRegister()) {
3977 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3978 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3979 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3980 DCHECK(const_area->IsEmittedAtUseSite());
3981 __ mulss(first.AsFpuRegister<XmmRegister>(),
3982 codegen_->LiteralFloatAddress(
3983 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3984 const_area->GetBaseMethodAddress(),
3985 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3986 } else {
3987 DCHECK(second.IsStackSlot());
3988 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3989 }
3990 break;
3991 }
3992
3993 case DataType::Type::kFloat64: {
3994 DCHECK(first.Equals(locations->Out()));
3995 if (second.IsFpuRegister()) {
3996 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3997 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3998 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3999 DCHECK(const_area->IsEmittedAtUseSite());
4000 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4001 codegen_->LiteralDoubleAddress(
4002 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4003 const_area->GetBaseMethodAddress(),
4004 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4005 } else {
4006 DCHECK(second.IsDoubleStackSlot());
4007 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4008 }
4009 break;
4010 }
4011
4012 default:
4013 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4014 }
4015 }
4016
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)4017 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
4018 uint32_t temp_offset,
4019 uint32_t stack_adjustment,
4020 bool is_fp,
4021 bool is_wide) {
4022 if (source.IsStackSlot()) {
4023 DCHECK(!is_wide);
4024 if (is_fp) {
4025 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4026 } else {
4027 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4028 }
4029 } else if (source.IsDoubleStackSlot()) {
4030 DCHECK(is_wide);
4031 if (is_fp) {
4032 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4033 } else {
4034 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4035 }
4036 } else {
4037 // Write the value to the temporary location on the stack and load to FP stack.
4038 if (!is_wide) {
4039 Location stack_temp = Location::StackSlot(temp_offset);
4040 codegen_->Move32(stack_temp, source);
4041 if (is_fp) {
4042 __ flds(Address(ESP, temp_offset));
4043 } else {
4044 __ filds(Address(ESP, temp_offset));
4045 }
4046 } else {
4047 Location stack_temp = Location::DoubleStackSlot(temp_offset);
4048 codegen_->Move64(stack_temp, source);
4049 if (is_fp) {
4050 __ fldl(Address(ESP, temp_offset));
4051 } else {
4052 __ fildl(Address(ESP, temp_offset));
4053 }
4054 }
4055 }
4056 }
4057
GenerateRemFP(HRem * rem)4058 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
4059 DataType::Type type = rem->GetResultType();
4060 bool is_float = type == DataType::Type::kFloat32;
4061 size_t elem_size = DataType::Size(type);
4062 LocationSummary* locations = rem->GetLocations();
4063 Location first = locations->InAt(0);
4064 Location second = locations->InAt(1);
4065 Location out = locations->Out();
4066
4067 // Create stack space for 2 elements.
4068 // TODO: enhance register allocator to ask for stack temporaries.
4069 codegen_->IncreaseFrame(2 * elem_size);
4070
4071 // Load the values to the FP stack in reverse order, using temporaries if needed.
4072 const bool is_wide = !is_float;
4073 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
4074 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
4075
4076 // Loop doing FPREM until we stabilize.
4077 NearLabel retry;
4078 __ Bind(&retry);
4079 __ fprem();
4080
4081 // Move FP status to AX.
4082 __ fstsw();
4083
4084 // And see if the argument reduction is complete. This is signaled by the
4085 // C2 FPU flag bit set to 0.
4086 __ andl(EAX, Immediate(kC2ConditionMask));
4087 __ j(kNotEqual, &retry);
4088
4089 // We have settled on the final value. Retrieve it into an XMM register.
4090 // Store FP top of stack to real stack.
4091 if (is_float) {
4092 __ fsts(Address(ESP, 0));
4093 } else {
4094 __ fstl(Address(ESP, 0));
4095 }
4096
4097 // Pop the 2 items from the FP stack.
4098 __ fucompp();
4099
4100 // Load the value from the stack into an XMM register.
4101 DCHECK(out.IsFpuRegister()) << out;
4102 if (is_float) {
4103 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4104 } else {
4105 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4106 }
4107
4108 // And remove the temporary stack space we allocated.
4109 codegen_->DecreaseFrame(2 * elem_size);
4110 }
4111
4112
DivRemOneOrMinusOne(HBinaryOperation * instruction)4113 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4114 DCHECK(instruction->IsDiv() || instruction->IsRem());
4115
4116 LocationSummary* locations = instruction->GetLocations();
4117 DCHECK(locations->InAt(1).IsConstant());
4118 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
4119
4120 Register out_register = locations->Out().AsRegister<Register>();
4121 Register input_register = locations->InAt(0).AsRegister<Register>();
4122 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4123
4124 DCHECK(imm == 1 || imm == -1);
4125
4126 if (instruction->IsRem()) {
4127 __ xorl(out_register, out_register);
4128 } else {
4129 __ movl(out_register, input_register);
4130 if (imm == -1) {
4131 __ negl(out_register);
4132 }
4133 }
4134 }
4135
RemByPowerOfTwo(HRem * instruction)4136 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
4137 LocationSummary* locations = instruction->GetLocations();
4138 Location second = locations->InAt(1);
4139
4140 Register out = locations->Out().AsRegister<Register>();
4141 Register numerator = locations->InAt(0).AsRegister<Register>();
4142
4143 int32_t imm = Int64FromConstant(second.GetConstant());
4144 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4145 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4146
4147 Register tmp = locations->GetTemp(0).AsRegister<Register>();
4148 NearLabel done;
4149 __ movl(out, numerator);
4150 __ andl(out, Immediate(abs_imm-1));
4151 __ j(Condition::kZero, &done);
4152 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4153 __ testl(numerator, numerator);
4154 __ cmovl(Condition::kLess, out, tmp);
4155 __ Bind(&done);
4156 }
4157
DivByPowerOfTwo(HDiv * instruction)4158 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
4159 LocationSummary* locations = instruction->GetLocations();
4160
4161 Register out_register = locations->Out().AsRegister<Register>();
4162 Register input_register = locations->InAt(0).AsRegister<Register>();
4163 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4164 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4165 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4166
4167 Register num = locations->GetTemp(0).AsRegister<Register>();
4168
4169 __ leal(num, Address(input_register, abs_imm - 1));
4170 __ testl(input_register, input_register);
4171 __ cmovl(kGreaterEqual, num, input_register);
4172 int shift = CTZ(imm);
4173 __ sarl(num, Immediate(shift));
4174
4175 if (imm < 0) {
4176 __ negl(num);
4177 }
4178
4179 __ movl(out_register, num);
4180 }
4181
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4182 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4183 DCHECK(instruction->IsDiv() || instruction->IsRem());
4184
4185 LocationSummary* locations = instruction->GetLocations();
4186 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4187
4188 Register eax = locations->InAt(0).AsRegister<Register>();
4189 Register out = locations->Out().AsRegister<Register>();
4190 Register num;
4191 Register edx;
4192
4193 if (instruction->IsDiv()) {
4194 edx = locations->GetTemp(0).AsRegister<Register>();
4195 num = locations->GetTemp(1).AsRegister<Register>();
4196 } else {
4197 edx = locations->Out().AsRegister<Register>();
4198 num = locations->GetTemp(0).AsRegister<Register>();
4199 }
4200
4201 DCHECK_EQ(EAX, eax);
4202 DCHECK_EQ(EDX, edx);
4203 if (instruction->IsDiv()) {
4204 DCHECK_EQ(EAX, out);
4205 } else {
4206 DCHECK_EQ(EDX, out);
4207 }
4208
4209 int64_t magic;
4210 int shift;
4211 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4212
4213 // Save the numerator.
4214 __ movl(num, eax);
4215
4216 // EAX = magic
4217 __ movl(eax, Immediate(magic));
4218
4219 // EDX:EAX = magic * numerator
4220 __ imull(num);
4221
4222 if (imm > 0 && magic < 0) {
4223 // EDX += num
4224 __ addl(edx, num);
4225 } else if (imm < 0 && magic > 0) {
4226 __ subl(edx, num);
4227 }
4228
4229 // Shift if needed.
4230 if (shift != 0) {
4231 __ sarl(edx, Immediate(shift));
4232 }
4233
4234 // EDX += 1 if EDX < 0
4235 __ movl(eax, edx);
4236 __ shrl(edx, Immediate(31));
4237 __ addl(edx, eax);
4238
4239 if (instruction->IsRem()) {
4240 __ movl(eax, num);
4241 __ imull(edx, Immediate(imm));
4242 __ subl(eax, edx);
4243 __ movl(edx, eax);
4244 } else {
4245 __ movl(eax, edx);
4246 }
4247 }
4248
GenerateDivRemIntegral(HBinaryOperation * instruction)4249 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4250 DCHECK(instruction->IsDiv() || instruction->IsRem());
4251
4252 LocationSummary* locations = instruction->GetLocations();
4253 Location out = locations->Out();
4254 Location first = locations->InAt(0);
4255 Location second = locations->InAt(1);
4256 bool is_div = instruction->IsDiv();
4257
4258 switch (instruction->GetResultType()) {
4259 case DataType::Type::kInt32: {
4260 DCHECK_EQ(EAX, first.AsRegister<Register>());
4261 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4262
4263 if (second.IsConstant()) {
4264 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4265
4266 if (imm == 0) {
4267 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
4268 } else if (imm == 1 || imm == -1) {
4269 DivRemOneOrMinusOne(instruction);
4270 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4271 if (is_div) {
4272 DivByPowerOfTwo(instruction->AsDiv());
4273 } else {
4274 RemByPowerOfTwo(instruction->AsRem());
4275 }
4276 } else {
4277 DCHECK(imm <= -2 || imm >= 2);
4278 GenerateDivRemWithAnyConstant(instruction);
4279 }
4280 } else {
4281 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4282 instruction, out.AsRegister<Register>(), is_div);
4283 codegen_->AddSlowPath(slow_path);
4284
4285 Register second_reg = second.AsRegister<Register>();
4286 // 0x80000000/-1 triggers an arithmetic exception!
4287 // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
4288 // it's safe to just use negl instead of more complex comparisons.
4289
4290 __ cmpl(second_reg, Immediate(-1));
4291 __ j(kEqual, slow_path->GetEntryLabel());
4292
4293 // edx:eax <- sign-extended of eax
4294 __ cdq();
4295 // eax = quotient, edx = remainder
4296 __ idivl(second_reg);
4297 __ Bind(slow_path->GetExitLabel());
4298 }
4299 break;
4300 }
4301
4302 case DataType::Type::kInt64: {
4303 InvokeRuntimeCallingConvention calling_convention;
4304 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4305 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4306 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4307 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4308 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4309 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4310
4311 if (is_div) {
4312 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4313 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4314 } else {
4315 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4316 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4317 }
4318 break;
4319 }
4320
4321 default:
4322 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4323 }
4324 }
4325
VisitDiv(HDiv * div)4326 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4327 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4328 ? LocationSummary::kCallOnMainOnly
4329 : LocationSummary::kNoCall;
4330 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4331
4332 switch (div->GetResultType()) {
4333 case DataType::Type::kInt32: {
4334 locations->SetInAt(0, Location::RegisterLocation(EAX));
4335 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4336 locations->SetOut(Location::SameAsFirstInput());
4337 // Intel uses edx:eax as the dividend.
4338 locations->AddTemp(Location::RegisterLocation(EDX));
4339 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4340 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
4341 // output and request another temp.
4342 if (div->InputAt(1)->IsIntConstant()) {
4343 locations->AddTemp(Location::RequiresRegister());
4344 }
4345 break;
4346 }
4347 case DataType::Type::kInt64: {
4348 InvokeRuntimeCallingConvention calling_convention;
4349 locations->SetInAt(0, Location::RegisterPairLocation(
4350 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4351 locations->SetInAt(1, Location::RegisterPairLocation(
4352 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4353 // Runtime helper puts the result in EAX, EDX.
4354 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4355 break;
4356 }
4357 case DataType::Type::kFloat32:
4358 case DataType::Type::kFloat64: {
4359 locations->SetInAt(0, Location::RequiresFpuRegister());
4360 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4361 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4362 } else if (div->InputAt(1)->IsConstant()) {
4363 locations->SetInAt(1, Location::RequiresFpuRegister());
4364 } else {
4365 locations->SetInAt(1, Location::Any());
4366 }
4367 locations->SetOut(Location::SameAsFirstInput());
4368 break;
4369 }
4370
4371 default:
4372 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4373 }
4374 }
4375
VisitDiv(HDiv * div)4376 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4377 LocationSummary* locations = div->GetLocations();
4378 Location first = locations->InAt(0);
4379 Location second = locations->InAt(1);
4380
4381 switch (div->GetResultType()) {
4382 case DataType::Type::kInt32:
4383 case DataType::Type::kInt64: {
4384 GenerateDivRemIntegral(div);
4385 break;
4386 }
4387
4388 case DataType::Type::kFloat32: {
4389 if (second.IsFpuRegister()) {
4390 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4391 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4392 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4393 DCHECK(const_area->IsEmittedAtUseSite());
4394 __ divss(first.AsFpuRegister<XmmRegister>(),
4395 codegen_->LiteralFloatAddress(
4396 const_area->GetConstant()->AsFloatConstant()->GetValue(),
4397 const_area->GetBaseMethodAddress(),
4398 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4399 } else {
4400 DCHECK(second.IsStackSlot());
4401 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4402 }
4403 break;
4404 }
4405
4406 case DataType::Type::kFloat64: {
4407 if (second.IsFpuRegister()) {
4408 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4409 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4410 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4411 DCHECK(const_area->IsEmittedAtUseSite());
4412 __ divsd(first.AsFpuRegister<XmmRegister>(),
4413 codegen_->LiteralDoubleAddress(
4414 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4415 const_area->GetBaseMethodAddress(),
4416 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4417 } else {
4418 DCHECK(second.IsDoubleStackSlot());
4419 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4420 }
4421 break;
4422 }
4423
4424 default:
4425 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4426 }
4427 }
4428
VisitRem(HRem * rem)4429 void LocationsBuilderX86::VisitRem(HRem* rem) {
4430 DataType::Type type = rem->GetResultType();
4431
4432 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4433 ? LocationSummary::kCallOnMainOnly
4434 : LocationSummary::kNoCall;
4435 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4436
4437 switch (type) {
4438 case DataType::Type::kInt32: {
4439 locations->SetInAt(0, Location::RegisterLocation(EAX));
4440 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4441 locations->SetOut(Location::RegisterLocation(EDX));
4442 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4443 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
4444 // output and request another temp.
4445 if (rem->InputAt(1)->IsIntConstant()) {
4446 locations->AddTemp(Location::RequiresRegister());
4447 }
4448 break;
4449 }
4450 case DataType::Type::kInt64: {
4451 InvokeRuntimeCallingConvention calling_convention;
4452 locations->SetInAt(0, Location::RegisterPairLocation(
4453 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4454 locations->SetInAt(1, Location::RegisterPairLocation(
4455 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4456 // Runtime helper puts the result in EAX, EDX.
4457 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4458 break;
4459 }
4460 case DataType::Type::kFloat64:
4461 case DataType::Type::kFloat32: {
4462 locations->SetInAt(0, Location::Any());
4463 locations->SetInAt(1, Location::Any());
4464 locations->SetOut(Location::RequiresFpuRegister());
4465 locations->AddTemp(Location::RegisterLocation(EAX));
4466 break;
4467 }
4468
4469 default:
4470 LOG(FATAL) << "Unexpected rem type " << type;
4471 }
4472 }
4473
VisitRem(HRem * rem)4474 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4475 DataType::Type type = rem->GetResultType();
4476 switch (type) {
4477 case DataType::Type::kInt32:
4478 case DataType::Type::kInt64: {
4479 GenerateDivRemIntegral(rem);
4480 break;
4481 }
4482 case DataType::Type::kFloat32:
4483 case DataType::Type::kFloat64: {
4484 GenerateRemFP(rem);
4485 break;
4486 }
4487 default:
4488 LOG(FATAL) << "Unexpected rem type " << type;
4489 }
4490 }
4491
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4492 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4493 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4494 switch (minmax->GetResultType()) {
4495 case DataType::Type::kInt32:
4496 locations->SetInAt(0, Location::RequiresRegister());
4497 locations->SetInAt(1, Location::RequiresRegister());
4498 locations->SetOut(Location::SameAsFirstInput());
4499 break;
4500 case DataType::Type::kInt64:
4501 locations->SetInAt(0, Location::RequiresRegister());
4502 locations->SetInAt(1, Location::RequiresRegister());
4503 locations->SetOut(Location::SameAsFirstInput());
4504 // Register to use to perform a long subtract to set cc.
4505 locations->AddTemp(Location::RequiresRegister());
4506 break;
4507 case DataType::Type::kFloat32:
4508 locations->SetInAt(0, Location::RequiresFpuRegister());
4509 locations->SetInAt(1, Location::RequiresFpuRegister());
4510 locations->SetOut(Location::SameAsFirstInput());
4511 locations->AddTemp(Location::RequiresRegister());
4512 break;
4513 case DataType::Type::kFloat64:
4514 locations->SetInAt(0, Location::RequiresFpuRegister());
4515 locations->SetInAt(1, Location::RequiresFpuRegister());
4516 locations->SetOut(Location::SameAsFirstInput());
4517 break;
4518 default:
4519 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4520 }
4521 }
4522
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4523 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4524 bool is_min,
4525 DataType::Type type) {
4526 Location op1_loc = locations->InAt(0);
4527 Location op2_loc = locations->InAt(1);
4528
4529 // Shortcut for same input locations.
4530 if (op1_loc.Equals(op2_loc)) {
4531 // Can return immediately, as op1_loc == out_loc.
4532 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4533 // a copy here.
4534 DCHECK(locations->Out().Equals(op1_loc));
4535 return;
4536 }
4537
4538 if (type == DataType::Type::kInt64) {
4539 // Need to perform a subtract to get the sign right.
4540 // op1 is already in the same location as the output.
4541 Location output = locations->Out();
4542 Register output_lo = output.AsRegisterPairLow<Register>();
4543 Register output_hi = output.AsRegisterPairHigh<Register>();
4544
4545 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4546 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4547
4548 // The comparison is performed by subtracting the second operand from
4549 // the first operand and then setting the status flags in the same
4550 // manner as the SUB instruction."
4551 __ cmpl(output_lo, op2_lo);
4552
4553 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4554 Register temp = locations->GetTemp(0).AsRegister<Register>();
4555 __ movl(temp, output_hi);
4556 __ sbbl(temp, op2_hi);
4557
4558 // Now the condition code is correct.
4559 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4560 __ cmovl(cond, output_lo, op2_lo);
4561 __ cmovl(cond, output_hi, op2_hi);
4562 } else {
4563 DCHECK_EQ(type, DataType::Type::kInt32);
4564 Register out = locations->Out().AsRegister<Register>();
4565 Register op2 = op2_loc.AsRegister<Register>();
4566
4567 // (out := op1)
4568 // out <=? op2
4569 // if out is min jmp done
4570 // out := op2
4571 // done:
4572
4573 __ cmpl(out, op2);
4574 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4575 __ cmovl(cond, out, op2);
4576 }
4577 }
4578
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4579 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4580 bool is_min,
4581 DataType::Type type) {
4582 Location op1_loc = locations->InAt(0);
4583 Location op2_loc = locations->InAt(1);
4584 Location out_loc = locations->Out();
4585 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4586
4587 // Shortcut for same input locations.
4588 if (op1_loc.Equals(op2_loc)) {
4589 DCHECK(out_loc.Equals(op1_loc));
4590 return;
4591 }
4592
4593 // (out := op1)
4594 // out <=? op2
4595 // if Nan jmp Nan_label
4596 // if out is min jmp done
4597 // if op2 is min jmp op2_label
4598 // handle -0/+0
4599 // jmp done
4600 // Nan_label:
4601 // out := NaN
4602 // op2_label:
4603 // out := op2
4604 // done:
4605 //
4606 // This removes one jmp, but needs to copy one input (op1) to out.
4607 //
4608 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4609
4610 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4611
4612 NearLabel nan, done, op2_label;
4613 if (type == DataType::Type::kFloat64) {
4614 __ ucomisd(out, op2);
4615 } else {
4616 DCHECK_EQ(type, DataType::Type::kFloat32);
4617 __ ucomiss(out, op2);
4618 }
4619
4620 __ j(Condition::kParityEven, &nan);
4621
4622 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4623 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4624
4625 // Handle 0.0/-0.0.
4626 if (is_min) {
4627 if (type == DataType::Type::kFloat64) {
4628 __ orpd(out, op2);
4629 } else {
4630 __ orps(out, op2);
4631 }
4632 } else {
4633 if (type == DataType::Type::kFloat64) {
4634 __ andpd(out, op2);
4635 } else {
4636 __ andps(out, op2);
4637 }
4638 }
4639 __ jmp(&done);
4640
4641 // NaN handling.
4642 __ Bind(&nan);
4643 if (type == DataType::Type::kFloat64) {
4644 // TODO: Use a constant from the constant table (requires extra input).
4645 __ LoadLongConstant(out, kDoubleNaN);
4646 } else {
4647 Register constant = locations->GetTemp(0).AsRegister<Register>();
4648 __ movl(constant, Immediate(kFloatNaN));
4649 __ movd(out, constant);
4650 }
4651 __ jmp(&done);
4652
4653 // out := op2;
4654 __ Bind(&op2_label);
4655 if (type == DataType::Type::kFloat64) {
4656 __ movsd(out, op2);
4657 } else {
4658 __ movss(out, op2);
4659 }
4660
4661 // Done.
4662 __ Bind(&done);
4663 }
4664
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4665 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4666 DataType::Type type = minmax->GetResultType();
4667 switch (type) {
4668 case DataType::Type::kInt32:
4669 case DataType::Type::kInt64:
4670 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4671 break;
4672 case DataType::Type::kFloat32:
4673 case DataType::Type::kFloat64:
4674 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4675 break;
4676 default:
4677 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4678 }
4679 }
4680
VisitMin(HMin * min)4681 void LocationsBuilderX86::VisitMin(HMin* min) {
4682 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4683 }
4684
VisitMin(HMin * min)4685 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4686 GenerateMinMax(min, /*is_min*/ true);
4687 }
4688
VisitMax(HMax * max)4689 void LocationsBuilderX86::VisitMax(HMax* max) {
4690 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4691 }
4692
VisitMax(HMax * max)4693 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4694 GenerateMinMax(max, /*is_min*/ false);
4695 }
4696
VisitAbs(HAbs * abs)4697 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4698 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4699 switch (abs->GetResultType()) {
4700 case DataType::Type::kInt32:
4701 locations->SetInAt(0, Location::RegisterLocation(EAX));
4702 locations->SetOut(Location::SameAsFirstInput());
4703 locations->AddTemp(Location::RegisterLocation(EDX));
4704 break;
4705 case DataType::Type::kInt64:
4706 locations->SetInAt(0, Location::RequiresRegister());
4707 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4708 locations->AddTemp(Location::RequiresRegister());
4709 break;
4710 case DataType::Type::kFloat32:
4711 locations->SetInAt(0, Location::RequiresFpuRegister());
4712 locations->SetOut(Location::SameAsFirstInput());
4713 locations->AddTemp(Location::RequiresFpuRegister());
4714 locations->AddTemp(Location::RequiresRegister());
4715 break;
4716 case DataType::Type::kFloat64:
4717 locations->SetInAt(0, Location::RequiresFpuRegister());
4718 locations->SetOut(Location::SameAsFirstInput());
4719 locations->AddTemp(Location::RequiresFpuRegister());
4720 break;
4721 default:
4722 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4723 }
4724 }
4725
VisitAbs(HAbs * abs)4726 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4727 LocationSummary* locations = abs->GetLocations();
4728 switch (abs->GetResultType()) {
4729 case DataType::Type::kInt32: {
4730 Register out = locations->Out().AsRegister<Register>();
4731 DCHECK_EQ(out, EAX);
4732 Register temp = locations->GetTemp(0).AsRegister<Register>();
4733 DCHECK_EQ(temp, EDX);
4734 // Sign extend EAX into EDX.
4735 __ cdq();
4736 // XOR EAX with sign.
4737 __ xorl(EAX, EDX);
4738 // Subtract out sign to correct.
4739 __ subl(EAX, EDX);
4740 // The result is in EAX.
4741 break;
4742 }
4743 case DataType::Type::kInt64: {
4744 Location input = locations->InAt(0);
4745 Register input_lo = input.AsRegisterPairLow<Register>();
4746 Register input_hi = input.AsRegisterPairHigh<Register>();
4747 Location output = locations->Out();
4748 Register output_lo = output.AsRegisterPairLow<Register>();
4749 Register output_hi = output.AsRegisterPairHigh<Register>();
4750 Register temp = locations->GetTemp(0).AsRegister<Register>();
4751 // Compute the sign into the temporary.
4752 __ movl(temp, input_hi);
4753 __ sarl(temp, Immediate(31));
4754 // Store the sign into the output.
4755 __ movl(output_lo, temp);
4756 __ movl(output_hi, temp);
4757 // XOR the input to the output.
4758 __ xorl(output_lo, input_lo);
4759 __ xorl(output_hi, input_hi);
4760 // Subtract the sign.
4761 __ subl(output_lo, temp);
4762 __ sbbl(output_hi, temp);
4763 break;
4764 }
4765 case DataType::Type::kFloat32: {
4766 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4767 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4768 Register constant = locations->GetTemp(1).AsRegister<Register>();
4769 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4770 __ movd(temp, constant);
4771 __ andps(out, temp);
4772 break;
4773 }
4774 case DataType::Type::kFloat64: {
4775 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4776 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4777 // TODO: Use a constant from the constant table (requires extra input).
4778 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4779 __ andpd(out, temp);
4780 break;
4781 }
4782 default:
4783 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4784 }
4785 }
4786
VisitDivZeroCheck(HDivZeroCheck * instruction)4787 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4788 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4789 switch (instruction->GetType()) {
4790 case DataType::Type::kBool:
4791 case DataType::Type::kUint8:
4792 case DataType::Type::kInt8:
4793 case DataType::Type::kUint16:
4794 case DataType::Type::kInt16:
4795 case DataType::Type::kInt32: {
4796 locations->SetInAt(0, Location::Any());
4797 break;
4798 }
4799 case DataType::Type::kInt64: {
4800 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4801 if (!instruction->IsConstant()) {
4802 locations->AddTemp(Location::RequiresRegister());
4803 }
4804 break;
4805 }
4806 default:
4807 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4808 }
4809 }
4810
VisitDivZeroCheck(HDivZeroCheck * instruction)4811 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4812 SlowPathCode* slow_path =
4813 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4814 codegen_->AddSlowPath(slow_path);
4815
4816 LocationSummary* locations = instruction->GetLocations();
4817 Location value = locations->InAt(0);
4818
4819 switch (instruction->GetType()) {
4820 case DataType::Type::kBool:
4821 case DataType::Type::kUint8:
4822 case DataType::Type::kInt8:
4823 case DataType::Type::kUint16:
4824 case DataType::Type::kInt16:
4825 case DataType::Type::kInt32: {
4826 if (value.IsRegister()) {
4827 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4828 __ j(kEqual, slow_path->GetEntryLabel());
4829 } else if (value.IsStackSlot()) {
4830 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4831 __ j(kEqual, slow_path->GetEntryLabel());
4832 } else {
4833 DCHECK(value.IsConstant()) << value;
4834 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4835 __ jmp(slow_path->GetEntryLabel());
4836 }
4837 }
4838 break;
4839 }
4840 case DataType::Type::kInt64: {
4841 if (value.IsRegisterPair()) {
4842 Register temp = locations->GetTemp(0).AsRegister<Register>();
4843 __ movl(temp, value.AsRegisterPairLow<Register>());
4844 __ orl(temp, value.AsRegisterPairHigh<Register>());
4845 __ j(kEqual, slow_path->GetEntryLabel());
4846 } else {
4847 DCHECK(value.IsConstant()) << value;
4848 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4849 __ jmp(slow_path->GetEntryLabel());
4850 }
4851 }
4852 break;
4853 }
4854 default:
4855 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4856 }
4857 }
4858
HandleShift(HBinaryOperation * op)4859 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4860 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4861
4862 LocationSummary* locations =
4863 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4864
4865 switch (op->GetResultType()) {
4866 case DataType::Type::kInt32:
4867 case DataType::Type::kInt64: {
4868 // Can't have Location::Any() and output SameAsFirstInput()
4869 locations->SetInAt(0, Location::RequiresRegister());
4870 // The shift count needs to be in CL or a constant.
4871 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4872 locations->SetOut(Location::SameAsFirstInput());
4873 break;
4874 }
4875 default:
4876 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4877 }
4878 }
4879
HandleShift(HBinaryOperation * op)4880 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4881 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4882
4883 LocationSummary* locations = op->GetLocations();
4884 Location first = locations->InAt(0);
4885 Location second = locations->InAt(1);
4886 DCHECK(first.Equals(locations->Out()));
4887
4888 switch (op->GetResultType()) {
4889 case DataType::Type::kInt32: {
4890 DCHECK(first.IsRegister());
4891 Register first_reg = first.AsRegister<Register>();
4892 if (second.IsRegister()) {
4893 Register second_reg = second.AsRegister<Register>();
4894 DCHECK_EQ(ECX, second_reg);
4895 if (op->IsShl()) {
4896 __ shll(first_reg, second_reg);
4897 } else if (op->IsShr()) {
4898 __ sarl(first_reg, second_reg);
4899 } else {
4900 __ shrl(first_reg, second_reg);
4901 }
4902 } else {
4903 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4904 if (shift == 0) {
4905 return;
4906 }
4907 Immediate imm(shift);
4908 if (op->IsShl()) {
4909 __ shll(first_reg, imm);
4910 } else if (op->IsShr()) {
4911 __ sarl(first_reg, imm);
4912 } else {
4913 __ shrl(first_reg, imm);
4914 }
4915 }
4916 break;
4917 }
4918 case DataType::Type::kInt64: {
4919 if (second.IsRegister()) {
4920 Register second_reg = second.AsRegister<Register>();
4921 DCHECK_EQ(ECX, second_reg);
4922 if (op->IsShl()) {
4923 GenerateShlLong(first, second_reg);
4924 } else if (op->IsShr()) {
4925 GenerateShrLong(first, second_reg);
4926 } else {
4927 GenerateUShrLong(first, second_reg);
4928 }
4929 } else {
4930 // Shift by a constant.
4931 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4932 // Nothing to do if the shift is 0, as the input is already the output.
4933 if (shift != 0) {
4934 if (op->IsShl()) {
4935 GenerateShlLong(first, shift);
4936 } else if (op->IsShr()) {
4937 GenerateShrLong(first, shift);
4938 } else {
4939 GenerateUShrLong(first, shift);
4940 }
4941 }
4942 }
4943 break;
4944 }
4945 default:
4946 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4947 }
4948 }
4949
GenerateShlLong(const Location & loc,int shift)4950 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4951 Register low = loc.AsRegisterPairLow<Register>();
4952 Register high = loc.AsRegisterPairHigh<Register>();
4953 if (shift == 1) {
4954 // This is just an addition.
4955 __ addl(low, low);
4956 __ adcl(high, high);
4957 } else if (shift == 32) {
4958 // Shift by 32 is easy. High gets low, and low gets 0.
4959 codegen_->EmitParallelMoves(
4960 loc.ToLow(),
4961 loc.ToHigh(),
4962 DataType::Type::kInt32,
4963 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4964 loc.ToLow(),
4965 DataType::Type::kInt32);
4966 } else if (shift > 32) {
4967 // Low part becomes 0. High part is low part << (shift-32).
4968 __ movl(high, low);
4969 __ shll(high, Immediate(shift - 32));
4970 __ xorl(low, low);
4971 } else {
4972 // Between 1 and 31.
4973 __ shld(high, low, Immediate(shift));
4974 __ shll(low, Immediate(shift));
4975 }
4976 }
4977
GenerateShlLong(const Location & loc,Register shifter)4978 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4979 NearLabel done;
4980 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4981 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4982 __ testl(shifter, Immediate(32));
4983 __ j(kEqual, &done);
4984 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4985 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4986 __ Bind(&done);
4987 }
4988
GenerateShrLong(const Location & loc,int shift)4989 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4990 Register low = loc.AsRegisterPairLow<Register>();
4991 Register high = loc.AsRegisterPairHigh<Register>();
4992 if (shift == 32) {
4993 // Need to copy the sign.
4994 DCHECK_NE(low, high);
4995 __ movl(low, high);
4996 __ sarl(high, Immediate(31));
4997 } else if (shift > 32) {
4998 DCHECK_NE(low, high);
4999 // High part becomes sign. Low part is shifted by shift - 32.
5000 __ movl(low, high);
5001 __ sarl(high, Immediate(31));
5002 __ sarl(low, Immediate(shift - 32));
5003 } else {
5004 // Between 1 and 31.
5005 __ shrd(low, high, Immediate(shift));
5006 __ sarl(high, Immediate(shift));
5007 }
5008 }
5009
GenerateShrLong(const Location & loc,Register shifter)5010 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
5011 NearLabel done;
5012 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5013 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
5014 __ testl(shifter, Immediate(32));
5015 __ j(kEqual, &done);
5016 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5017 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
5018 __ Bind(&done);
5019 }
5020
GenerateUShrLong(const Location & loc,int shift)5021 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
5022 Register low = loc.AsRegisterPairLow<Register>();
5023 Register high = loc.AsRegisterPairHigh<Register>();
5024 if (shift == 32) {
5025 // Shift by 32 is easy. Low gets high, and high gets 0.
5026 codegen_->EmitParallelMoves(
5027 loc.ToHigh(),
5028 loc.ToLow(),
5029 DataType::Type::kInt32,
5030 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
5031 loc.ToHigh(),
5032 DataType::Type::kInt32);
5033 } else if (shift > 32) {
5034 // Low part is high >> (shift - 32). High part becomes 0.
5035 __ movl(low, high);
5036 __ shrl(low, Immediate(shift - 32));
5037 __ xorl(high, high);
5038 } else {
5039 // Between 1 and 31.
5040 __ shrd(low, high, Immediate(shift));
5041 __ shrl(high, Immediate(shift));
5042 }
5043 }
5044
GenerateUShrLong(const Location & loc,Register shifter)5045 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
5046 NearLabel done;
5047 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5048 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
5049 __ testl(shifter, Immediate(32));
5050 __ j(kEqual, &done);
5051 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5052 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
5053 __ Bind(&done);
5054 }
5055
VisitRol(HRol * rol)5056 void LocationsBuilderX86::VisitRol(HRol* rol) {
5057 HandleRotate(rol);
5058 }
5059
VisitRor(HRor * ror)5060 void LocationsBuilderX86::VisitRor(HRor* ror) {
5061 HandleRotate(ror);
5062 }
5063
HandleRotate(HBinaryOperation * rotate)5064 void LocationsBuilderX86::HandleRotate(HBinaryOperation* rotate) {
5065 LocationSummary* locations =
5066 new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5067
5068 switch (rotate->GetResultType()) {
5069 case DataType::Type::kInt64:
5070 // Add the temporary needed.
5071 locations->AddTemp(Location::RequiresRegister());
5072 FALLTHROUGH_INTENDED;
5073 case DataType::Type::kInt32:
5074 locations->SetInAt(0, Location::RequiresRegister());
5075 // The shift count needs to be in CL (unless it is a constant).
5076 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, rotate->InputAt(1)));
5077 locations->SetOut(Location::SameAsFirstInput());
5078 break;
5079 default:
5080 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5081 UNREACHABLE();
5082 }
5083 }
5084
VisitRol(HRol * rol)5085 void InstructionCodeGeneratorX86::VisitRol(HRol* rol) {
5086 HandleRotate(rol);
5087 }
5088
VisitRor(HRor * ror)5089 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
5090 HandleRotate(ror);
5091 }
5092
HandleRotate(HBinaryOperation * rotate)5093 void InstructionCodeGeneratorX86::HandleRotate(HBinaryOperation* rotate) {
5094 LocationSummary* locations = rotate->GetLocations();
5095 Location first = locations->InAt(0);
5096 Location second = locations->InAt(1);
5097
5098 if (rotate->GetResultType() == DataType::Type::kInt32) {
5099 Register first_reg = first.AsRegister<Register>();
5100 if (second.IsRegister()) {
5101 Register second_reg = second.AsRegister<Register>();
5102 if (rotate->IsRol()) {
5103 __ roll(first_reg, second_reg);
5104 } else {
5105 DCHECK(rotate->IsRor());
5106 __ rorl(first_reg, second_reg);
5107 }
5108 } else {
5109 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5110 if (rotate->IsRol()) {
5111 __ roll(first_reg, imm);
5112 } else {
5113 DCHECK(rotate->IsRor());
5114 __ rorl(first_reg, imm);
5115 }
5116 }
5117 return;
5118 }
5119
5120 DCHECK_EQ(rotate->GetResultType(), DataType::Type::kInt64);
5121 Register first_reg_lo = first.AsRegisterPairLow<Register>();
5122 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
5123 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
5124 if (second.IsRegister()) {
5125 Register second_reg = second.AsRegister<Register>();
5126 DCHECK_EQ(second_reg, ECX);
5127
5128 __ movl(temp_reg, first_reg_hi);
5129 if (rotate->IsRol()) {
5130 __ shld(first_reg_hi, first_reg_lo, second_reg);
5131 __ shld(first_reg_lo, temp_reg, second_reg);
5132 } else {
5133 __ shrd(first_reg_hi, first_reg_lo, second_reg);
5134 __ shrd(first_reg_lo, temp_reg, second_reg);
5135 }
5136 __ movl(temp_reg, first_reg_hi);
5137 __ testl(second_reg, Immediate(32));
5138 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
5139 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
5140 } else {
5141 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
5142 if (rotate->IsRol()) {
5143 value = -value;
5144 }
5145 int32_t shift_amt = value & kMaxLongShiftDistance;
5146
5147 if (shift_amt == 0) {
5148 // Already fine.
5149 return;
5150 }
5151 if (shift_amt == 32) {
5152 // Just swap.
5153 __ movl(temp_reg, first_reg_lo);
5154 __ movl(first_reg_lo, first_reg_hi);
5155 __ movl(first_reg_hi, temp_reg);
5156 return;
5157 }
5158
5159 Immediate imm(shift_amt);
5160 // Save the constents of the low value.
5161 __ movl(temp_reg, first_reg_lo);
5162
5163 // Shift right into low, feeding bits from high.
5164 __ shrd(first_reg_lo, first_reg_hi, imm);
5165
5166 // Shift right into high, feeding bits from the original low.
5167 __ shrd(first_reg_hi, temp_reg, imm);
5168
5169 // Swap if needed.
5170 if (shift_amt > 32) {
5171 __ movl(temp_reg, first_reg_lo);
5172 __ movl(first_reg_lo, first_reg_hi);
5173 __ movl(first_reg_hi, temp_reg);
5174 }
5175 }
5176 }
5177
VisitShl(HShl * shl)5178 void LocationsBuilderX86::VisitShl(HShl* shl) {
5179 HandleShift(shl);
5180 }
5181
VisitShl(HShl * shl)5182 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
5183 HandleShift(shl);
5184 }
5185
VisitShr(HShr * shr)5186 void LocationsBuilderX86::VisitShr(HShr* shr) {
5187 HandleShift(shr);
5188 }
5189
VisitShr(HShr * shr)5190 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
5191 HandleShift(shr);
5192 }
5193
VisitUShr(HUShr * ushr)5194 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
5195 HandleShift(ushr);
5196 }
5197
VisitUShr(HUShr * ushr)5198 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
5199 HandleShift(ushr);
5200 }
5201
VisitNewInstance(HNewInstance * instruction)5202 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
5203 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5204 instruction, LocationSummary::kCallOnMainOnly);
5205 locations->SetOut(Location::RegisterLocation(EAX));
5206 InvokeRuntimeCallingConvention calling_convention;
5207 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5208 }
5209
VisitNewInstance(HNewInstance * instruction)5210 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
5211 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5212 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5213 DCHECK(!codegen_->IsLeafMethod());
5214 }
5215
VisitNewArray(HNewArray * instruction)5216 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
5217 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5218 instruction, LocationSummary::kCallOnMainOnly);
5219 locations->SetOut(Location::RegisterLocation(EAX));
5220 InvokeRuntimeCallingConvention calling_convention;
5221 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5222 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5223 }
5224
VisitNewArray(HNewArray * instruction)5225 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
5226 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5227 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5228 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5229 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5230 DCHECK(!codegen_->IsLeafMethod());
5231 }
5232
VisitParameterValue(HParameterValue * instruction)5233 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
5234 LocationSummary* locations =
5235 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5236 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5237 if (location.IsStackSlot()) {
5238 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5239 } else if (location.IsDoubleStackSlot()) {
5240 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5241 }
5242 locations->SetOut(location);
5243 }
5244
VisitParameterValue(HParameterValue * instruction)5245 void InstructionCodeGeneratorX86::VisitParameterValue(
5246 [[maybe_unused]] HParameterValue* instruction) {}
5247
VisitCurrentMethod(HCurrentMethod * instruction)5248 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5249 LocationSummary* locations =
5250 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5251 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5252 }
5253
VisitCurrentMethod(HCurrentMethod * instruction)5254 void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) {
5255 }
5256
VisitClassTableGet(HClassTableGet * instruction)5257 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5258 LocationSummary* locations =
5259 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5260 locations->SetInAt(0, Location::RequiresRegister());
5261 locations->SetOut(Location::RequiresRegister());
5262 }
5263
VisitClassTableGet(HClassTableGet * instruction)5264 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5265 LocationSummary* locations = instruction->GetLocations();
5266 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5267 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5268 instruction->GetIndex(), kX86PointerSize).SizeValue();
5269 __ movl(locations->Out().AsRegister<Register>(),
5270 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5271 } else {
5272 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5273 instruction->GetIndex(), kX86PointerSize));
5274 __ movl(locations->Out().AsRegister<Register>(),
5275 Address(locations->InAt(0).AsRegister<Register>(),
5276 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5277 // temp = temp->GetImtEntryAt(method_offset);
5278 __ movl(locations->Out().AsRegister<Register>(),
5279 Address(locations->Out().AsRegister<Register>(), method_offset));
5280 }
5281 }
5282
VisitNot(HNot * not_)5283 void LocationsBuilderX86::VisitNot(HNot* not_) {
5284 LocationSummary* locations =
5285 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5286 locations->SetInAt(0, Location::RequiresRegister());
5287 locations->SetOut(Location::SameAsFirstInput());
5288 }
5289
VisitNot(HNot * not_)5290 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5291 LocationSummary* locations = not_->GetLocations();
5292 Location in = locations->InAt(0);
5293 Location out = locations->Out();
5294 DCHECK(in.Equals(out));
5295 switch (not_->GetResultType()) {
5296 case DataType::Type::kInt32:
5297 __ notl(out.AsRegister<Register>());
5298 break;
5299
5300 case DataType::Type::kInt64:
5301 __ notl(out.AsRegisterPairLow<Register>());
5302 __ notl(out.AsRegisterPairHigh<Register>());
5303 break;
5304
5305 default:
5306 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5307 }
5308 }
5309
VisitBooleanNot(HBooleanNot * bool_not)5310 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5311 LocationSummary* locations =
5312 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5313 locations->SetInAt(0, Location::RequiresRegister());
5314 locations->SetOut(Location::SameAsFirstInput());
5315 }
5316
VisitBooleanNot(HBooleanNot * bool_not)5317 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5318 LocationSummary* locations = bool_not->GetLocations();
5319 Location in = locations->InAt(0);
5320 Location out = locations->Out();
5321 DCHECK(in.Equals(out));
5322 __ xorl(out.AsRegister<Register>(), Immediate(1));
5323 }
5324
VisitCompare(HCompare * compare)5325 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5326 LocationSummary* locations =
5327 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5328 switch (compare->GetComparisonType()) {
5329 case DataType::Type::kBool:
5330 case DataType::Type::kUint8:
5331 case DataType::Type::kInt8:
5332 case DataType::Type::kUint16:
5333 case DataType::Type::kInt16:
5334 case DataType::Type::kInt32:
5335 case DataType::Type::kUint32:
5336 case DataType::Type::kInt64:
5337 case DataType::Type::kUint64: {
5338 locations->SetInAt(0, Location::RequiresRegister());
5339 locations->SetInAt(1, Location::Any());
5340 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5341 break;
5342 }
5343 case DataType::Type::kFloat32:
5344 case DataType::Type::kFloat64: {
5345 locations->SetInAt(0, Location::RequiresFpuRegister());
5346 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5347 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5348 } else if (compare->InputAt(1)->IsConstant()) {
5349 locations->SetInAt(1, Location::RequiresFpuRegister());
5350 } else {
5351 locations->SetInAt(1, Location::Any());
5352 }
5353 locations->SetOut(Location::RequiresRegister());
5354 break;
5355 }
5356 default:
5357 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5358 }
5359 }
5360
VisitCompare(HCompare * compare)5361 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5362 LocationSummary* locations = compare->GetLocations();
5363 Register out = locations->Out().AsRegister<Register>();
5364 Location left = locations->InAt(0);
5365 Location right = locations->InAt(1);
5366
5367 NearLabel less, greater, done;
5368 Condition less_cond = kLess;
5369 Condition greater_cond = kGreater;
5370
5371 switch (compare->GetComparisonType()) {
5372 case DataType::Type::kUint32:
5373 less_cond = kBelow;
5374 // greater_cond - is not needed below
5375 FALLTHROUGH_INTENDED;
5376 case DataType::Type::kBool:
5377 case DataType::Type::kUint8:
5378 case DataType::Type::kInt8:
5379 case DataType::Type::kUint16:
5380 case DataType::Type::kInt16:
5381 case DataType::Type::kInt32: {
5382 codegen_->GenerateIntCompare(left, right);
5383 break;
5384 }
5385 case DataType::Type::kUint64:
5386 less_cond = kBelow;
5387 greater_cond = kAbove;
5388 FALLTHROUGH_INTENDED;
5389 case DataType::Type::kInt64: {
5390 Register left_low = left.AsRegisterPairLow<Register>();
5391 Register left_high = left.AsRegisterPairHigh<Register>();
5392 int32_t val_low = 0;
5393 int32_t val_high = 0;
5394 bool right_is_const = false;
5395
5396 if (right.IsConstant()) {
5397 DCHECK(right.GetConstant()->IsLongConstant());
5398 right_is_const = true;
5399 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5400 val_low = Low32Bits(val);
5401 val_high = High32Bits(val);
5402 }
5403
5404 if (right.IsRegisterPair()) {
5405 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5406 } else if (right.IsDoubleStackSlot()) {
5407 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5408 } else {
5409 DCHECK(right_is_const) << right;
5410 codegen_->Compare32BitValue(left_high, val_high);
5411 }
5412 __ j(less_cond, &less); // High part compare.
5413 __ j(greater_cond, &greater); // High part compare.
5414 if (right.IsRegisterPair()) {
5415 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5416 } else if (right.IsDoubleStackSlot()) {
5417 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5418 } else {
5419 DCHECK(right_is_const) << right;
5420 codegen_->Compare32BitValue(left_low, val_low);
5421 }
5422 less_cond = kBelow; // for CF (unsigned).
5423 // greater_cond - is not needed below
5424 break;
5425 }
5426 case DataType::Type::kFloat32: {
5427 GenerateFPCompare(left, right, compare, false);
5428 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5429 less_cond = kBelow; // for CF (floats).
5430 break;
5431 }
5432 case DataType::Type::kFloat64: {
5433 GenerateFPCompare(left, right, compare, true);
5434 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5435 less_cond = kBelow; // for CF (floats).
5436 break;
5437 }
5438 default:
5439 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5440 }
5441
5442 __ movl(out, Immediate(0));
5443 __ j(kEqual, &done);
5444 __ j(less_cond, &less);
5445
5446 __ Bind(&greater);
5447 __ movl(out, Immediate(1));
5448 __ jmp(&done);
5449
5450 __ Bind(&less);
5451 __ movl(out, Immediate(-1));
5452
5453 __ Bind(&done);
5454 }
5455
VisitPhi(HPhi * instruction)5456 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5457 LocationSummary* locations =
5458 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5459 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5460 locations->SetInAt(i, Location::Any());
5461 }
5462 locations->SetOut(Location::Any());
5463 }
5464
VisitPhi(HPhi * instruction)5465 void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) {
5466 LOG(FATAL) << "Unreachable";
5467 }
5468
GenerateMemoryBarrier(MemBarrierKind kind)5469 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5470 /*
5471 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
5472 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5473 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5474 */
5475 switch (kind) {
5476 case MemBarrierKind::kAnyAny: {
5477 MemoryFence();
5478 break;
5479 }
5480 case MemBarrierKind::kAnyStore:
5481 case MemBarrierKind::kLoadAny:
5482 case MemBarrierKind::kStoreStore: {
5483 // nop
5484 break;
5485 }
5486 case MemBarrierKind::kNTStoreStore:
5487 // Non-Temporal Store/Store needs an explicit fence.
5488 MemoryFence(/* non-temporal= */ true);
5489 break;
5490 }
5491 }
5492
// Returns the desired dispatch info unchanged; the `method` argument is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    [[maybe_unused]] ArtMethod* method) {
  HInvokeStaticOrDirect::DispatchInfo supported_dispatch_info = desired_dispatch_info;
  return supported_dispatch_info;
}
5498
GetInvokeExtraParameter(HInvoke * invoke,Register temp)5499 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5500 if (invoke->IsInvokeStaticOrDirect()) {
5501 return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5502 }
5503 DCHECK(invoke->IsInvokeInterface());
5504 Location location =
5505 invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5506 return location.AsRegister<Register>();
5507 }
5508
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)5509 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5510 Register temp) {
5511 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5512 if (!invoke->GetLocations()->Intrinsified()) {
5513 return location.AsRegister<Register>();
5514 }
5515 // For intrinsics we allow any location, so it may be on the stack.
5516 if (!location.IsRegister()) {
5517 __ movl(temp, Address(ESP, location.GetStackIndex()));
5518 return temp;
5519 }
5520 // For register locations, check if the register was saved. If so, get it from the stack.
5521 // Note: There is a chance that the register was saved but not overwritten, so we could
5522 // save one load. However, since this is just an intrinsic slow path we prefer this
5523 // simple and more robust approach rather that trying to determine if that's the case.
5524 SlowPathCode* slow_path = GetCurrentSlowPath();
5525 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
5526 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5527 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5528 __ movl(temp, Address(ESP, stack_offset));
5529 return temp;
5530 }
5531 return location.AsRegister<Register>();
5532 }
5533
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)5534 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5535 switch (load_kind) {
5536 case MethodLoadKind::kBootImageLinkTimePcRelative: {
5537 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5538 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5539 __ leal(temp.AsRegister<Register>(),
5540 Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5541 RecordBootImageMethodPatch(invoke);
5542 break;
5543 }
5544 case MethodLoadKind::kBootImageRelRo: {
5545 size_t index = invoke->IsInvokeInterface()
5546 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5547 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5548 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5549 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5550 RecordBootImageRelRoPatch(
5551 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5552 GetBootImageOffset(invoke));
5553 break;
5554 }
5555 case MethodLoadKind::kAppImageRelRo: {
5556 DCHECK(GetCompilerOptions().IsAppImage());
5557 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5558 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5559 RecordAppImageMethodPatch(invoke);
5560 break;
5561 }
5562 case MethodLoadKind::kBssEntry: {
5563 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5564 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5565 RecordMethodBssEntryPatch(invoke);
5566 // No need for memory fence, thanks to the x86 memory model.
5567 break;
5568 }
5569 case MethodLoadKind::kJitDirectAddress: {
5570 __ movl(temp.AsRegister<Register>(),
5571 Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5572 break;
5573 }
5574 case MethodLoadKind::kRuntimeCall: {
5575 // Test situation, don't do anything.
5576 break;
5577 }
5578 default: {
5579 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5580 UNREACHABLE();
5581 }
5582 }
5583 }
5584
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5585 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5586 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5587 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
5588 switch (invoke->GetMethodLoadKind()) {
5589 case MethodLoadKind::kStringInit: {
5590 // temp = thread->string_init_entrypoint
5591 uint32_t offset =
5592 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5593 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5594 break;
5595 }
5596 case MethodLoadKind::kRecursive: {
5597 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5598 break;
5599 }
5600 case MethodLoadKind::kRuntimeCall: {
5601 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5602 return; // No code pointer retrieval; the runtime performs the call directly.
5603 }
5604 case MethodLoadKind::kBootImageLinkTimePcRelative:
5605 // For kCallCriticalNative we skip loading the method and do the call directly.
5606 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5607 break;
5608 }
5609 FALLTHROUGH_INTENDED;
5610 default: {
5611 LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5612 }
5613 }
5614
5615 switch (invoke->GetCodePtrLocation()) {
5616 case CodePtrLocation::kCallSelf:
5617 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5618 __ call(GetFrameEntryLabel());
5619 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5620 break;
5621 case CodePtrLocation::kCallCriticalNative: {
5622 size_t out_frame_size =
5623 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5624 kNativeStackAlignment,
5625 GetCriticalNativeDirectCallFrameSize>(invoke);
5626 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5627 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5628 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5629 __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5630 RecordBootImageJniEntrypointPatch(invoke);
5631 } else {
5632 // (callee_method + offset_of_jni_entry_point)()
5633 __ call(Address(callee_method.AsRegister<Register>(),
5634 ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5635 }
5636 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5637 if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5638 // Create space for conversion.
5639 out_frame_size = 8u;
5640 IncreaseFrame(out_frame_size);
5641 }
5642 // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
5643 switch (invoke->GetType()) {
5644 case DataType::Type::kBool:
5645 __ movzxb(EAX, AL);
5646 break;
5647 case DataType::Type::kInt8:
5648 __ movsxb(EAX, AL);
5649 break;
5650 case DataType::Type::kUint16:
5651 __ movzxw(EAX, EAX);
5652 break;
5653 case DataType::Type::kInt16:
5654 __ movsxw(EAX, EAX);
5655 break;
5656 case DataType::Type::kFloat32:
5657 __ fstps(Address(ESP, 0));
5658 __ movss(XMM0, Address(ESP, 0));
5659 break;
5660 case DataType::Type::kFloat64:
5661 __ fstpl(Address(ESP, 0));
5662 __ movsd(XMM0, Address(ESP, 0));
5663 break;
5664 case DataType::Type::kInt32:
5665 case DataType::Type::kInt64:
5666 case DataType::Type::kVoid:
5667 break;
5668 default:
5669 DCHECK(false) << invoke->GetType();
5670 break;
5671 }
5672 if (out_frame_size != 0u) {
5673 DecreaseFrame(out_frame_size);
5674 }
5675 break;
5676 }
5677 case CodePtrLocation::kCallArtMethod:
5678 // (callee_method + offset_of_quick_compiled_code)()
5679 __ call(Address(callee_method.AsRegister<Register>(),
5680 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5681 kX86PointerSize).Int32Value()));
5682 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5683 break;
5684 }
5685
5686 DCHECK(!IsLeafMethod());
5687 }
5688
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5689 void CodeGeneratorX86::GenerateVirtualCall(
5690 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5691 Register temp = temp_in.AsRegister<Register>();
5692 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5693 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5694
5695 // Use the calling convention instead of the location of the receiver, as
5696 // intrinsics may have put the receiver in a different register. In the intrinsics
5697 // slow path, the arguments have been moved to the right place, so here we are
5698 // guaranteed that the receiver is the first register of the calling convention.
5699 InvokeDexCallingConvention calling_convention;
5700 Register receiver = calling_convention.GetRegisterAt(0);
5701 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5702 // /* HeapReference<Class> */ temp = receiver->klass_
5703 __ movl(temp, Address(receiver, class_offset));
5704 MaybeRecordImplicitNullCheck(invoke);
5705 // Instead of simply (possibly) unpoisoning `temp` here, we should
5706 // emit a read barrier for the previous class reference load.
5707 // However this is not required in practice, as this is an
5708 // intermediate/temporary reference and because the current
5709 // concurrent copying collector keeps the from-space memory
5710 // intact/accessible until the end of the marking phase (the
5711 // concurrent copying collector may not in the future).
5712 __ MaybeUnpoisonHeapReference(temp);
5713
5714 MaybeGenerateInlineCacheCheck(invoke, temp);
5715
5716 // temp = temp->GetMethodAt(method_offset);
5717 __ movl(temp, Address(temp, method_offset));
5718 // call temp->GetEntryPoint();
5719 __ call(Address(
5720 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5721 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5722 }
5723
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5724 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5725 uint32_t intrinsic_data) {
5726 boot_image_other_patches_.emplace_back(
5727 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5728 __ Bind(&boot_image_other_patches_.back().label);
5729 }
5730
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5731 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5732 uint32_t boot_image_offset) {
5733 boot_image_other_patches_.emplace_back(
5734 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5735 __ Bind(&boot_image_other_patches_.back().label);
5736 }
5737
RecordBootImageMethodPatch(HInvoke * invoke)5738 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5739 size_t index = invoke->IsInvokeInterface()
5740 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5741 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5742 HX86ComputeBaseMethodAddress* method_address =
5743 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5744 boot_image_method_patches_.emplace_back(
5745 method_address,
5746 invoke->GetResolvedMethodReference().dex_file,
5747 invoke->GetResolvedMethodReference().index);
5748 __ Bind(&boot_image_method_patches_.back().label);
5749 }
5750
RecordAppImageMethodPatch(HInvoke * invoke)5751 void CodeGeneratorX86::RecordAppImageMethodPatch(HInvoke* invoke) {
5752 size_t index = invoke->IsInvokeInterface()
5753 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5754 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5755 HX86ComputeBaseMethodAddress* method_address =
5756 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5757 app_image_method_patches_.emplace_back(
5758 method_address,
5759 invoke->GetResolvedMethodReference().dex_file,
5760 invoke->GetResolvedMethodReference().index);
5761 __ Bind(&app_image_method_patches_.back().label);
5762 }
5763
RecordMethodBssEntryPatch(HInvoke * invoke)5764 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5765 size_t index = invoke->IsInvokeInterface()
5766 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5767 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5768 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5769 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5770 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5771 invoke->GetMethodReference().dex_file));
5772 HX86ComputeBaseMethodAddress* method_address =
5773 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5774 // Add the patch entry and bind its label at the end of the instruction.
5775 method_bss_entry_patches_.emplace_back(
5776 method_address,
5777 invoke->GetMethodReference().dex_file,
5778 invoke->GetMethodReference().index);
5779 __ Bind(&method_bss_entry_patches_.back().label);
5780 }
5781
RecordBootImageTypePatch(HLoadClass * load_class)5782 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5783 HX86ComputeBaseMethodAddress* method_address =
5784 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5785 boot_image_type_patches_.emplace_back(
5786 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5787 __ Bind(&boot_image_type_patches_.back().label);
5788 }
5789
RecordAppImageTypePatch(HLoadClass * load_class)5790 void CodeGeneratorX86::RecordAppImageTypePatch(HLoadClass* load_class) {
5791 HX86ComputeBaseMethodAddress* method_address =
5792 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5793 app_image_type_patches_.emplace_back(
5794 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5795 __ Bind(&app_image_type_patches_.back().label);
5796 }
5797
NewTypeBssEntryPatch(HLoadClass * load_class)5798 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5799 HX86ComputeBaseMethodAddress* method_address =
5800 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5801 ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5802 switch (load_class->GetLoadKind()) {
5803 case HLoadClass::LoadKind::kBssEntry:
5804 patches = &type_bss_entry_patches_;
5805 break;
5806 case HLoadClass::LoadKind::kBssEntryPublic:
5807 patches = &public_type_bss_entry_patches_;
5808 break;
5809 case HLoadClass::LoadKind::kBssEntryPackage:
5810 patches = &package_type_bss_entry_patches_;
5811 break;
5812 default:
5813 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5814 UNREACHABLE();
5815 }
5816 patches->emplace_back(
5817 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5818 return &patches->back().label;
5819 }
5820
RecordBootImageStringPatch(HLoadString * load_string)5821 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5822 HX86ComputeBaseMethodAddress* method_address =
5823 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5824 boot_image_string_patches_.emplace_back(
5825 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5826 __ Bind(&boot_image_string_patches_.back().label);
5827 }
5828
NewStringBssEntryPatch(HLoadString * load_string)5829 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5830 HX86ComputeBaseMethodAddress* method_address =
5831 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5832 string_bss_entry_patches_.emplace_back(
5833 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5834 return &string_bss_entry_patches_.back().label;
5835 }
5836
RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect * invoke)5837 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5838 HX86ComputeBaseMethodAddress* method_address =
5839 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5840 boot_image_jni_entrypoint_patches_.emplace_back(
5841 method_address,
5842 invoke->GetResolvedMethodReference().dex_file,
5843 invoke->GetResolvedMethodReference().index);
5844 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5845 }
5846
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5847 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5848 uint32_t boot_image_reference,
5849 HInvokeStaticOrDirect* invoke) {
5850 if (GetCompilerOptions().IsBootImage()) {
5851 HX86ComputeBaseMethodAddress* method_address =
5852 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5853 DCHECK(method_address != nullptr);
5854 Register method_address_reg =
5855 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5856 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5857 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5858 } else if (GetCompilerOptions().GetCompilePic()) {
5859 HX86ComputeBaseMethodAddress* method_address =
5860 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5861 DCHECK(method_address != nullptr);
5862 Register method_address_reg =
5863 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5864 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5865 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5866 } else {
5867 DCHECK(GetCompilerOptions().IsJitCompiler());
5868 gc::Heap* heap = Runtime::Current()->GetHeap();
5869 DCHECK(!heap->GetBootImageSpaces().empty());
5870 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5871 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5872 }
5873 }
5874
LoadIntrinsicDeclaringClass(Register reg,HInvokeStaticOrDirect * invoke)5875 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5876 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5877 if (GetCompilerOptions().IsBootImage()) {
5878 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5879 HX86ComputeBaseMethodAddress* method_address =
5880 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5881 DCHECK(method_address != nullptr);
5882 Register method_address_reg =
5883 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5884 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5885 MethodReference target_method = invoke->GetResolvedMethodReference();
5886 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5887 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5888 __ Bind(&boot_image_type_patches_.back().label);
5889 } else {
5890 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5891 LoadBootImageAddress(reg, boot_image_offset, invoke);
5892 }
5893 }
5894
// Patch labels are bound at the end of the patched instruction ("movl" or another one),
// whereas the literal offset of a patch has to point at the embedded constant, which
// occupies the last 4 bytes of that instruction. This is the distance between the two.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment{4u};
5898
5899 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5900 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5901 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5902 ArenaVector<linker::LinkerPatch>* linker_patches) {
5903 for (const X86PcRelativePatchInfo& info : infos) {
5904 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5905 linker_patches->push_back(Factory(literal_offset,
5906 info.target_dex_file,
5907 GetMethodAddressOffset(info.method_address),
5908 info.offset_or_index));
5909 }
5910 }
5911
5912 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5913 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5914 const DexFile* target_dex_file,
5915 uint32_t pc_insn_offset,
5916 uint32_t boot_image_offset) {
5917 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5918 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5919 }
5920
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5921 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5922 DCHECK(linker_patches->empty());
5923 size_t size =
5924 boot_image_method_patches_.size() +
5925 app_image_method_patches_.size() +
5926 method_bss_entry_patches_.size() +
5927 boot_image_type_patches_.size() +
5928 app_image_type_patches_.size() +
5929 type_bss_entry_patches_.size() +
5930 public_type_bss_entry_patches_.size() +
5931 package_type_bss_entry_patches_.size() +
5932 boot_image_string_patches_.size() +
5933 string_bss_entry_patches_.size() +
5934 boot_image_jni_entrypoint_patches_.size() +
5935 boot_image_other_patches_.size();
5936 linker_patches->reserve(size);
5937 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5938 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5939 boot_image_method_patches_, linker_patches);
5940 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5941 boot_image_type_patches_, linker_patches);
5942 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5943 boot_image_string_patches_, linker_patches);
5944 } else {
5945 DCHECK(boot_image_method_patches_.empty());
5946 DCHECK(boot_image_type_patches_.empty());
5947 DCHECK(boot_image_string_patches_.empty());
5948 }
5949 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
5950 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5951 if (GetCompilerOptions().IsBootImage()) {
5952 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5953 boot_image_other_patches_, linker_patches);
5954 } else {
5955 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5956 boot_image_other_patches_, linker_patches);
5957 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
5958 app_image_method_patches_, linker_patches);
5959 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5960 app_image_type_patches_, linker_patches);
5961 }
5962 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5963 method_bss_entry_patches_, linker_patches);
5964 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5965 type_bss_entry_patches_, linker_patches);
5966 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5967 public_type_bss_entry_patches_, linker_patches);
5968 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5969 package_type_bss_entry_patches_, linker_patches);
5970 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5971 string_bss_entry_patches_, linker_patches);
5972 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5973 boot_image_jni_entrypoint_patches_, linker_patches);
5974 DCHECK_EQ(size, linker_patches->size());
5975 }
5976
MaybeMarkGCCard(Register temp,Register card,Register object,Register value,bool emit_null_check)5977 void CodeGeneratorX86::MaybeMarkGCCard(
5978 Register temp, Register card, Register object, Register value, bool emit_null_check) {
5979 NearLabel is_null;
5980 if (emit_null_check) {
5981 __ testl(value, value);
5982 __ j(kEqual, &is_null);
5983 }
5984 MarkGCCard(temp, card, object);
5985 if (emit_null_check) {
5986 __ Bind(&is_null);
5987 }
5988 }
5989
MarkGCCard(Register temp,Register card,Register object)5990 void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object) {
5991 // Load the address of the card table into `card`.
5992 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5993 // Calculate the offset (in the card table) of the card corresponding to `object`.
5994 __ movl(temp, object);
5995 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5996 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5997 // `object`'s card.
5998 //
5999 // Register `card` contains the address of the card table. Note that the card
6000 // table's base is biased during its creation so that it always starts at an
6001 // address whose least-significant byte is equal to `kCardDirty` (see
6002 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6003 // below writes the `kCardDirty` (byte) value into the `object`'s card
6004 // (located at `card + object >> kCardShift`).
6005 //
6006 // This dual use of the value in register `card` (1. to calculate the location
6007 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6008 // (no need to explicitly load `kCardDirty` as an immediate value).
6009 __ movb(Address(temp, card, TIMES_1, 0),
6010 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
6011 }
6012
CheckGCCardIsValid(Register temp,Register card,Register object)6013 void CodeGeneratorX86::CheckGCCardIsValid(Register temp, Register card, Register object) {
6014 NearLabel done;
6015 __ j(kEqual, &done);
6016 // Load the address of the card table into `card`.
6017 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
6018 // Calculate the offset (in the card table) of the card corresponding to `object`.
6019 __ movl(temp, object);
6020 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
6021 // assert (!clean || !self->is_gc_marking)
6022 __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6023 __ j(kNotEqual, &done);
6024 __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()), Immediate(0));
6025 __ j(kEqual, &done);
6026 __ int3();
6027 __ Bind(&done);
6028 }
6029
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)6030 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
6031 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6032
6033 bool object_field_get_with_read_barrier =
6034 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6035 LocationSummary* locations =
6036 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6037 codegen_->EmitReadBarrier()
6038 ? LocationSummary::kCallOnSlowPath
6039 : LocationSummary::kNoCall);
6040 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6041 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6042 }
6043 // receiver_input
6044 locations->SetInAt(0, Location::RequiresRegister());
6045 if (DataType::IsFloatingPointType(instruction->GetType())) {
6046 locations->SetOut(Location::RequiresFpuRegister());
6047 } else {
6048 // The output overlaps in case of long: we don't want the low move
6049 // to overwrite the object's location. Likewise, in the case of
6050 // an object field get with read barriers enabled, we do not want
6051 // the move to overwrite the object's location, as we need it to emit
6052 // the read barrier.
6053 locations->SetOut(
6054 Location::RequiresRegister(),
6055 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64)
6056 ? Location::kOutputOverlap
6057 : Location::kNoOutputOverlap);
6058 }
6059
6060 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
6061 // Long values can be loaded atomically into an XMM using movsd.
6062 // So we use an XMM register as a temp to achieve atomicity (first
6063 // load the temp into the XMM and then copy the XMM into the
6064 // output, 32 bits at a time).
6065 locations->AddTemp(Location::RequiresFpuRegister());
6066 }
6067 }
6068
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)6069 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
6070 const FieldInfo& field_info) {
6071 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6072
6073 LocationSummary* locations = instruction->GetLocations();
6074 Location base_loc = locations->InAt(0);
6075 Register base = base_loc.AsRegister<Register>();
6076 Location out = locations->Out();
6077 bool is_volatile = field_info.IsVolatile();
6078 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6079 DataType::Type load_type = instruction->GetType();
6080 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6081
6082 if (load_type == DataType::Type::kReference) {
6083 // /* HeapReference<Object> */ out = *(base + offset)
6084 if (codegen_->EmitBakerReadBarrier()) {
6085 // Note that a potential implicit null check is handled in this
6086 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
6087 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6088 instruction, out, base, offset, /* needs_null_check= */ true);
6089 if (is_volatile) {
6090 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6091 }
6092 } else {
6093 __ movl(out.AsRegister<Register>(), Address(base, offset));
6094 codegen_->MaybeRecordImplicitNullCheck(instruction);
6095 if (is_volatile) {
6096 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6097 }
6098 // If read barriers are enabled, emit read barriers other than
6099 // Baker's using a slow path (and also unpoison the loaded
6100 // reference, if heap poisoning is enabled).
6101 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
6102 }
6103 } else {
6104 Address src(base, offset);
6105 XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
6106 ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
6107 : kNoXmmRegister;
6108 codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
6109 if (is_volatile) {
6110 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6111 }
6112 }
6113 }
6114
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,WriteBarrierKind write_barrier_kind)6115 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
6116 const FieldInfo& field_info,
6117 WriteBarrierKind write_barrier_kind) {
6118 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6119
6120 LocationSummary* locations =
6121 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6122 locations->SetInAt(0, Location::RequiresRegister());
6123 bool is_volatile = field_info.IsVolatile();
6124 DataType::Type field_type = field_info.GetFieldType();
6125 bool is_byte_type = DataType::Size(field_type) == 1u;
6126
6127 // The register allocator does not support multiple
6128 // inputs that die at entry with one in a specific register.
6129 if (is_byte_type) {
6130 // Ensure the value is in a byte register.
6131 locations->SetInAt(1, Location::RegisterLocation(EAX));
6132 } else if (DataType::IsFloatingPointType(field_type)) {
6133 if (is_volatile && field_type == DataType::Type::kFloat64) {
6134 // In order to satisfy the semantics of volatile, this must be a single instruction store.
6135 locations->SetInAt(1, Location::RequiresFpuRegister());
6136 } else {
6137 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
6138 }
6139 } else if (is_volatile && field_type == DataType::Type::kInt64) {
6140 // In order to satisfy the semantics of volatile, this must be a single instruction store.
6141 locations->SetInAt(1, Location::RequiresRegister());
6142
6143 // 64bits value can be atomically written to an address with movsd and an XMM register.
6144 // We need two XMM registers because there's no easier way to (bit) copy a register pair
6145 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
6146 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
6147 // isolated cases when we need this it isn't worth adding the extra complexity.
6148 locations->AddTemp(Location::RequiresFpuRegister());
6149 locations->AddTemp(Location::RequiresFpuRegister());
6150 } else {
6151 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6152
6153 bool needs_write_barrier =
6154 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6155 bool check_gc_card =
6156 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
6157
6158 if (needs_write_barrier || check_gc_card) {
6159 locations->AddTemp(Location::RequiresRegister());
6160 // Ensure the card is in a byte register.
6161 locations->AddTemp(Location::RegisterLocation(ECX));
6162 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6163 locations->AddTemp(Location::RequiresRegister());
6164 }
6165 }
6166 }
6167
HandleFieldSet(HInstruction * instruction,uint32_t value_index,DataType::Type field_type,Address field_addr,Register base,bool is_volatile,bool value_can_be_null,WriteBarrierKind write_barrier_kind)6168 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6169 uint32_t value_index,
6170 DataType::Type field_type,
6171 Address field_addr,
6172 Register base,
6173 bool is_volatile,
6174 bool value_can_be_null,
6175 WriteBarrierKind write_barrier_kind) {
6176 LocationSummary* locations = instruction->GetLocations();
6177 Location value = locations->InAt(value_index);
6178 bool needs_write_barrier =
6179 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6180
6181 if (is_volatile) {
6182 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6183 }
6184
6185 bool maybe_record_implicit_null_check_done = false;
6186
6187 switch (field_type) {
6188 case DataType::Type::kBool:
6189 case DataType::Type::kUint8:
6190 case DataType::Type::kInt8: {
6191 if (value.IsConstant()) {
6192 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6193 } else {
6194 __ movb(field_addr, value.AsRegister<ByteRegister>());
6195 }
6196 break;
6197 }
6198
6199 case DataType::Type::kUint16:
6200 case DataType::Type::kInt16: {
6201 if (value.IsConstant()) {
6202 __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6203 } else {
6204 __ movw(field_addr, value.AsRegister<Register>());
6205 }
6206 break;
6207 }
6208
6209 case DataType::Type::kInt32:
6210 case DataType::Type::kReference: {
6211 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6212 if (value.IsConstant()) {
6213 DCHECK(value.GetConstant()->IsNullConstant())
6214 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6215 << " is not null. Instruction " << *instruction;
6216 // No need to poison null, just do a movl.
6217 __ movl(field_addr, Immediate(0));
6218 } else {
6219 Register temp = locations->GetTemp(0).AsRegister<Register>();
6220 __ movl(temp, value.AsRegister<Register>());
6221 __ PoisonHeapReference(temp);
6222 __ movl(field_addr, temp);
6223 }
6224 } else if (value.IsConstant()) {
6225 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6226 __ movl(field_addr, Immediate(v));
6227 } else {
6228 DCHECK(value.IsRegister()) << value;
6229 __ movl(field_addr, value.AsRegister<Register>());
6230 }
6231 break;
6232 }
6233
6234 case DataType::Type::kInt64: {
6235 if (is_volatile) {
6236 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
6237 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
6238 __ movd(temp1, value.AsRegisterPairLow<Register>());
6239 __ movd(temp2, value.AsRegisterPairHigh<Register>());
6240 __ punpckldq(temp1, temp2);
6241 __ movsd(field_addr, temp1);
6242 codegen_->MaybeRecordImplicitNullCheck(instruction);
6243 } else if (value.IsConstant()) {
6244 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6245 __ movl(field_addr, Immediate(Low32Bits(v)));
6246 codegen_->MaybeRecordImplicitNullCheck(instruction);
6247 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6248 } else {
6249 __ movl(field_addr, value.AsRegisterPairLow<Register>());
6250 codegen_->MaybeRecordImplicitNullCheck(instruction);
6251 __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
6252 }
6253 maybe_record_implicit_null_check_done = true;
6254 break;
6255 }
6256
6257 case DataType::Type::kFloat32: {
6258 if (value.IsConstant()) {
6259 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6260 __ movl(field_addr, Immediate(v));
6261 } else {
6262 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
6263 }
6264 break;
6265 }
6266
6267 case DataType::Type::kFloat64: {
6268 if (value.IsConstant()) {
6269 DCHECK(!is_volatile);
6270 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6271 __ movl(field_addr, Immediate(Low32Bits(v)));
6272 codegen_->MaybeRecordImplicitNullCheck(instruction);
6273 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6274 maybe_record_implicit_null_check_done = true;
6275 } else {
6276 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
6277 }
6278 break;
6279 }
6280
6281 case DataType::Type::kUint32:
6282 case DataType::Type::kUint64:
6283 case DataType::Type::kVoid:
6284 LOG(FATAL) << "Unreachable type " << field_type;
6285 UNREACHABLE();
6286 }
6287
6288 if (!maybe_record_implicit_null_check_done) {
6289 codegen_->MaybeRecordImplicitNullCheck(instruction);
6290 }
6291
6292 if (needs_write_barrier) {
6293 Register temp = locations->GetTemp(0).AsRegister<Register>();
6294 Register card = locations->GetTemp(1).AsRegister<Register>();
6295 if (value.IsConstant()) {
6296 DCHECK(value.GetConstant()->IsNullConstant())
6297 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6298 << " is not null. Instruction: " << *instruction;
6299 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6300 codegen_->MarkGCCard(temp, card, base);
6301 }
6302 } else {
6303 codegen_->MaybeMarkGCCard(
6304 temp,
6305 card,
6306 base,
6307 value.AsRegister<Register>(),
6308 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6309 }
6310 } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6311 if (value.IsConstant()) {
6312 // If we are storing a constant for a reference, we are in the case where we are storing
6313 // null but we cannot skip it as this write barrier is being relied on by coalesced write
6314 // barriers.
6315 DCHECK(value.GetConstant()->IsNullConstant())
6316 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6317 << " is not null. Instruction: " << *instruction;
6318 // No need to check the dirty bit as this value is null.
6319 } else {
6320 Register temp = locations->GetTemp(0).AsRegister<Register>();
6321 Register card = locations->GetTemp(1).AsRegister<Register>();
6322 codegen_->CheckGCCardIsValid(temp, card, base);
6323 }
6324 }
6325
6326 if (is_volatile) {
6327 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6328 }
6329 }
6330
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null,WriteBarrierKind write_barrier_kind)6331 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6332 const FieldInfo& field_info,
6333 bool value_can_be_null,
6334 WriteBarrierKind write_barrier_kind) {
6335 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6336
6337 LocationSummary* locations = instruction->GetLocations();
6338 Register base = locations->InAt(0).AsRegister<Register>();
6339 bool is_volatile = field_info.IsVolatile();
6340 DataType::Type field_type = field_info.GetFieldType();
6341 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6342 Address field_addr(base, offset);
6343
6344 HandleFieldSet(instruction,
6345 /* value_index= */ 1,
6346 field_type,
6347 field_addr,
6348 base,
6349 is_volatile,
6350 value_can_be_null,
6351 write_barrier_kind);
6352 }
6353
VisitStaticFieldGet(HStaticFieldGet * instruction)6354 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6355 HandleFieldGet(instruction, instruction->GetFieldInfo());
6356 }
6357
VisitStaticFieldGet(HStaticFieldGet * instruction)6358 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6359 HandleFieldGet(instruction, instruction->GetFieldInfo());
6360 }
6361
VisitStaticFieldSet(HStaticFieldSet * instruction)6362 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6363 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6364 }
6365
VisitStaticFieldSet(HStaticFieldSet * instruction)6366 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6367 HandleFieldSet(instruction,
6368 instruction->GetFieldInfo(),
6369 instruction->GetValueCanBeNull(),
6370 instruction->GetWriteBarrierKind());
6371 }
6372
VisitInstanceFieldSet(HInstanceFieldSet * instruction)6373 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6374 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6375 }
6376
VisitInstanceFieldSet(HInstanceFieldSet * instruction)6377 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6378 HandleFieldSet(instruction,
6379 instruction->GetFieldInfo(),
6380 instruction->GetValueCanBeNull(),
6381 instruction->GetWriteBarrierKind());
6382 }
6383
VisitInstanceFieldGet(HInstanceFieldGet * instruction)6384 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6385 HandleFieldGet(instruction, instruction->GetFieldInfo());
6386 }
6387
VisitInstanceFieldGet(HInstanceFieldGet * instruction)6388 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6389 HandleFieldGet(instruction, instruction->GetFieldInfo());
6390 }
6391
VisitStringBuilderAppend(HStringBuilderAppend * instruction)6392 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6393 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6394 }
6395
VisitStringBuilderAppend(HStringBuilderAppend * instruction)6396 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6397 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6398 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6399 }
6400
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)6401 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6402 HUnresolvedInstanceFieldGet* instruction) {
6403 FieldAccessCallingConventionX86 calling_convention;
6404 codegen_->CreateUnresolvedFieldLocationSummary(
6405 instruction, instruction->GetFieldType(), calling_convention);
6406 }
6407
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)6408 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6409 HUnresolvedInstanceFieldGet* instruction) {
6410 FieldAccessCallingConventionX86 calling_convention;
6411 codegen_->GenerateUnresolvedFieldAccess(instruction,
6412 instruction->GetFieldType(),
6413 instruction->GetFieldIndex(),
6414 instruction->GetDexPc(),
6415 calling_convention);
6416 }
6417
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)6418 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6419 HUnresolvedInstanceFieldSet* instruction) {
6420 FieldAccessCallingConventionX86 calling_convention;
6421 codegen_->CreateUnresolvedFieldLocationSummary(
6422 instruction, instruction->GetFieldType(), calling_convention);
6423 }
6424
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)6425 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6426 HUnresolvedInstanceFieldSet* instruction) {
6427 FieldAccessCallingConventionX86 calling_convention;
6428 codegen_->GenerateUnresolvedFieldAccess(instruction,
6429 instruction->GetFieldType(),
6430 instruction->GetFieldIndex(),
6431 instruction->GetDexPc(),
6432 calling_convention);
6433 }
6434
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)6435 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6436 HUnresolvedStaticFieldGet* instruction) {
6437 FieldAccessCallingConventionX86 calling_convention;
6438 codegen_->CreateUnresolvedFieldLocationSummary(
6439 instruction, instruction->GetFieldType(), calling_convention);
6440 }
6441
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)6442 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6443 HUnresolvedStaticFieldGet* instruction) {
6444 FieldAccessCallingConventionX86 calling_convention;
6445 codegen_->GenerateUnresolvedFieldAccess(instruction,
6446 instruction->GetFieldType(),
6447 instruction->GetFieldIndex(),
6448 instruction->GetDexPc(),
6449 calling_convention);
6450 }
6451
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)6452 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6453 HUnresolvedStaticFieldSet* instruction) {
6454 FieldAccessCallingConventionX86 calling_convention;
6455 codegen_->CreateUnresolvedFieldLocationSummary(
6456 instruction, instruction->GetFieldType(), calling_convention);
6457 }
6458
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)6459 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6460 HUnresolvedStaticFieldSet* instruction) {
6461 FieldAccessCallingConventionX86 calling_convention;
6462 codegen_->GenerateUnresolvedFieldAccess(instruction,
6463 instruction->GetFieldType(),
6464 instruction->GetFieldIndex(),
6465 instruction->GetDexPc(),
6466 calling_convention);
6467 }
6468
VisitNullCheck(HNullCheck * instruction)6469 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6470 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6471 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6472 ? Location::RequiresRegister()
6473 : Location::Any();
6474 locations->SetInAt(0, loc);
6475 }
6476
GenerateImplicitNullCheck(HNullCheck * instruction)6477 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6478 if (CanMoveNullCheckToUser(instruction)) {
6479 return;
6480 }
6481 LocationSummary* locations = instruction->GetLocations();
6482 Location obj = locations->InAt(0);
6483
6484 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6485 RecordPcInfo(instruction, instruction->GetDexPc());
6486 }
6487
GenerateExplicitNullCheck(HNullCheck * instruction)6488 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6489 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6490 AddSlowPath(slow_path);
6491
6492 LocationSummary* locations = instruction->GetLocations();
6493 Location obj = locations->InAt(0);
6494
6495 if (obj.IsRegister()) {
6496 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6497 } else if (obj.IsStackSlot()) {
6498 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6499 } else {
6500 DCHECK(obj.IsConstant()) << obj;
6501 DCHECK(obj.GetConstant()->IsNullConstant());
6502 __ jmp(slow_path->GetEntryLabel());
6503 return;
6504 }
6505 __ j(kEqual, slow_path->GetEntryLabel());
6506 }
6507
VisitNullCheck(HNullCheck * instruction)6508 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6509 codegen_->GenerateNullCheck(instruction);
6510 }
6511
VisitArrayGet(HArrayGet * instruction)6512 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6513 bool object_array_get_with_read_barrier =
6514 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6515 LocationSummary* locations =
6516 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6517 object_array_get_with_read_barrier
6518 ? LocationSummary::kCallOnSlowPath
6519 : LocationSummary::kNoCall);
6520 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6521 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6522 }
6523 locations->SetInAt(0, Location::RequiresRegister());
6524 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6525 if (DataType::IsFloatingPointType(instruction->GetType())) {
6526 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6527 } else {
6528 // The output overlaps in case of long: we don't want the low move
6529 // to overwrite the array's location. Likewise, in the case of an
6530 // object array get with read barriers enabled, we do not want the
6531 // move to overwrite the array's location, as we need it to emit
6532 // the read barrier.
6533 locations->SetOut(
6534 Location::RequiresRegister(),
6535 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6536 ? Location::kOutputOverlap
6537 : Location::kNoOutputOverlap);
6538 }
6539 }
6540
VisitArrayGet(HArrayGet * instruction)6541 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6542 LocationSummary* locations = instruction->GetLocations();
6543 Location obj_loc = locations->InAt(0);
6544 Register obj = obj_loc.AsRegister<Register>();
6545 Location index = locations->InAt(1);
6546 Location out_loc = locations->Out();
6547 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6548
6549 DataType::Type type = instruction->GetType();
6550 if (type == DataType::Type::kReference) {
6551 static_assert(
6552 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6553 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6554 // /* HeapReference<Object> */ out =
6555 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6556 if (codegen_->EmitBakerReadBarrier()) {
6557 // Note that a potential implicit null check is handled in this
6558 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6559 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6560 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6561 } else {
6562 Register out = out_loc.AsRegister<Register>();
6563 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6564 codegen_->MaybeRecordImplicitNullCheck(instruction);
6565 // If read barriers are enabled, emit read barriers other than
6566 // Baker's using a slow path (and also unpoison the loaded
6567 // reference, if heap poisoning is enabled).
6568 if (index.IsConstant()) {
6569 uint32_t offset =
6570 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6571 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6572 } else {
6573 codegen_->MaybeGenerateReadBarrierSlow(
6574 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6575 }
6576 }
6577 } else if (type == DataType::Type::kUint16
6578 && mirror::kUseStringCompression
6579 && instruction->IsStringCharAt()) {
6580 // Branch cases into compressed and uncompressed for each index's type.
6581 Register out = out_loc.AsRegister<Register>();
6582 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6583 NearLabel done, not_compressed;
6584 __ testb(Address(obj, count_offset), Immediate(1));
6585 codegen_->MaybeRecordImplicitNullCheck(instruction);
6586 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6587 "Expecting 0=compressed, 1=uncompressed");
6588 __ j(kNotZero, ¬_compressed);
6589 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6590 __ jmp(&done);
6591 __ Bind(¬_compressed);
6592 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6593 __ Bind(&done);
6594 } else {
6595 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6596 Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6597 codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6598 }
6599 }
6600
VisitArraySet(HArraySet * instruction)6601 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6602 DataType::Type value_type = instruction->GetComponentType();
6603
6604 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6605 bool needs_write_barrier =
6606 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6607 bool check_gc_card =
6608 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6609 bool needs_type_check = instruction->NeedsTypeCheck();
6610
6611 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6612 instruction,
6613 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6614
6615 bool is_byte_type = DataType::Size(value_type) == 1u;
6616 // We need the inputs to be different than the output in case of long operation.
6617 // In case of a byte operation, the register allocator does not support multiple
6618 // inputs that die at entry with one in a specific register.
6619 locations->SetInAt(0, Location::RequiresRegister());
6620 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6621 if (is_byte_type) {
6622 // Ensure the value is in a byte register.
6623 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6624 } else if (DataType::IsFloatingPointType(value_type)) {
6625 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6626 } else {
6627 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6628 }
6629 if (needs_write_barrier || check_gc_card) {
6630 // Used by reference poisoning, type checking, emitting, or checking a write barrier.
6631 locations->AddTemp(Location::RequiresRegister());
6632 // Only used when emitting or checking a write barrier. Ensure the card is in a byte register.
6633 locations->AddTemp(Location::RegisterLocation(ECX));
6634 } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
6635 instruction->NeedsTypeCheck()) {
6636 locations->AddTemp(Location::RequiresRegister());
6637 }
6638 }
6639
VisitArraySet(HArraySet * instruction)6640 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6641 LocationSummary* locations = instruction->GetLocations();
6642 Location array_loc = locations->InAt(0);
6643 Register array = array_loc.AsRegister<Register>();
6644 Location index = locations->InAt(1);
6645 Location value = locations->InAt(2);
6646 DataType::Type value_type = instruction->GetComponentType();
6647 bool needs_type_check = instruction->NeedsTypeCheck();
6648 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6649 bool needs_write_barrier =
6650 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6651
6652 switch (value_type) {
6653 case DataType::Type::kBool:
6654 case DataType::Type::kUint8:
6655 case DataType::Type::kInt8: {
6656 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6657 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6658 if (value.IsRegister()) {
6659 __ movb(address, value.AsRegister<ByteRegister>());
6660 } else {
6661 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6662 }
6663 codegen_->MaybeRecordImplicitNullCheck(instruction);
6664 break;
6665 }
6666
6667 case DataType::Type::kUint16:
6668 case DataType::Type::kInt16: {
6669 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6670 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6671 if (value.IsRegister()) {
6672 __ movw(address, value.AsRegister<Register>());
6673 } else {
6674 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6675 }
6676 codegen_->MaybeRecordImplicitNullCheck(instruction);
6677 break;
6678 }
6679
6680 case DataType::Type::kReference: {
6681 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6682 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6683
6684 if (!value.IsRegister()) {
6685 // Just setting null.
6686 DCHECK(instruction->InputAt(2)->IsNullConstant());
6687 DCHECK(value.IsConstant()) << value;
6688 __ movl(address, Immediate(0));
6689 codegen_->MaybeRecordImplicitNullCheck(instruction);
6690 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6691 // We need to set a write barrier here even though we are writing null, since this write
6692 // barrier is being relied on.
6693 DCHECK(needs_write_barrier);
6694 Register temp = locations->GetTemp(0).AsRegister<Register>();
6695 Register card = locations->GetTemp(1).AsRegister<Register>();
6696 codegen_->MarkGCCard(temp, card, array);
6697 }
6698 DCHECK(!needs_type_check);
6699 break;
6700 }
6701
6702 Register register_value = value.AsRegister<Register>();
6703 const bool can_value_be_null = instruction->GetValueCanBeNull();
6704 // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6705 // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6706 // value is null for the type check).
6707 const bool skip_marking_gc_card =
6708 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6709 NearLabel do_store;
6710 NearLabel skip_writing_card;
6711 if (can_value_be_null) {
6712 __ testl(register_value, register_value);
6713 if (skip_marking_gc_card) {
6714 __ j(kEqual, &skip_writing_card);
6715 } else {
6716 __ j(kEqual, &do_store);
6717 }
6718 }
6719
6720 SlowPathCode* slow_path = nullptr;
6721 if (needs_type_check) {
6722 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6723 codegen_->AddSlowPath(slow_path);
6724
6725 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6726 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6727 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6728
6729 // Note that when Baker read barriers are enabled, the type
6730 // checks are performed without read barriers. This is fine,
6731 // even in the case where a class object is in the from-space
6732 // after the flip, as a comparison involving such a type would
6733 // not produce a false positive; it may of course produce a
6734 // false negative, in which case we would take the ArraySet
6735 // slow path.
6736
6737 Register temp = locations->GetTemp(0).AsRegister<Register>();
6738 // /* HeapReference<Class> */ temp = array->klass_
6739 __ movl(temp, Address(array, class_offset));
6740 codegen_->MaybeRecordImplicitNullCheck(instruction);
6741 __ MaybeUnpoisonHeapReference(temp);
6742
6743 // /* HeapReference<Class> */ temp = temp->component_type_
6744 __ movl(temp, Address(temp, component_offset));
6745 // If heap poisoning is enabled, no need to unpoison `temp`
6746 // nor the object reference in `register_value->klass`, as
6747 // we are comparing two poisoned references.
6748 __ cmpl(temp, Address(register_value, class_offset));
6749
6750 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6751 NearLabel do_put;
6752 __ j(kEqual, &do_put);
6753 // If heap poisoning is enabled, the `temp` reference has
6754 // not been unpoisoned yet; unpoison it now.
6755 __ MaybeUnpoisonHeapReference(temp);
6756
6757 // If heap poisoning is enabled, no need to unpoison the
6758 // heap reference loaded below, as it is only used for a
6759 // comparison with null.
6760 __ cmpl(Address(temp, super_offset), Immediate(0));
6761 __ j(kNotEqual, slow_path->GetEntryLabel());
6762 __ Bind(&do_put);
6763 } else {
6764 __ j(kNotEqual, slow_path->GetEntryLabel());
6765 }
6766 }
6767
6768 if (can_value_be_null && !skip_marking_gc_card) {
6769 DCHECK(do_store.IsLinked());
6770 __ Bind(&do_store);
6771 }
6772
6773 if (needs_write_barrier) {
6774 Register temp = locations->GetTemp(0).AsRegister<Register>();
6775 Register card = locations->GetTemp(1).AsRegister<Register>();
6776 codegen_->MarkGCCard(temp, card, array);
6777 } else if (codegen_->ShouldCheckGCCard(
6778 value_type, instruction->GetValue(), write_barrier_kind)) {
6779 Register temp = locations->GetTemp(0).AsRegister<Register>();
6780 Register card = locations->GetTemp(1).AsRegister<Register>();
6781 codegen_->CheckGCCardIsValid(temp, card, array);
6782 }
6783
6784 if (skip_marking_gc_card) {
6785 // Note that we don't check that the GC card is valid as it can be correctly clean.
6786 DCHECK(skip_writing_card.IsLinked());
6787 __ Bind(&skip_writing_card);
6788 }
6789
6790 Register source = register_value;
6791 if (kPoisonHeapReferences) {
6792 Register temp = locations->GetTemp(0).AsRegister<Register>();
6793 __ movl(temp, register_value);
6794 __ PoisonHeapReference(temp);
6795 source = temp;
6796 }
6797
6798 __ movl(address, source);
6799
6800 if (can_value_be_null || !needs_type_check) {
6801 codegen_->MaybeRecordImplicitNullCheck(instruction);
6802 }
6803
6804 if (slow_path != nullptr) {
6805 __ Bind(slow_path->GetExitLabel());
6806 }
6807
6808 break;
6809 }
6810
6811 case DataType::Type::kInt32: {
6812 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6813 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6814 if (value.IsRegister()) {
6815 __ movl(address, value.AsRegister<Register>());
6816 } else {
6817 DCHECK(value.IsConstant()) << value;
6818 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6819 __ movl(address, Immediate(v));
6820 }
6821 codegen_->MaybeRecordImplicitNullCheck(instruction);
6822 break;
6823 }
6824
6825 case DataType::Type::kInt64: {
6826 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6827 if (value.IsRegisterPair()) {
6828 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6829 value.AsRegisterPairLow<Register>());
6830 codegen_->MaybeRecordImplicitNullCheck(instruction);
6831 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6832 value.AsRegisterPairHigh<Register>());
6833 } else {
6834 DCHECK(value.IsConstant());
6835 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6836 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6837 Immediate(Low32Bits(val)));
6838 codegen_->MaybeRecordImplicitNullCheck(instruction);
6839 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6840 Immediate(High32Bits(val)));
6841 }
6842 break;
6843 }
6844
6845 case DataType::Type::kFloat32: {
6846 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6847 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6848 if (value.IsFpuRegister()) {
6849 __ movss(address, value.AsFpuRegister<XmmRegister>());
6850 } else {
6851 DCHECK(value.IsConstant());
6852 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6853 __ movl(address, Immediate(v));
6854 }
6855 codegen_->MaybeRecordImplicitNullCheck(instruction);
6856 break;
6857 }
6858
6859 case DataType::Type::kFloat64: {
6860 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6861 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6862 if (value.IsFpuRegister()) {
6863 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6864 } else {
6865 DCHECK(value.IsConstant());
6866 Address address_hi =
6867 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6868 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6869 __ movl(address, Immediate(Low32Bits(v)));
6870 codegen_->MaybeRecordImplicitNullCheck(instruction);
6871 __ movl(address_hi, Immediate(High32Bits(v)));
6872 }
6873 break;
6874 }
6875
6876 case DataType::Type::kUint32:
6877 case DataType::Type::kUint64:
6878 case DataType::Type::kVoid:
6879 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6880 UNREACHABLE();
6881 }
6882 }
6883
VisitArrayLength(HArrayLength * instruction)6884 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6885 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6886 locations->SetInAt(0, Location::RequiresRegister());
6887 if (!instruction->IsEmittedAtUseSite()) {
6888 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6889 }
6890 }
6891
VisitArrayLength(HArrayLength * instruction)6892 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6893 if (instruction->IsEmittedAtUseSite()) {
6894 return;
6895 }
6896
6897 LocationSummary* locations = instruction->GetLocations();
6898 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6899 Register obj = locations->InAt(0).AsRegister<Register>();
6900 Register out = locations->Out().AsRegister<Register>();
6901 __ movl(out, Address(obj, offset));
6902 codegen_->MaybeRecordImplicitNullCheck(instruction);
6903 // Mask out most significant bit in case the array is String's array of char.
6904 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6905 __ shrl(out, Immediate(1));
6906 }
6907 }
6908
VisitBoundsCheck(HBoundsCheck * instruction)6909 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6910 RegisterSet caller_saves = RegisterSet::Empty();
6911 InvokeRuntimeCallingConvention calling_convention;
6912 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6913 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6914 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6915 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6916 HInstruction* length = instruction->InputAt(1);
6917 if (!length->IsEmittedAtUseSite()) {
6918 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6919 }
6920 // Need register to see array's length.
6921 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6922 locations->AddTemp(Location::RequiresRegister());
6923 }
6924 }
6925
// Emits the index-versus-length comparison for a bounds check and the branch to the
// BoundsCheckSlowPathX86 taken when the index is out of range.
//
// Three shapes are handled:
//  - constant length and constant index: decided at compile time; either an unconditional
//    jump to the slow path or no code at all;
//  - constant length and register index: `cmpl index, length` with an above-or-equal branch;
//  - non-constant length: the length is compared against the index (from a register, from
//    the in-memory array length field, or from a temporary for compressed strings) with a
//    below-or-equal branch.
// The branches use unsigned condition codes (kAboveEqual / kBelowEqual).
void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
  const bool is_string_compressed_char_at =
      mirror::kUseStringCompression && instruction->IsStringCharAt();
  LocationSummary* locations = instruction->GetLocations();
  Location index_loc = locations->InAt(0);
  Location length_loc = locations->InAt(1);
  // The slow path object is created up front but only registered with the code generator
  // (AddSlowPath) on the paths that actually branch to it.
  SlowPathCode* slow_path =
      new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);

  if (length_loc.IsConstant()) {
    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
    if (index_loc.IsConstant()) {
      // BCE will remove the bounds check if we are guaranteed to pass.
      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
      if (index < 0 || index >= length) {
        // Statically out of range: always go to the slow path.
        codegen_->AddSlowPath(slow_path);
        __ jmp(slow_path->GetEntryLabel());
      } else {
        // Some optimization after BCE may have generated this, and we should not
        // generate a bounds check if it is a valid range.
      }
      return;
    }

    // We have to reverse the jump condition because the length is the constant.
    Register index_reg = index_loc.AsRegister<Register>();
    __ cmpl(index_reg, Immediate(length));
    codegen_->AddSlowPath(slow_path);
    __ j(kAboveEqual, slow_path->GetEntryLabel());
  } else {
    HInstruction* array_length = instruction->InputAt(1);
    if (array_length->IsEmittedAtUseSite()) {
      // Address the length field in the array.
      DCHECK(array_length->IsArrayLength());
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<Register>(), len_offset);
      if (is_string_compressed_char_at) {
        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
        // the string compression flag) with the in-memory length and avoid the temporary.
        Register length_reg = locations->GetTemp(0).AsRegister<Register>();
        // Load the length field into the temporary; this load is the instruction recorded
        // for the implicit null check of the array length input.
        __ movl(length_reg, array_len);
        codegen_->MaybeRecordImplicitNullCheck(array_length);
        // Shift right by one to drop the lowest bit before comparing with the index.
        __ shrl(length_reg, Immediate(1));
        codegen_->GenerateIntCompare(length_reg, index_loc);
      } else {
        // Checking bounds for general case:
        // Array of char or string's array with feature compression off.
        // The comparison reads the length directly from memory, so the compare itself is
        // the instruction recorded for the implicit null check.
        if (index_loc.IsConstant()) {
          int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
          __ cmpl(array_len, Immediate(value));
        } else {
          __ cmpl(array_len, index_loc.AsRegister<Register>());
        }
        codegen_->MaybeRecordImplicitNullCheck(array_length);
      }
    } else {
      codegen_->GenerateIntCompare(length_loc, index_loc);
    }
    // In all three cases above the length is the first compare operand, hence the
    // below-or-equal condition (length <= index) selects the slow path.
    codegen_->AddSlowPath(slow_path);
    __ j(kBelowEqual, slow_path->GetEntryLabel());
  }
}
6989
VisitParallelMove(HParallelMove * instruction)6990 void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6991 LOG(FATAL) << "Unreachable";
6992 }
6993
VisitParallelMove(HParallelMove * instruction)6994 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6995 if (instruction->GetNext()->IsSuspendCheck() &&
6996 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6997 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6998 // The back edge will generate the suspend check.
6999 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7000 }
7001
7002 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7003 }
7004
VisitSuspendCheck(HSuspendCheck * instruction)7005 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
7006 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7007 instruction, LocationSummary::kCallOnSlowPath);
7008 // In suspend check slow path, usually there are no caller-save registers at all.
7009 // If SIMD instructions are present, however, we force spilling all live SIMD
7010 // registers in full width (since the runtime only saves/restores lower part).
7011 locations->SetCustomSlowPathCallerSaves(
7012 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
7013 }
7014
VisitSuspendCheck(HSuspendCheck * instruction)7015 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
7016 HBasicBlock* block = instruction->GetBlock();
7017 if (block->GetLoopInformation() != nullptr) {
7018 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7019 // The back edge will generate the suspend check.
7020 return;
7021 }
7022 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7023 // The goto will generate the suspend check.
7024 return;
7025 }
7026 GenerateSuspendCheck(instruction, nullptr);
7027 }
7028
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)7029 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
7030 HBasicBlock* successor) {
7031 SuspendCheckSlowPathX86* slow_path =
7032 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
7033 if (slow_path == nullptr) {
7034 slow_path =
7035 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
7036 instruction->SetSlowPath(slow_path);
7037 codegen_->AddSlowPath(slow_path);
7038 if (successor != nullptr) {
7039 DCHECK(successor->IsLoopHeader());
7040 }
7041 } else {
7042 DCHECK_EQ(slow_path->GetSuccessor(), successor);
7043 }
7044
7045 __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
7046 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
7047 if (successor == nullptr) {
7048 __ j(kNotZero, slow_path->GetEntryLabel());
7049 __ Bind(slow_path->GetReturnLabel());
7050 } else {
7051 __ j(kZero, codegen_->GetLabelOf(successor));
7052 __ jmp(slow_path->GetEntryLabel());
7053 }
7054 }
7055
GetAssembler() const7056 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
7057 return codegen_->GetAssembler();
7058 }
7059
MoveMemoryToMemory(int dst,int src,int number_of_words)7060 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
7061 ScratchRegisterScope ensure_scratch(
7062 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7063 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7064 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7065
7066 // Now that temp register is available (possibly spilled), move blocks of memory.
7067 for (int i = 0; i < number_of_words; i++) {
7068 __ movl(temp_reg, Address(ESP, src + stack_offset));
7069 __ movl(Address(ESP, dst + stack_offset), temp_reg);
7070 stack_offset += kX86WordSize;
7071 }
7072 }
7073
// Emits the x86 instructions for the single move `moves_[index]`, dispatching on the kind
// of the source location (core register, register pair, FPU register, 32/64/128-bit stack
// slot, or constant) and then on the kind of the destination. Any source kind that is not
// handled aborts with a fatal "Unimplemented move" log.
void ParallelMoveResolverX86::EmitMove(size_t index) {
  MoveOperands* move = moves_[index];
  Location source = move->GetSource();
  Location destination = move->GetDestination();

  if (source.IsRegister()) {
    // 32-bit core register source.
    if (destination.IsRegister()) {
      __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
    } else if (destination.IsFpuRegister()) {
      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
    } else {
      DCHECK(destination.IsStackSlot());
      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
    }
  } else if (source.IsRegisterPair()) {
    // 64-bit value held in a pair of core registers.
    if (destination.IsRegisterPair()) {
      __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
      // The low half has just been written; it must not have clobbered the source's high
      // half, which is read next.
      DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
      __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
    } else if (destination.IsFpuRegister()) {
      size_t elem_size = DataType::Size(DataType::Type::kInt32);
      // Push the 2 source registers to the stack.
      // The high half is pushed first so that the low half ends up at the lower address.
      __ pushl(source.AsRegisterPairHigh<Register>());
      __ cfi().AdjustCFAOffset(elem_size);
      __ pushl(source.AsRegisterPairLow<Register>());
      __ cfi().AdjustCFAOffset(elem_size);
      // Load the destination register.
      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
      // And remove the temporary stack space we allocated.
      codegen_->DecreaseFrame(2 * elem_size);
    } else {
      DCHECK(destination.IsDoubleStackSlot());
      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
              source.AsRegisterPairHigh<Register>());
    }
  } else if (source.IsFpuRegister()) {
    // XMM register source; the store width is selected by the destination kind.
    if (destination.IsRegister()) {
      __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
    } else if (destination.IsFpuRegister()) {
      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
    } else if (destination.IsRegisterPair()) {
      size_t elem_size = DataType::Size(DataType::Type::kInt32);
      // Create stack space for 2 elements.
      codegen_->IncreaseFrame(2 * elem_size);
      // Store the source register.
      __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
      // And pop the values into destination registers.
      __ popl(destination.AsRegisterPairLow<Register>());
      __ cfi().AdjustCFAOffset(-elem_size);
      __ popl(destination.AsRegisterPairHigh<Register>());
      __ cfi().AdjustCFAOffset(-elem_size);
    } else if (destination.IsStackSlot()) {
      __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
    } else if (destination.IsDoubleStackSlot()) {
      __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
    } else {
      DCHECK(destination.IsSIMDStackSlot());
      __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
    }
  } else if (source.IsStackSlot()) {
    // 32-bit stack slot source.
    if (destination.IsRegister()) {
      __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
    } else if (destination.IsFpuRegister()) {
      __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
    } else {
      DCHECK(destination.IsStackSlot());
      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
    }
  } else if (source.IsDoubleStackSlot()) {
    // 64-bit stack slot source.
    if (destination.IsRegisterPair()) {
      __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
      __ movl(destination.AsRegisterPairHigh<Register>(),
              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
    } else if (destination.IsFpuRegister()) {
      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
    } else {
      DCHECK(destination.IsDoubleStackSlot()) << destination;
      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
    }
  } else if (source.IsSIMDStackSlot()) {
    // 128-bit stack slot source (copied as four 32-bit words when going to memory).
    if (destination.IsFpuRegister()) {
      __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
    } else {
      DCHECK(destination.IsSIMDStackSlot());
      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
    }
  } else if (source.IsConstant()) {
    // Constant source: materialize the bit pattern directly into the destination.
    HConstant* constant = source.GetConstant();
    if (constant->IsIntConstant() || constant->IsNullConstant()) {
      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
      if (destination.IsRegister()) {
        if (value == 0) {
          // Zero the register with XOR instead of loading an immediate.
          __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
        } else {
          __ movl(destination.AsRegister<Register>(), Immediate(value));
        }
      } else {
        DCHECK(destination.IsStackSlot()) << destination;
        __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
      }
    } else if (constant->IsFloatConstant()) {
      float fp_value = constant->AsFloatConstant()->GetValue();
      // Work on the raw 32-bit pattern of the float.
      int32_t value = bit_cast<int32_t, float>(fp_value);
      Immediate imm(value);
      if (destination.IsFpuRegister()) {
        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
        if (value == 0) {
          // Easy handling of 0.0.
          __ xorps(dest, dest);
        } else {
          // Go through a scratch core register: load the bit pattern as an immediate,
          // then transfer it to the XMM register.
          ScratchRegisterScope ensure_scratch(
              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
          __ movl(temp, Immediate(value));
          __ movd(dest, temp);
        }
      } else {
        DCHECK(destination.IsStackSlot()) << destination;
        __ movl(Address(ESP, destination.GetStackIndex()), imm);
      }
    } else if (constant->IsLongConstant()) {
      // 64-bit integer constant, written as two 32-bit halves.
      int64_t value = constant->AsLongConstant()->GetValue();
      int32_t low_value = Low32Bits(value);
      int32_t high_value = High32Bits(value);
      Immediate low(low_value);
      Immediate high(high_value);
      if (destination.IsDoubleStackSlot()) {
        __ movl(Address(ESP, destination.GetStackIndex()), low);
        __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
      } else {
        __ movl(destination.AsRegisterPairLow<Register>(), low);
        __ movl(destination.AsRegisterPairHigh<Register>(), high);
      }
    } else {
      DCHECK(constant->IsDoubleConstant());
      double dbl_value = constant->AsDoubleConstant()->GetValue();
      // Work on the raw 64-bit pattern of the double, split into two 32-bit halves.
      int64_t value = bit_cast<int64_t, double>(dbl_value);
      int32_t low_value = Low32Bits(value);
      int32_t high_value = High32Bits(value);
      Immediate low(low_value);
      Immediate high(high_value);
      if (destination.IsFpuRegister()) {
        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
        if (value == 0) {
          // Easy handling of 0.0.
          __ xorpd(dest, dest);
        } else {
          // Build the 8-byte value on the stack (high half pushed first), load it into
          // the XMM register, then give the 8 bytes of stack back.
          __ pushl(high);
          __ cfi().AdjustCFAOffset(4);
          __ pushl(low);
          __ cfi().AdjustCFAOffset(4);
          __ movsd(dest, Address(ESP, 0));
          codegen_->DecreaseFrame(8);
        }
      } else {
        DCHECK(destination.IsDoubleStackSlot()) << destination;
        __ movl(Address(ESP, destination.GetStackIndex()), low);
        __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
      }
    }
  } else {
    LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
  }
}
7239
Exchange(Register reg,int mem)7240 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
7241 Register suggested_scratch = reg == EAX ? EBX : EAX;
7242 ScratchRegisterScope ensure_scratch(
7243 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7244
7245 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7246 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
7247 __ movl(Address(ESP, mem + stack_offset), reg);
7248 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
7249 }
7250
Exchange32(XmmRegister reg,int mem)7251 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
7252 ScratchRegisterScope ensure_scratch(
7253 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7254
7255 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7256 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7257 __ movl(temp_reg, Address(ESP, mem + stack_offset));
7258 __ movss(Address(ESP, mem + stack_offset), reg);
7259 __ movd(reg, temp_reg);
7260 }
7261
Exchange128(XmmRegister reg,int mem)7262 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
7263 size_t extra_slot = 4 * kX86WordSize;
7264 codegen_->IncreaseFrame(extra_slot);
7265 __ movups(Address(ESP, 0), XmmRegister(reg));
7266 ExchangeMemory(0, mem + extra_slot, 4);
7267 __ movups(XmmRegister(reg), Address(ESP, 0));
7268 codegen_->DecreaseFrame(extra_slot);
7269 }
7270
ExchangeMemory(int mem1,int mem2,int number_of_words)7271 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
7272 ScratchRegisterScope ensure_scratch1(
7273 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7274
7275 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
7276 ScratchRegisterScope ensure_scratch2(
7277 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7278
7279 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
7280 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
7281
7282 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
7283 for (int i = 0; i < number_of_words; i++) {
7284 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
7285 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
7286 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
7287 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
7288 stack_offset += kX86WordSize;
7289 }
7290 }
7291
// Emits the x86 instructions that exchange the source and destination of `moves_[index]`.
// Supported combinations are core register <-> core register / stack slot, XMM register
// <-> XMM register / 32-, 64- or 128-bit stack slot, and stack slot <-> stack slot of the
// same width. Any other combination aborts with a fatal "Unimplemented" log.
void ParallelMoveResolverX86::EmitSwap(size_t index) {
  MoveOperands* move = moves_[index];
  Location source = move->GetSource();
  Location destination = move->GetDestination();

  if (source.IsRegister() && destination.IsRegister()) {
    // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
    // XOR swap requires two distinct registers, hence the check.
    DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
    __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
    __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
    __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
  } else if (source.IsRegister() && destination.IsStackSlot()) {
    Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
  } else if (source.IsStackSlot() && destination.IsRegister()) {
    Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
    // Use XOR Swap algorithm to avoid a temporary.
    DCHECK_NE(source.reg(), destination.reg());
    __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
    __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
    __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
  } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
    Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
  } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
    Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
  } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
    // Take advantage of the 16 bytes in the XMM register.
    // The upper 8 bytes of the register serve as the temporary for the swap, so no
    // scratch register or extra stack space is needed.
    XmmRegister reg = source.AsFpuRegister<XmmRegister>();
    Address stack(ESP, destination.GetStackIndex());
    // Load the double into the high doubleword.
    __ movhpd(reg, stack);

    // Store the low double into the destination.
    __ movsd(stack, reg);

    // Move the high double to the low double.
    __ psrldq(reg, Immediate(8));
  } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
    // Take advantage of the 16 bytes in the XMM register.
    // Same sequence as the previous case with the roles of source and destination swapped.
    XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
    Address stack(ESP, source.GetStackIndex());
    // Load the double into the high doubleword.
    __ movhpd(reg, stack);

    // Store the low double into the destination.
    __ movsd(stack, reg);

    // Move the high double to the low double.
    __ psrldq(reg, Immediate(8));
  } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
  } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
  } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
    Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
  } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
    Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
  } else {
    LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
  }
}
7355
SpillScratch(int reg)7356 void ParallelMoveResolverX86::SpillScratch(int reg) {
7357 __ pushl(static_cast<Register>(reg));
7358 }
7359
RestoreScratch(int reg)7360 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7361 __ popl(static_cast<Register>(reg));
7362 }
7363
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)7364 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7365 HLoadClass::LoadKind desired_class_load_kind) {
7366 switch (desired_class_load_kind) {
7367 case HLoadClass::LoadKind::kInvalid:
7368 LOG(FATAL) << "UNREACHABLE";
7369 UNREACHABLE();
7370 case HLoadClass::LoadKind::kReferrersClass:
7371 break;
7372 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7373 case HLoadClass::LoadKind::kBootImageRelRo:
7374 case HLoadClass::LoadKind::kAppImageRelRo:
7375 case HLoadClass::LoadKind::kBssEntry:
7376 case HLoadClass::LoadKind::kBssEntryPublic:
7377 case HLoadClass::LoadKind::kBssEntryPackage:
7378 DCHECK(!GetCompilerOptions().IsJitCompiler());
7379 break;
7380 case HLoadClass::LoadKind::kJitBootImageAddress:
7381 case HLoadClass::LoadKind::kJitTableAddress:
7382 DCHECK(GetCompilerOptions().IsJitCompiler());
7383 break;
7384 case HLoadClass::LoadKind::kRuntimeCall:
7385 break;
7386 }
7387 return desired_class_load_kind;
7388 }
7389
VisitLoadClass(HLoadClass * cls)7390 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7391 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7392 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7393 InvokeRuntimeCallingConvention calling_convention;
7394 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7395 cls,
7396 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7397 Location::RegisterLocation(EAX));
7398 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7399 return;
7400 }
7401 DCHECK_EQ(cls->NeedsAccessCheck(),
7402 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7403 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7404
7405 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7406 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7407 ? LocationSummary::kCallOnSlowPath
7408 : LocationSummary::kNoCall;
7409 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7410 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7411 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7412 }
7413
7414 if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7415 locations->SetInAt(0, Location::RequiresRegister());
7416 }
7417 locations->SetOut(Location::RequiresRegister());
7418 if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7419 if (codegen_->EmitNonBakerReadBarrier()) {
7420 // For non-Baker read barrier we have a temp-clobbering call.
7421 } else {
7422 // Rely on the type resolution and/or initialization to save everything.
7423 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7424 }
7425 }
7426 }
7427
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)7428 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7429 dex::TypeIndex type_index,
7430 Handle<mirror::Class> handle) {
7431 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7432 // Add a patch entry and return the label.
7433 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7434 PatchInfo<Label>* info = &jit_class_patches_.back();
7435 return &info->label;
7436 }
7437
// Emits the code that loads a class reference into the output register according to the
// instruction's load kind, followed (when needed) by a null check and/or a class
// initialization check that branch to a LoadClassSlowPathX86.
//
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
    // The whole load is a runtime call; nothing else to emit here.
    codegen_->GenerateLoadClassRuntimeCall(cls);
    return;
  }
  // Only the public/package .bss entry kinds carry an access check.
  DCHECK_EQ(cls->NeedsAccessCheck(),
            load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
                load_kind == HLoadClass::LoadKind::kBssEntryPackage);

  LocationSummary* locations = cls->GetLocations();
  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();

  // Set by the load kinds whose result may be null and therefore needs a slow path.
  bool generate_null_check = false;
  // Classes that are in the image are loaded without a read barrier.
  const ReadBarrierOption read_barrier_option =
      cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
  switch (load_kind) {
    case HLoadClass::LoadKind::kReferrersClass: {
      DCHECK(!cls->CanCallRuntime());
      DCHECK(!cls->MustGenerateClinitCheck());
      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
      Register current_method = locations->InAt(0).AsRegister<Register>();
      GenerateGcRootFieldLoad(
          cls,
          out_loc,
          Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
          /* fixup_label= */ nullptr,
          read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
             codegen_->GetCompilerOptions().IsBootImageExtension());
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      Register method_address = locations->InAt(0).AsRegister<Register>();
      // Compute the address with a placeholder displacement; a patch is recorded right
      // after the instruction so the displacement can be fixed up later.
      __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
      codegen_->RecordBootImageTypePatch(cls);
      break;
    }
    case HLoadClass::LoadKind::kBootImageRelRo: {
      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      Register method_address = locations->InAt(0).AsRegister<Register>();
      // Load through a placeholder displacement, patched via the recorded RelRo entry.
      __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
      codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
                                          CodeGenerator::GetBootImageOffset(cls));
      break;
    }
    case HLoadClass::LoadKind::kAppImageRelRo: {
      DCHECK(codegen_->GetCompilerOptions().IsAppImage());
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      Register method_address = locations->InAt(0).AsRegister<Register>();
      // Load through a placeholder displacement, patched via the recorded app image entry.
      __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
      codegen_->RecordAppImageTypePatch(cls);
      break;
    }
    case HLoadClass::LoadKind::kBssEntry:
    case HLoadClass::LoadKind::kBssEntryPublic:
    case HLoadClass::LoadKind::kBssEntryPackage: {
      Register method_address = locations->InAt(0).AsRegister<Register>();
      Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
      Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      // No need for memory fence, thanks to the x86 memory model.
      // The loaded entry may be null, so a null check that falls back to the slow path
      // is required below.
      generate_null_check = true;
      break;
    }
    case HLoadClass::LoadKind::kJitBootImageAddress: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      // Embed the class pointer directly as a 32-bit immediate.
      uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
      DCHECK_NE(address, 0u);
      __ movl(out, Immediate(address));
      break;
    }
    case HLoadClass::LoadKind::kJitTableAddress: {
      Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
      Label* fixup_label = codegen_->NewJitRootClassPatch(
          cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
      // /* GcRoot<mirror::Class> */ out = *address
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kRuntimeCall:
    case HLoadClass::LoadKind::kInvalid:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }

  if (generate_null_check || cls->MustGenerateClinitCheck()) {
    DCHECK(cls->CanCallRuntime());
    // A single slow path serves both the null check and the initialization check.
    SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
    codegen_->AddSlowPath(slow_path);

    if (generate_null_check) {
      __ testl(out, out);
      __ j(kEqual, slow_path->GetEntryLabel());
    }

    // The slow path's exit label must be bound exactly once: the initialization check
    // binds it itself, otherwise it is bound here.
    if (cls->MustGenerateClinitCheck()) {
      GenerateClassInitializationCheck(slow_path, out);
    } else {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}
7545
VisitLoadMethodHandle(HLoadMethodHandle * load)7546 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7547 InvokeRuntimeCallingConvention calling_convention;
7548 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7549 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7550 }
7551
VisitLoadMethodHandle(HLoadMethodHandle * load)7552 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7553 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7554 }
7555
VisitLoadMethodType(HLoadMethodType * load)7556 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7557 InvokeRuntimeCallingConvention calling_convention;
7558 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7559 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7560 }
7561
VisitLoadMethodType(HLoadMethodType * load)7562 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7563 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7564 }
7565
VisitClinitCheck(HClinitCheck * check)7566 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7567 LocationSummary* locations =
7568 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7569 locations->SetInAt(0, Location::RequiresRegister());
7570 if (check->HasUses()) {
7571 locations->SetOut(Location::SameAsFirstInput());
7572 }
7573 // Rely on the type initialization to save everything we need.
7574 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7575 }
7576
VisitClinitCheck(HClinitCheck * check)7577 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7578 // We assume the class to not be null.
7579 SlowPathCode* slow_path =
7580 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7581 codegen_->AddSlowPath(slow_path);
7582 GenerateClassInitializationCheck(slow_path,
7583 check->GetLocations()->InAt(0).AsRegister<Register>());
7584 }
7585
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)7586 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7587 SlowPathCode* slow_path, Register class_reg) {
7588 __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
7589 __ j(kBelow, slow_path->GetEntryLabel());
7590 __ Bind(slow_path->GetExitLabel());
7591 }
7592
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,Register temp)7593 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7594 Register temp) {
7595 uint32_t path_to_root = check->GetBitstringPathToRoot();
7596 uint32_t mask = check->GetBitstringMask();
7597 DCHECK(IsPowerOfTwo(mask + 1));
7598 size_t mask_bits = WhichPowerOf2(mask + 1);
7599
7600 if (mask_bits == 16u) {
7601 // Compare the bitstring in memory.
7602 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7603 } else {
7604 // /* uint32_t */ temp = temp->status_
7605 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7606 // Compare the bitstring bits using SUB.
7607 __ subl(temp, Immediate(path_to_root));
7608 // Shift out bits that do not contribute to the comparison.
7609 __ shll(temp, Immediate(32u - mask_bits));
7610 }
7611 }
7612
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)7613 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7614 HLoadString::LoadKind desired_string_load_kind) {
7615 switch (desired_string_load_kind) {
7616 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7617 case HLoadString::LoadKind::kBootImageRelRo:
7618 case HLoadString::LoadKind::kBssEntry:
7619 DCHECK(!GetCompilerOptions().IsJitCompiler());
7620 break;
7621 case HLoadString::LoadKind::kJitBootImageAddress:
7622 case HLoadString::LoadKind::kJitTableAddress:
7623 DCHECK(GetCompilerOptions().IsJitCompiler());
7624 break;
7625 case HLoadString::LoadKind::kRuntimeCall:
7626 break;
7627 }
7628 return desired_string_load_kind;
7629 }
7630
VisitLoadString(HLoadString * load)7631 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7632 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7633 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7634 HLoadString::LoadKind load_kind = load->GetLoadKind();
7635 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7636 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7637 load_kind == HLoadString::LoadKind::kBssEntry) {
7638 locations->SetInAt(0, Location::RequiresRegister());
7639 }
7640 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7641 locations->SetOut(Location::RegisterLocation(EAX));
7642 } else {
7643 locations->SetOut(Location::RequiresRegister());
7644 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7645 if (codegen_->EmitNonBakerReadBarrier()) {
7646 // For non-Baker read barrier we have a temp-clobbering call.
7647 } else {
7648 // Rely on the pResolveString to save everything.
7649 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7650 }
7651 }
7652 }
7653 }
7654
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)7655 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7656 dex::StringIndex string_index,
7657 Handle<mirror::String> handle) {
7658 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7659 // Add a patch entry and return the label.
7660 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7661 PatchInfo<Label>* info = &jit_string_patches_.back();
7662 return &info->label;
7663 }
7664
7665 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7666 // move.
VisitLoadString(HLoadString * load)7667 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7668 LocationSummary* locations = load->GetLocations();
7669 Location out_loc = locations->Out();
7670 Register out = out_loc.AsRegister<Register>();
7671
7672 switch (load->GetLoadKind()) {
7673 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7674 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7675 codegen_->GetCompilerOptions().IsBootImageExtension());
7676 Register method_address = locations->InAt(0).AsRegister<Register>();
7677 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7678 codegen_->RecordBootImageStringPatch(load);
7679 return;
7680 }
7681 case HLoadString::LoadKind::kBootImageRelRo: {
7682 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7683 Register method_address = locations->InAt(0).AsRegister<Register>();
7684 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7685 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7686 CodeGenerator::GetBootImageOffset(load));
7687 return;
7688 }
7689 case HLoadString::LoadKind::kBssEntry: {
7690 Register method_address = locations->InAt(0).AsRegister<Register>();
7691 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7692 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7693 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7694 GenerateGcRootFieldLoad(
7695 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7696 // No need for memory fence, thanks to the x86 memory model.
7697 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7698 codegen_->AddSlowPath(slow_path);
7699 __ testl(out, out);
7700 __ j(kEqual, slow_path->GetEntryLabel());
7701 __ Bind(slow_path->GetExitLabel());
7702 return;
7703 }
7704 case HLoadString::LoadKind::kJitBootImageAddress: {
7705 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7706 DCHECK_NE(address, 0u);
7707 __ movl(out, Immediate(address));
7708 return;
7709 }
7710 case HLoadString::LoadKind::kJitTableAddress: {
7711 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7712 Label* fixup_label = codegen_->NewJitRootStringPatch(
7713 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7714 // /* GcRoot<mirror::String> */ out = *address
7715 GenerateGcRootFieldLoad(
7716 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7717 return;
7718 }
7719 default:
7720 break;
7721 }
7722
7723 InvokeRuntimeCallingConvention calling_convention;
7724 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7725 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7726 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7727 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7728 }
7729
GetExceptionTlsAddress()7730 static Address GetExceptionTlsAddress() {
7731 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7732 }
7733
VisitLoadException(HLoadException * load)7734 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7735 LocationSummary* locations =
7736 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7737 locations->SetOut(Location::RequiresRegister());
7738 }
7739
VisitLoadException(HLoadException * load)7740 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7741 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7742 }
7743
VisitClearException(HClearException * clear)7744 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7745 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7746 }
7747
VisitClearException(HClearException * clear)7748 void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) {
7749 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7750 }
7751
VisitThrow(HThrow * instruction)7752 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7753 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7754 instruction, LocationSummary::kCallOnMainOnly);
7755 InvokeRuntimeCallingConvention calling_convention;
7756 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7757 }
7758
VisitThrow(HThrow * instruction)7759 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7760 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7761 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7762 }
7763
7764 // Temp is used for read barrier.
NumberOfInstanceOfTemps(bool emit_read_barrier,TypeCheckKind type_check_kind)7765 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7766 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7767 return 1;
7768 }
7769 if (emit_read_barrier &&
7770 !kUseBakerReadBarrier &&
7771 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7772 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7773 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7774 return 1;
7775 }
7776 return 0;
7777 }
7778
7779 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7780 // interface pointer, the current interface is compared in memory.
7781 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(bool emit_read_barrier,TypeCheckKind type_check_kind)7782 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7783 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7784 }
7785
VisitInstanceOf(HInstanceOf * instruction)7786 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7787 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7788 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7789 bool baker_read_barrier_slow_path = false;
7790 switch (type_check_kind) {
7791 case TypeCheckKind::kExactCheck:
7792 case TypeCheckKind::kAbstractClassCheck:
7793 case TypeCheckKind::kClassHierarchyCheck:
7794 case TypeCheckKind::kArrayObjectCheck:
7795 case TypeCheckKind::kInterfaceCheck: {
7796 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7797 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7798 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7799 (type_check_kind != TypeCheckKind::kInterfaceCheck);
7800 break;
7801 }
7802 case TypeCheckKind::kArrayCheck:
7803 case TypeCheckKind::kUnresolvedCheck:
7804 call_kind = LocationSummary::kCallOnSlowPath;
7805 break;
7806 case TypeCheckKind::kBitstringCheck:
7807 break;
7808 }
7809
7810 LocationSummary* locations =
7811 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7812 if (baker_read_barrier_slow_path) {
7813 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7814 }
7815 locations->SetInAt(0, Location::RequiresRegister());
7816 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7817 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7818 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7819 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7820 } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7821 locations->SetInAt(1, Location::RequiresRegister());
7822 } else {
7823 locations->SetInAt(1, Location::Any());
7824 }
7825 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7826 locations->SetOut(Location::RequiresRegister());
7827 // When read barriers are enabled, we need a temporary register for some cases.
7828 locations->AddRegisterTemps(
7829 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7830 }
7831
VisitInstanceOf(HInstanceOf * instruction)7832 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7833 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7834 LocationSummary* locations = instruction->GetLocations();
7835 Location obj_loc = locations->InAt(0);
7836 Register obj = obj_loc.AsRegister<Register>();
7837 Location cls = locations->InAt(1);
7838 Location out_loc = locations->Out();
7839 Register out = out_loc.AsRegister<Register>();
7840 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7841 DCHECK_LE(num_temps, 1u);
7842 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7843 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7844 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7845 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7846 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7847 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7848 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7849 const uint32_t object_array_data_offset =
7850 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7851 SlowPathCode* slow_path = nullptr;
7852 NearLabel done, zero;
7853
7854 // Return 0 if `obj` is null.
7855 // Avoid null check if we know obj is not null.
7856 if (instruction->MustDoNullCheck()) {
7857 __ testl(obj, obj);
7858 __ j(kEqual, &zero);
7859 }
7860
7861 switch (type_check_kind) {
7862 case TypeCheckKind::kExactCheck: {
7863 ReadBarrierOption read_barrier_option =
7864 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7865 // /* HeapReference<Class> */ out = obj->klass_
7866 GenerateReferenceLoadTwoRegisters(instruction,
7867 out_loc,
7868 obj_loc,
7869 class_offset,
7870 read_barrier_option);
7871 if (cls.IsRegister()) {
7872 __ cmpl(out, cls.AsRegister<Register>());
7873 } else {
7874 DCHECK(cls.IsStackSlot()) << cls;
7875 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7876 }
7877
7878 // Classes must be equal for the instanceof to succeed.
7879 __ j(kNotEqual, &zero);
7880 __ movl(out, Immediate(1));
7881 __ jmp(&done);
7882 break;
7883 }
7884
7885 case TypeCheckKind::kAbstractClassCheck: {
7886 ReadBarrierOption read_barrier_option =
7887 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7888 // /* HeapReference<Class> */ out = obj->klass_
7889 GenerateReferenceLoadTwoRegisters(instruction,
7890 out_loc,
7891 obj_loc,
7892 class_offset,
7893 read_barrier_option);
7894 // If the class is abstract, we eagerly fetch the super class of the
7895 // object to avoid doing a comparison we know will fail.
7896 NearLabel loop;
7897 __ Bind(&loop);
7898 // /* HeapReference<Class> */ out = out->super_class_
7899 GenerateReferenceLoadOneRegister(instruction,
7900 out_loc,
7901 super_offset,
7902 maybe_temp_loc,
7903 read_barrier_option);
7904 __ testl(out, out);
7905 // If `out` is null, we use it for the result, and jump to `done`.
7906 __ j(kEqual, &done);
7907 if (cls.IsRegister()) {
7908 __ cmpl(out, cls.AsRegister<Register>());
7909 } else {
7910 DCHECK(cls.IsStackSlot()) << cls;
7911 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7912 }
7913 __ j(kNotEqual, &loop);
7914 __ movl(out, Immediate(1));
7915 if (zero.IsLinked()) {
7916 __ jmp(&done);
7917 }
7918 break;
7919 }
7920
7921 case TypeCheckKind::kClassHierarchyCheck: {
7922 ReadBarrierOption read_barrier_option =
7923 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7924 // /* HeapReference<Class> */ out = obj->klass_
7925 GenerateReferenceLoadTwoRegisters(instruction,
7926 out_loc,
7927 obj_loc,
7928 class_offset,
7929 read_barrier_option);
7930 // Walk over the class hierarchy to find a match.
7931 NearLabel loop, success;
7932 __ Bind(&loop);
7933 if (cls.IsRegister()) {
7934 __ cmpl(out, cls.AsRegister<Register>());
7935 } else {
7936 DCHECK(cls.IsStackSlot()) << cls;
7937 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7938 }
7939 __ j(kEqual, &success);
7940 // /* HeapReference<Class> */ out = out->super_class_
7941 GenerateReferenceLoadOneRegister(instruction,
7942 out_loc,
7943 super_offset,
7944 maybe_temp_loc,
7945 read_barrier_option);
7946 __ testl(out, out);
7947 __ j(kNotEqual, &loop);
7948 // If `out` is null, we use it for the result, and jump to `done`.
7949 __ jmp(&done);
7950 __ Bind(&success);
7951 __ movl(out, Immediate(1));
7952 if (zero.IsLinked()) {
7953 __ jmp(&done);
7954 }
7955 break;
7956 }
7957
7958 case TypeCheckKind::kArrayObjectCheck: {
7959 ReadBarrierOption read_barrier_option =
7960 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7961 // /* HeapReference<Class> */ out = obj->klass_
7962 GenerateReferenceLoadTwoRegisters(instruction,
7963 out_loc,
7964 obj_loc,
7965 class_offset,
7966 read_barrier_option);
7967 // Do an exact check.
7968 NearLabel exact_check;
7969 if (cls.IsRegister()) {
7970 __ cmpl(out, cls.AsRegister<Register>());
7971 } else {
7972 DCHECK(cls.IsStackSlot()) << cls;
7973 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7974 }
7975 __ j(kEqual, &exact_check);
7976 // Otherwise, we need to check that the object's class is a non-primitive array.
7977 // /* HeapReference<Class> */ out = out->component_type_
7978 GenerateReferenceLoadOneRegister(instruction,
7979 out_loc,
7980 component_offset,
7981 maybe_temp_loc,
7982 read_barrier_option);
7983 __ testl(out, out);
7984 // If `out` is null, we use it for the result, and jump to `done`.
7985 __ j(kEqual, &done);
7986 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7987 __ j(kNotEqual, &zero);
7988 __ Bind(&exact_check);
7989 __ movl(out, Immediate(1));
7990 __ jmp(&done);
7991 break;
7992 }
7993
7994 case TypeCheckKind::kArrayCheck: {
7995 // No read barrier since the slow path will retry upon failure.
7996 // /* HeapReference<Class> */ out = obj->klass_
7997 GenerateReferenceLoadTwoRegisters(instruction,
7998 out_loc,
7999 obj_loc,
8000 class_offset,
8001 kWithoutReadBarrier);
8002 if (cls.IsRegister()) {
8003 __ cmpl(out, cls.AsRegister<Register>());
8004 } else {
8005 DCHECK(cls.IsStackSlot()) << cls;
8006 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
8007 }
8008 DCHECK(locations->OnlyCallsOnSlowPath());
8009 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8010 instruction, /* is_fatal= */ false);
8011 codegen_->AddSlowPath(slow_path);
8012 __ j(kNotEqual, slow_path->GetEntryLabel());
8013 __ movl(out, Immediate(1));
8014 if (zero.IsLinked()) {
8015 __ jmp(&done);
8016 }
8017 break;
8018 }
8019
8020 case TypeCheckKind::kInterfaceCheck: {
8021 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8022 DCHECK(locations->OnlyCallsOnSlowPath());
8023 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8024 instruction, /* is_fatal= */ false);
8025 codegen_->AddSlowPath(slow_path);
8026 if (codegen_->EmitNonBakerReadBarrier()) {
8027 __ jmp(slow_path->GetEntryLabel());
8028 break;
8029 }
8030 // For Baker read barrier, take the slow path while marking.
8031 __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()),
8032 Immediate(0));
8033 __ j(kNotEqual, slow_path->GetEntryLabel());
8034 }
8035
8036 // Fast-path without read barriers.
8037 Register temp = maybe_temp_loc.AsRegister<Register>();
8038 // /* HeapReference<Class> */ temp = obj->klass_
8039 __ movl(temp, Address(obj, class_offset));
8040 __ MaybeUnpoisonHeapReference(temp);
8041 // /* HeapReference<Class> */ temp = temp->iftable_
8042 __ movl(temp, Address(temp, iftable_offset));
8043 __ MaybeUnpoisonHeapReference(temp);
8044 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8045 __ movl(out, Address(temp, array_length_offset));
8046 // Maybe poison the `cls` for direct comparison with memory.
8047 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8048 // Loop through the iftable and check if any class matches.
8049 NearLabel loop, end;
8050 __ Bind(&loop);
8051 // Check if we still have an entry to compare.
8052 __ subl(out, Immediate(2));
8053 __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
8054 // Go to next interface if the classes do not match.
8055 __ cmpl(cls.AsRegister<Register>(),
8056 CodeGeneratorX86::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
8057 __ j(kNotEqual, &loop);
8058 if (zero.IsLinked()) {
8059 __ movl(out, Immediate(1));
8060 // If `cls` was poisoned above, unpoison it.
8061 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8062 __ jmp(&done);
8063 if (kPoisonHeapReferences) {
8064 // The false case needs to unpoison the class before jumping to `zero`.
8065 __ Bind(&end);
8066 __ UnpoisonHeapReference(cls.AsRegister<Register>());
8067 __ jmp(&zero);
8068 }
8069 } else {
8070 // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
8071 __ movl(out, Immediate(-1));
8072 __ Bind(&end);
8073 __ addl(out, Immediate(2));
8074 // If `cls` was poisoned above, unpoison it.
8075 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8076 }
8077 break;
8078 }
8079
8080 case TypeCheckKind::kUnresolvedCheck: {
8081 // Note that we indeed only call on slow path, but we always go
8082 // into the slow path for the unresolved check case.
8083 //
8084 // We cannot directly call the InstanceofNonTrivial runtime
8085 // entry point without resorting to a type checking slow path
8086 // here (i.e. by calling InvokeRuntime directly), as it would
8087 // require to assign fixed registers for the inputs of this
8088 // HInstanceOf instruction (following the runtime calling
8089 // convention), which might be cluttered by the potential first
8090 // read barrier emission at the beginning of this method.
8091 //
8092 // TODO: Introduce a new runtime entry point taking the object
8093 // to test (instead of its class) as argument, and let it deal
8094 // with the read barrier issues. This will let us refactor this
8095 // case of the `switch` code as it was previously (with a direct
8096 // call to the runtime not using a type checking slow path).
8097 // This should also be beneficial for the other cases above.
8098 DCHECK(locations->OnlyCallsOnSlowPath());
8099 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8100 instruction, /* is_fatal= */ false);
8101 codegen_->AddSlowPath(slow_path);
8102 __ jmp(slow_path->GetEntryLabel());
8103 break;
8104 }
8105
8106 case TypeCheckKind::kBitstringCheck: {
8107 // /* HeapReference<Class> */ temp = obj->klass_
8108 GenerateReferenceLoadTwoRegisters(instruction,
8109 out_loc,
8110 obj_loc,
8111 class_offset,
8112 kWithoutReadBarrier);
8113
8114 GenerateBitstringTypeCheckCompare(instruction, out);
8115 __ j(kNotEqual, &zero);
8116 __ movl(out, Immediate(1));
8117 __ jmp(&done);
8118 break;
8119 }
8120 }
8121
8122 if (zero.IsLinked()) {
8123 __ Bind(&zero);
8124 __ xorl(out, out);
8125 }
8126
8127 if (done.IsLinked()) {
8128 __ Bind(&done);
8129 }
8130
8131 if (slow_path != nullptr) {
8132 __ Bind(slow_path->GetExitLabel());
8133 }
8134 }
8135
VisitCheckCast(HCheckCast * instruction)8136 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
8137 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8138 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8139 LocationSummary* locations =
8140 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8141 locations->SetInAt(0, Location::RequiresRegister());
8142 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8143 // Require a register for the interface check since there is a loop that compares the class to
8144 // a memory address.
8145 locations->SetInAt(1, Location::RequiresRegister());
8146 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8147 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8148 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8149 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8150 } else {
8151 locations->SetInAt(1, Location::Any());
8152 }
8153 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8154 }
8155
VisitCheckCast(HCheckCast * instruction)8156 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
8157 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8158 LocationSummary* locations = instruction->GetLocations();
8159 Location obj_loc = locations->InAt(0);
8160 Register obj = obj_loc.AsRegister<Register>();
8161 Location cls = locations->InAt(1);
8162 Location temp_loc = locations->GetTemp(0);
8163 Register temp = temp_loc.AsRegister<Register>();
8164 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8165 DCHECK_GE(num_temps, 1u);
8166 DCHECK_LE(num_temps, 2u);
8167 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8168 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8169 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8170 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8171 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8172 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8173 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8174 const uint32_t object_array_data_offset =
8175 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8176
8177 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8178 SlowPathCode* type_check_slow_path =
8179 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8180 instruction, is_type_check_slow_path_fatal);
8181 codegen_->AddSlowPath(type_check_slow_path);
8182
8183 NearLabel done;
8184 // Avoid null check if we know obj is not null.
8185 if (instruction->MustDoNullCheck()) {
8186 __ testl(obj, obj);
8187 __ j(kEqual, &done);
8188 }
8189
8190 switch (type_check_kind) {
8191 case TypeCheckKind::kExactCheck:
8192 case TypeCheckKind::kArrayCheck: {
8193 // /* HeapReference<Class> */ temp = obj->klass_
8194 GenerateReferenceLoadTwoRegisters(instruction,
8195 temp_loc,
8196 obj_loc,
8197 class_offset,
8198 kWithoutReadBarrier);
8199
8200 if (cls.IsRegister()) {
8201 __ cmpl(temp, cls.AsRegister<Register>());
8202 } else {
8203 DCHECK(cls.IsStackSlot()) << cls;
8204 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8205 }
8206 // Jump to slow path for throwing the exception or doing a
8207 // more involved array check.
8208 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8209 break;
8210 }
8211
8212 case TypeCheckKind::kAbstractClassCheck: {
8213 // /* HeapReference<Class> */ temp = obj->klass_
8214 GenerateReferenceLoadTwoRegisters(instruction,
8215 temp_loc,
8216 obj_loc,
8217 class_offset,
8218 kWithoutReadBarrier);
8219
8220 // If the class is abstract, we eagerly fetch the super class of the
8221 // object to avoid doing a comparison we know will fail.
8222 NearLabel loop;
8223 __ Bind(&loop);
8224 // /* HeapReference<Class> */ temp = temp->super_class_
8225 GenerateReferenceLoadOneRegister(instruction,
8226 temp_loc,
8227 super_offset,
8228 maybe_temp2_loc,
8229 kWithoutReadBarrier);
8230
8231 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8232 // exception.
8233 __ testl(temp, temp);
8234 __ j(kZero, type_check_slow_path->GetEntryLabel());
8235
8236 // Otherwise, compare the classes
8237 if (cls.IsRegister()) {
8238 __ cmpl(temp, cls.AsRegister<Register>());
8239 } else {
8240 DCHECK(cls.IsStackSlot()) << cls;
8241 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8242 }
8243 __ j(kNotEqual, &loop);
8244 break;
8245 }
8246
8247 case TypeCheckKind::kClassHierarchyCheck: {
8248 // /* HeapReference<Class> */ temp = obj->klass_
8249 GenerateReferenceLoadTwoRegisters(instruction,
8250 temp_loc,
8251 obj_loc,
8252 class_offset,
8253 kWithoutReadBarrier);
8254
8255 // Walk over the class hierarchy to find a match.
8256 NearLabel loop;
8257 __ Bind(&loop);
8258 if (cls.IsRegister()) {
8259 __ cmpl(temp, cls.AsRegister<Register>());
8260 } else {
8261 DCHECK(cls.IsStackSlot()) << cls;
8262 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8263 }
8264 __ j(kEqual, &done);
8265
8266 // /* HeapReference<Class> */ temp = temp->super_class_
8267 GenerateReferenceLoadOneRegister(instruction,
8268 temp_loc,
8269 super_offset,
8270 maybe_temp2_loc,
8271 kWithoutReadBarrier);
8272
8273 // If the class reference currently in `temp` is not null, jump
8274 // back at the beginning of the loop.
8275 __ testl(temp, temp);
8276 __ j(kNotZero, &loop);
8277 // Otherwise, jump to the slow path to throw the exception.;
8278 __ jmp(type_check_slow_path->GetEntryLabel());
8279 break;
8280 }
8281
8282 case TypeCheckKind::kArrayObjectCheck: {
8283 // /* HeapReference<Class> */ temp = obj->klass_
8284 GenerateReferenceLoadTwoRegisters(instruction,
8285 temp_loc,
8286 obj_loc,
8287 class_offset,
8288 kWithoutReadBarrier);
8289
8290 // Do an exact check.
8291 if (cls.IsRegister()) {
8292 __ cmpl(temp, cls.AsRegister<Register>());
8293 } else {
8294 DCHECK(cls.IsStackSlot()) << cls;
8295 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8296 }
8297 __ j(kEqual, &done);
8298
8299 // Otherwise, we need to check that the object's class is a non-primitive array.
8300 // /* HeapReference<Class> */ temp = temp->component_type_
8301 GenerateReferenceLoadOneRegister(instruction,
8302 temp_loc,
8303 component_offset,
8304 maybe_temp2_loc,
8305 kWithoutReadBarrier);
8306
8307 // If the component type is null (i.e. the object not an array), jump to the slow path to
8308 // throw the exception. Otherwise proceed with the check.
8309 __ testl(temp, temp);
8310 __ j(kZero, type_check_slow_path->GetEntryLabel());
8311
8312 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
8313 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8314 break;
8315 }
8316
8317 case TypeCheckKind::kUnresolvedCheck:
8318 // We always go into the type check slow path for the unresolved check case.
8319 // We cannot directly call the CheckCast runtime entry point
8320 // without resorting to a type checking slow path here (i.e. by
8321 // calling InvokeRuntime directly), as it would require to
8322 // assign fixed registers for the inputs of this HInstanceOf
8323 // instruction (following the runtime calling convention), which
8324 // might be cluttered by the potential first read barrier
8325 // emission at the beginning of this method.
8326 __ jmp(type_check_slow_path->GetEntryLabel());
8327 break;
8328
8329 case TypeCheckKind::kInterfaceCheck: {
8330 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
8331 // We can not get false positives by doing this.
8332 // /* HeapReference<Class> */ temp = obj->klass_
8333 GenerateReferenceLoadTwoRegisters(instruction,
8334 temp_loc,
8335 obj_loc,
8336 class_offset,
8337 kWithoutReadBarrier);
8338
8339 // /* HeapReference<Class> */ temp = temp->iftable_
8340 GenerateReferenceLoadOneRegister(instruction,
8341 temp_loc,
8342 iftable_offset,
8343 maybe_temp2_loc,
8344 kWithoutReadBarrier);
8345 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8346 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
8347 // Maybe poison the `cls` for direct comparison with memory.
8348 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8349 // Loop through the iftable and check if any class matches.
8350 NearLabel start_loop;
8351 __ Bind(&start_loop);
8352 // Check if we still have an entry to compare.
8353 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
8354 __ j(kNegative, type_check_slow_path->GetEntryLabel());
8355 // Go to next interface if the classes do not match.
8356 __ cmpl(cls.AsRegister<Register>(),
8357 CodeGeneratorX86::ArrayAddress(temp,
8358 maybe_temp2_loc,
8359 TIMES_4,
8360 object_array_data_offset));
8361 __ j(kNotEqual, &start_loop);
8362 // If `cls` was poisoned above, unpoison it.
8363 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8364 break;
8365 }
8366
8367 case TypeCheckKind::kBitstringCheck: {
8368 // /* HeapReference<Class> */ temp = obj->klass_
8369 GenerateReferenceLoadTwoRegisters(instruction,
8370 temp_loc,
8371 obj_loc,
8372 class_offset,
8373 kWithoutReadBarrier);
8374
8375 GenerateBitstringTypeCheckCompare(instruction, temp);
8376 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8377 break;
8378 }
8379 }
8380 __ Bind(&done);
8381
8382 __ Bind(type_check_slow_path->GetExitLabel());
8383 }
8384
VisitMonitorOperation(HMonitorOperation * instruction)8385 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8386 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8387 instruction, LocationSummary::kCallOnMainOnly);
8388 InvokeRuntimeCallingConvention calling_convention;
8389 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8390 }
8391
VisitMonitorOperation(HMonitorOperation * instruction)8392 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8393 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8394 : kQuickUnlockObject,
8395 instruction,
8396 instruction->GetDexPc());
8397 if (instruction->IsEnter()) {
8398 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8399 } else {
8400 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8401 }
8402 }
8403
VisitX86AndNot(HX86AndNot * instruction)8404 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8405 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8406 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8407 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8408 locations->SetInAt(0, Location::RequiresRegister());
8409 locations->SetInAt(1, Location::RequiresRegister());
8410 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8411 }
8412
VisitX86AndNot(HX86AndNot * instruction)8413 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8414 LocationSummary* locations = instruction->GetLocations();
8415 Location first = locations->InAt(0);
8416 Location second = locations->InAt(1);
8417 Location dest = locations->Out();
8418 if (instruction->GetResultType() == DataType::Type::kInt32) {
8419 __ andn(dest.AsRegister<Register>(),
8420 first.AsRegister<Register>(),
8421 second.AsRegister<Register>());
8422 } else {
8423 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8424 __ andn(dest.AsRegisterPairLow<Register>(),
8425 first.AsRegisterPairLow<Register>(),
8426 second.AsRegisterPairLow<Register>());
8427 __ andn(dest.AsRegisterPairHigh<Register>(),
8428 first.AsRegisterPairHigh<Register>(),
8429 second.AsRegisterPairHigh<Register>());
8430 }
8431 }
8432
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)8433 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8434 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8435 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8436 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8437 locations->SetInAt(0, Location::RequiresRegister());
8438 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8439 }
8440
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)8441 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8442 HX86MaskOrResetLeastSetBit* instruction) {
8443 LocationSummary* locations = instruction->GetLocations();
8444 Location src = locations->InAt(0);
8445 Location dest = locations->Out();
8446 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
8447 switch (instruction->GetOpKind()) {
8448 case HInstruction::kAnd:
8449 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8450 break;
8451 case HInstruction::kXor:
8452 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8453 break;
8454 default:
8455 LOG(FATAL) << "Unreachable";
8456 }
8457 }
8458
VisitAnd(HAnd * instruction)8459 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)8460 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)8461 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8462
HandleBitwiseOperation(HBinaryOperation * instruction)8463 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8464 LocationSummary* locations =
8465 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8466 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8467 || instruction->GetResultType() == DataType::Type::kInt64);
8468 locations->SetInAt(0, Location::RequiresRegister());
8469 locations->SetInAt(1, Location::Any());
8470 locations->SetOut(Location::SameAsFirstInput());
8471 }
8472
VisitAnd(HAnd * instruction)8473 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8474 HandleBitwiseOperation(instruction);
8475 }
8476
VisitOr(HOr * instruction)8477 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8478 HandleBitwiseOperation(instruction);
8479 }
8480
VisitXor(HXor * instruction)8481 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8482 HandleBitwiseOperation(instruction);
8483 }
8484
HandleBitwiseOperation(HBinaryOperation * instruction)8485 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8486 LocationSummary* locations = instruction->GetLocations();
8487 Location first = locations->InAt(0);
8488 Location second = locations->InAt(1);
8489 DCHECK(first.Equals(locations->Out()));
8490
8491 if (instruction->GetResultType() == DataType::Type::kInt32) {
8492 if (second.IsRegister()) {
8493 if (instruction->IsAnd()) {
8494 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8495 } else if (instruction->IsOr()) {
8496 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8497 } else {
8498 DCHECK(instruction->IsXor());
8499 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8500 }
8501 } else if (second.IsConstant()) {
8502 if (instruction->IsAnd()) {
8503 __ andl(first.AsRegister<Register>(),
8504 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8505 } else if (instruction->IsOr()) {
8506 __ orl(first.AsRegister<Register>(),
8507 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8508 } else {
8509 DCHECK(instruction->IsXor());
8510 __ xorl(first.AsRegister<Register>(),
8511 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8512 }
8513 } else {
8514 if (instruction->IsAnd()) {
8515 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8516 } else if (instruction->IsOr()) {
8517 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8518 } else {
8519 DCHECK(instruction->IsXor());
8520 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8521 }
8522 }
8523 } else {
8524 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8525 if (second.IsRegisterPair()) {
8526 if (instruction->IsAnd()) {
8527 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8528 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8529 } else if (instruction->IsOr()) {
8530 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8531 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8532 } else {
8533 DCHECK(instruction->IsXor());
8534 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8535 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8536 }
8537 } else if (second.IsDoubleStackSlot()) {
8538 if (instruction->IsAnd()) {
8539 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8540 __ andl(first.AsRegisterPairHigh<Register>(),
8541 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8542 } else if (instruction->IsOr()) {
8543 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8544 __ orl(first.AsRegisterPairHigh<Register>(),
8545 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8546 } else {
8547 DCHECK(instruction->IsXor());
8548 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8549 __ xorl(first.AsRegisterPairHigh<Register>(),
8550 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8551 }
8552 } else {
8553 DCHECK(second.IsConstant()) << second;
8554 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8555 int32_t low_value = Low32Bits(value);
8556 int32_t high_value = High32Bits(value);
8557 Immediate low(low_value);
8558 Immediate high(high_value);
8559 Register first_low = first.AsRegisterPairLow<Register>();
8560 Register first_high = first.AsRegisterPairHigh<Register>();
8561 if (instruction->IsAnd()) {
8562 if (low_value == 0) {
8563 __ xorl(first_low, first_low);
8564 } else if (low_value != -1) {
8565 __ andl(first_low, low);
8566 }
8567 if (high_value == 0) {
8568 __ xorl(first_high, first_high);
8569 } else if (high_value != -1) {
8570 __ andl(first_high, high);
8571 }
8572 } else if (instruction->IsOr()) {
8573 if (low_value != 0) {
8574 __ orl(first_low, low);
8575 }
8576 if (high_value != 0) {
8577 __ orl(first_high, high);
8578 }
8579 } else {
8580 DCHECK(instruction->IsXor());
8581 if (low_value != 0) {
8582 __ xorl(first_low, low);
8583 }
8584 if (high_value != 0) {
8585 __ xorl(first_high, high);
8586 }
8587 }
8588 }
8589 }
8590 }
8591
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)8592 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8593 HInstruction* instruction,
8594 Location out,
8595 uint32_t offset,
8596 Location maybe_temp,
8597 ReadBarrierOption read_barrier_option) {
8598 Register out_reg = out.AsRegister<Register>();
8599 if (read_barrier_option == kWithReadBarrier) {
8600 DCHECK(codegen_->EmitReadBarrier());
8601 if (kUseBakerReadBarrier) {
8602 // Load with fast path based Baker's read barrier.
8603 // /* HeapReference<Object> */ out = *(out + offset)
8604 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8605 instruction, out, out_reg, offset, /* needs_null_check= */ false);
8606 } else {
8607 // Load with slow path based read barrier.
8608 // Save the value of `out` into `maybe_temp` before overwriting it
8609 // in the following move operation, as we will need it for the
8610 // read barrier below.
8611 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8612 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8613 // /* HeapReference<Object> */ out = *(out + offset)
8614 __ movl(out_reg, Address(out_reg, offset));
8615 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8616 }
8617 } else {
8618 // Plain load with no read barrier.
8619 // /* HeapReference<Object> */ out = *(out + offset)
8620 __ movl(out_reg, Address(out_reg, offset));
8621 __ MaybeUnpoisonHeapReference(out_reg);
8622 }
8623 }
8624
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)8625 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8626 HInstruction* instruction,
8627 Location out,
8628 Location obj,
8629 uint32_t offset,
8630 ReadBarrierOption read_barrier_option) {
8631 Register out_reg = out.AsRegister<Register>();
8632 Register obj_reg = obj.AsRegister<Register>();
8633 if (read_barrier_option == kWithReadBarrier) {
8634 DCHECK(codegen_->EmitReadBarrier());
8635 if (kUseBakerReadBarrier) {
8636 // Load with fast path based Baker's read barrier.
8637 // /* HeapReference<Object> */ out = *(obj + offset)
8638 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8639 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8640 } else {
8641 // Load with slow path based read barrier.
8642 // /* HeapReference<Object> */ out = *(obj + offset)
8643 __ movl(out_reg, Address(obj_reg, offset));
8644 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8645 }
8646 } else {
8647 // Plain load with no read barrier.
8648 // /* HeapReference<Object> */ out = *(obj + offset)
8649 __ movl(out_reg, Address(obj_reg, offset));
8650 __ MaybeUnpoisonHeapReference(out_reg);
8651 }
8652 }
8653
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)8654 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8655 HInstruction* instruction,
8656 Location root,
8657 const Address& address,
8658 Label* fixup_label,
8659 ReadBarrierOption read_barrier_option) {
8660 Register root_reg = root.AsRegister<Register>();
8661 if (read_barrier_option == kWithReadBarrier) {
8662 DCHECK(codegen_->EmitReadBarrier());
8663 if (kUseBakerReadBarrier) {
8664 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8665 // Baker's read barrier are used:
8666 //
8667 // root = obj.field;
8668 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8669 // if (temp != null) {
8670 // root = temp(root)
8671 // }
8672
8673 // /* GcRoot<mirror::Object> */ root = *address
8674 __ movl(root_reg, address);
8675 if (fixup_label != nullptr) {
8676 __ Bind(fixup_label);
8677 }
8678 static_assert(
8679 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8680 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8681 "have different sizes.");
8682 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8683 "art::mirror::CompressedReference<mirror::Object> and int32_t "
8684 "have different sizes.");
8685
8686 // Slow path marking the GC root `root`.
8687 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8688 instruction, root, /* unpoison_ref_before_marking= */ false);
8689 codegen_->AddSlowPath(slow_path);
8690
8691 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8692 const int32_t entry_point_offset =
8693 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8694 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8695 // The entrypoint is null when the GC is not marking.
8696 __ j(kNotEqual, slow_path->GetEntryLabel());
8697 __ Bind(slow_path->GetExitLabel());
8698 } else {
8699 // GC root loaded through a slow path for read barriers other
8700 // than Baker's.
8701 // /* GcRoot<mirror::Object>* */ root = address
8702 __ leal(root_reg, address);
8703 if (fixup_label != nullptr) {
8704 __ Bind(fixup_label);
8705 }
8706 // /* mirror::Object* */ root = root->Read()
8707 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8708 }
8709 } else {
8710 // Plain GC root load with no read barrier.
8711 // /* GcRoot<mirror::Object> */ root = *address
8712 __ movl(root_reg, address);
8713 if (fixup_label != nullptr) {
8714 __ Bind(fixup_label);
8715 }
8716 // Note that GC roots are not affected by heap poisoning, thus we
8717 // do not have to unpoison `root_reg` here.
8718 }
8719 }
8720
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)8721 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8722 Location ref,
8723 Register obj,
8724 uint32_t offset,
8725 bool needs_null_check) {
8726 DCHECK(EmitBakerReadBarrier());
8727
8728 // /* HeapReference<Object> */ ref = *(obj + offset)
8729 Address src(obj, offset);
8730 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8731 }
8732
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)8733 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8734 Location ref,
8735 Register obj,
8736 uint32_t data_offset,
8737 Location index,
8738 bool needs_null_check) {
8739 DCHECK(EmitBakerReadBarrier());
8740
8741 static_assert(
8742 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8743 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8744 // /* HeapReference<Object> */ ref =
8745 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8746 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8747 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8748 }
8749
// Emits a reference load `ref = *src` guarded by a Baker read barrier on the holder `obj`.
//
// The emitted sequence tests the gray bit of `obj`'s lock word, performs the load (which
// does not modify the flags), and then branches to a marking slow path if the bit was set.
//
// Parameters:
//   instruction          the instruction the load belongs to (passed to the slow path and
//                        used for implicit null check recording).
//   ref                  destination location; must be a register.
//   obj                  register holding the object whose lock word is tested.
//   src                  address the reference is loaded from.
//   needs_null_check     if true, an implicit null check is recorded right after the
//                        lock word access.
//   always_update_field  if true, the slow path is ReadBarrierMarkAndUpdateFieldSlowPathX86
//                        (which also receives `obj`, `src` and `*temp`); otherwise
//                        ReadBarrierMarkSlowPathX86 is used.
//   temp                 must be non-null when `always_update_field` is true.
void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 Register obj,
                                                                 const Address& src,
                                                                 bool needs_null_check,
                                                                 bool always_update_field,
                                                                 Register* temp) {
  DCHECK(EmitBakerReadBarrier());

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
  //   which is a no-op thanks to the x86 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  Register ref_reg = ref.AsRegister<Register>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  // Locate the gray bit as (byte within the lock word, bit within that byte) so that a
  // single byte-sized test can be used.
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    // The `testb` above is the first access through `obj`.
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}
8830
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8831 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8832 Location out,
8833 Location ref,
8834 Location obj,
8835 uint32_t offset,
8836 Location index) {
8837 DCHECK(EmitReadBarrier());
8838
8839 // Insert a slow path based read barrier *after* the reference load.
8840 //
8841 // If heap poisoning is enabled, the unpoisoning of the loaded
8842 // reference will be carried out by the runtime within the slow
8843 // path.
8844 //
8845 // Note that `ref` currently does not get unpoisoned (when heap
8846 // poisoning is enabled), which is alright as the `ref` argument is
8847 // not used by the artReadBarrierSlow entry point.
8848 //
8849 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8850 SlowPathCode* slow_path = new (GetScopedAllocator())
8851 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8852 AddSlowPath(slow_path);
8853
8854 __ jmp(slow_path->GetEntryLabel());
8855 __ Bind(slow_path->GetExitLabel());
8856 }
8857
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8858 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8859 Location out,
8860 Location ref,
8861 Location obj,
8862 uint32_t offset,
8863 Location index) {
8864 if (EmitReadBarrier()) {
8865 // Baker's read barriers shall be handled by the fast path
8866 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8867 DCHECK(!kUseBakerReadBarrier);
8868 // If heap poisoning is enabled, unpoisoning will be taken care of
8869 // by the runtime within the slow path.
8870 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8871 } else if (kPoisonHeapReferences) {
8872 __ UnpoisonHeapReference(out.AsRegister<Register>());
8873 }
8874 }
8875
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8876 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8877 Location out,
8878 Location root) {
8879 DCHECK(EmitReadBarrier());
8880
8881 // Insert a slow path based read barrier *after* the GC root load.
8882 //
8883 // Note that GC roots are not affected by heap poisoning, so we do
8884 // not need to do anything special for this here.
8885 SlowPathCode* slow_path =
8886 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8887 AddSlowPath(slow_path);
8888
8889 __ jmp(slow_path->GetEntryLabel());
8890 __ Bind(slow_path->GetExitLabel());
8891 }
8892
VisitBoundType(HBoundType * instruction)8893 void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8894 // Nothing to do, this should be removed during prepare for register allocator.
8895 LOG(FATAL) << "Unreachable";
8896 }
8897
VisitBoundType(HBoundType * instruction)8898 void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8899 // Nothing to do, this should be removed during prepare for register allocator.
8900 LOG(FATAL) << "Unreachable";
8901 }
8902
8903 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8904 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8905 LocationSummary* locations =
8906 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8907 locations->SetInAt(0, Location::RequiresRegister());
8908 }
8909
GenPackedSwitchWithCompares(Register value_reg,int32_t lower_bound,uint32_t num_entries,HBasicBlock * switch_block,HBasicBlock * default_block)8910 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8911 int32_t lower_bound,
8912 uint32_t num_entries,
8913 HBasicBlock* switch_block,
8914 HBasicBlock* default_block) {
8915 // Figure out the correct compare values and jump conditions.
8916 // Handle the first compare/branch as a special case because it might
8917 // jump to the default case.
8918 DCHECK_GT(num_entries, 2u);
8919 Condition first_condition;
8920 uint32_t index;
8921 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8922 if (lower_bound != 0) {
8923 first_condition = kLess;
8924 __ cmpl(value_reg, Immediate(lower_bound));
8925 __ j(first_condition, codegen_->GetLabelOf(default_block));
8926 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8927
8928 index = 1;
8929 } else {
8930 // Handle all the compare/jumps below.
8931 first_condition = kBelow;
8932 index = 0;
8933 }
8934
8935 // Handle the rest of the compare/jumps.
8936 for (; index + 1 < num_entries; index += 2) {
8937 int32_t compare_to_value = lower_bound + index + 1;
8938 __ cmpl(value_reg, Immediate(compare_to_value));
8939 // Jump to successors[index] if value < case_value[index].
8940 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8941 // Jump to successors[index + 1] if value == case_value[index + 1].
8942 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8943 }
8944
8945 if (index != num_entries) {
8946 // There are an odd number of entries. Handle the last one.
8947 DCHECK_EQ(index + 1, num_entries);
8948 __ cmpl(value_reg, Immediate(lower_bound + index));
8949 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8950 }
8951
8952 // And the default for any other value.
8953 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8954 __ jmp(codegen_->GetLabelOf(default_block));
8955 }
8956 }
8957
VisitPackedSwitch(HPackedSwitch * switch_instr)8958 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8959 int32_t lower_bound = switch_instr->GetStartValue();
8960 uint32_t num_entries = switch_instr->GetNumEntries();
8961 LocationSummary* locations = switch_instr->GetLocations();
8962 Register value_reg = locations->InAt(0).AsRegister<Register>();
8963
8964 GenPackedSwitchWithCompares(value_reg,
8965 lower_bound,
8966 num_entries,
8967 switch_instr->GetBlock(),
8968 switch_instr->GetDefaultBlock());
8969 }
8970
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8971 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8972 LocationSummary* locations =
8973 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8974 locations->SetInAt(0, Location::RequiresRegister());
8975
8976 // Constant area pointer.
8977 locations->SetInAt(1, Location::RequiresRegister());
8978
8979 // And the temporary we need.
8980 locations->AddTemp(Location::RequiresRegister());
8981 }
8982
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8983 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8984 int32_t lower_bound = switch_instr->GetStartValue();
8985 uint32_t num_entries = switch_instr->GetNumEntries();
8986 LocationSummary* locations = switch_instr->GetLocations();
8987 Register value_reg = locations->InAt(0).AsRegister<Register>();
8988 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8989
8990 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8991 GenPackedSwitchWithCompares(value_reg,
8992 lower_bound,
8993 num_entries,
8994 switch_instr->GetBlock(),
8995 default_block);
8996 return;
8997 }
8998
8999 // Optimizing has a jump area.
9000 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
9001 Register constant_area = locations->InAt(1).AsRegister<Register>();
9002
9003 // Remove the bias, if needed.
9004 if (lower_bound != 0) {
9005 __ leal(temp_reg, Address(value_reg, -lower_bound));
9006 value_reg = temp_reg;
9007 }
9008
9009 // Is the value in range?
9010 DCHECK_GE(num_entries, 1u);
9011 __ cmpl(value_reg, Immediate(num_entries - 1));
9012 __ j(kAbove, codegen_->GetLabelOf(default_block));
9013
9014 // We are in the range of the table.
9015 // Load (target-constant_area) from the jump table, indexing by the value.
9016 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
9017
9018 // Compute the actual target address by adding in constant_area.
9019 __ addl(temp_reg, constant_area);
9020
9021 // And jump.
9022 __ jmp(temp_reg);
9023 }
9024
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)9025 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
9026 HX86ComputeBaseMethodAddress* insn) {
9027 LocationSummary* locations =
9028 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
9029 locations->SetOut(Location::RequiresRegister());
9030 }
9031
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)9032 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
9033 HX86ComputeBaseMethodAddress* insn) {
9034 LocationSummary* locations = insn->GetLocations();
9035 Register reg = locations->Out().AsRegister<Register>();
9036
9037 // Generate call to next instruction.
9038 Label next_instruction;
9039 __ call(&next_instruction);
9040 __ Bind(&next_instruction);
9041
9042 // Remember this offset for later use with constant area.
9043 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
9044
9045 // Grab the return address off the stack.
9046 __ popl(reg);
9047 }
9048
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)9049 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
9050 HX86LoadFromConstantTable* insn) {
9051 LocationSummary* locations =
9052 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
9053
9054 locations->SetInAt(0, Location::RequiresRegister());
9055 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
9056
9057 // If we don't need to be materialized, we only need the inputs to be set.
9058 if (insn->IsEmittedAtUseSite()) {
9059 return;
9060 }
9061
9062 switch (insn->GetType()) {
9063 case DataType::Type::kFloat32:
9064 case DataType::Type::kFloat64:
9065 locations->SetOut(Location::RequiresFpuRegister());
9066 break;
9067
9068 case DataType::Type::kInt32:
9069 locations->SetOut(Location::RequiresRegister());
9070 break;
9071
9072 default:
9073 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9074 }
9075 }
9076
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)9077 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
9078 if (insn->IsEmittedAtUseSite()) {
9079 return;
9080 }
9081
9082 LocationSummary* locations = insn->GetLocations();
9083 Location out = locations->Out();
9084 Register const_area = locations->InAt(0).AsRegister<Register>();
9085 HConstant *value = insn->GetConstant();
9086
9087 switch (insn->GetType()) {
9088 case DataType::Type::kFloat32:
9089 __ movss(out.AsFpuRegister<XmmRegister>(),
9090 codegen_->LiteralFloatAddress(
9091 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9092 break;
9093
9094 case DataType::Type::kFloat64:
9095 __ movsd(out.AsFpuRegister<XmmRegister>(),
9096 codegen_->LiteralDoubleAddress(
9097 value->AsDoubleConstant()->GetValue(),
9098 insn->GetBaseMethodAddress(),
9099 const_area));
9100 break;
9101
9102 case DataType::Type::kInt32:
9103 __ movl(out.AsRegister<Register>(),
9104 codegen_->LiteralInt32Address(
9105 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9106 break;
9107
9108 default:
9109 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9110 }
9111 }
9112
9113 /**
9114 * Class to handle late fixup of offsets into constant area.
9115 */
9116 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
9117 public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)9118 RIPFixup(CodeGeneratorX86& codegen,
9119 HX86ComputeBaseMethodAddress* base_method_address,
9120 size_t offset)
9121 : codegen_(&codegen),
9122 base_method_address_(base_method_address),
9123 offset_into_constant_area_(offset) {}
9124
9125 protected:
SetOffset(size_t offset)9126 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
9127
9128 CodeGeneratorX86* codegen_;
9129 HX86ComputeBaseMethodAddress* base_method_address_;
9130
9131 private:
Process(const MemoryRegion & region,int pos)9132 void Process(const MemoryRegion& region, int pos) override {
9133 // Patch the correct offset for the instruction. The place to patch is the
9134 // last 4 bytes of the instruction.
9135 // The value to patch is the distance from the offset in the constant area
9136 // from the address computed by the HX86ComputeBaseMethodAddress instruction.
9137 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
9138 int32_t relative_position =
9139 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
9140
9141 // Patch in the right value.
9142 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
9143 }
9144
9145 // Location in constant area that the fixup refers to.
9146 int32_t offset_into_constant_area_;
9147 };
9148
9149 /**
9150 * Class to handle late fixup of offsets to a jump table that will be created in the
9151 * constant area.
9152 */
9153 class JumpTableRIPFixup : public RIPFixup {
9154 public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)9155 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
9156 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
9157 switch_instr_(switch_instr) {}
9158
CreateJumpTable()9159 void CreateJumpTable() {
9160 X86Assembler* assembler = codegen_->GetAssembler();
9161
9162 // Ensure that the reference to the jump table has the correct offset.
9163 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
9164 SetOffset(offset_in_constant_table);
9165
9166 // The label values in the jump table are computed relative to the
9167 // instruction addressing the constant area.
9168 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
9169
9170 // Populate the jump table with the correct values for the jump table.
9171 int32_t num_entries = switch_instr_->GetNumEntries();
9172 HBasicBlock* block = switch_instr_->GetBlock();
9173 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
9174 // The value that we want is the target offset - the position of the table.
9175 for (int32_t i = 0; i < num_entries; i++) {
9176 HBasicBlock* b = successors[i];
9177 Label* l = codegen_->GetLabelOf(b);
9178 DCHECK(l->IsBound());
9179 int32_t offset_to_block = l->Position() - relative_offset;
9180 assembler->AppendInt32(offset_to_block);
9181 }
9182 }
9183
9184 private:
9185 const HX86PackedSwitch* switch_instr_;
9186 };
9187
Finalize()9188 void CodeGeneratorX86::Finalize() {
9189 // Generate the constant area if needed.
9190 X86Assembler* assembler = GetAssembler();
9191
9192 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
9193 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
9194 // byte values.
9195 assembler->Align(4, 0);
9196 constant_area_start_ = assembler->CodeSize();
9197
9198 // Populate any jump tables.
9199 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
9200 jump_table->CreateJumpTable();
9201 }
9202
9203 // And now add the constant area to the generated code.
9204 assembler->AddConstantArea();
9205 }
9206
9207 // And finish up.
9208 CodeGenerator::Finalize();
9209 }
9210
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)9211 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
9212 HX86ComputeBaseMethodAddress* method_base,
9213 Register reg) {
9214 AssemblerFixup* fixup =
9215 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
9216 return Address(reg, kPlaceholder32BitOffset, fixup);
9217 }
9218
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)9219 Address CodeGeneratorX86::LiteralFloatAddress(float v,
9220 HX86ComputeBaseMethodAddress* method_base,
9221 Register reg) {
9222 AssemblerFixup* fixup =
9223 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
9224 return Address(reg, kPlaceholder32BitOffset, fixup);
9225 }
9226
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)9227 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
9228 HX86ComputeBaseMethodAddress* method_base,
9229 Register reg) {
9230 AssemblerFixup* fixup =
9231 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
9232 return Address(reg, kPlaceholder32BitOffset, fixup);
9233 }
9234
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)9235 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
9236 HX86ComputeBaseMethodAddress* method_base,
9237 Register reg) {
9238 AssemblerFixup* fixup =
9239 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
9240 return Address(reg, kPlaceholder32BitOffset, fixup);
9241 }
9242
Load32BitValue(Register dest,int32_t value)9243 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
9244 if (value == 0) {
9245 __ xorl(dest, dest);
9246 } else {
9247 __ movl(dest, Immediate(value));
9248 }
9249 }
9250
Compare32BitValue(Register dest,int32_t value)9251 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
9252 if (value == 0) {
9253 __ testl(dest, dest);
9254 } else {
9255 __ cmpl(dest, Immediate(value));
9256 }
9257 }
9258
GenerateIntCompare(Location lhs,Location rhs)9259 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
9260 Register lhs_reg = lhs.AsRegister<Register>();
9261 GenerateIntCompare(lhs_reg, rhs);
9262 }
9263
GenerateIntCompare(Register lhs,Location rhs)9264 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
9265 if (rhs.IsConstant()) {
9266 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
9267 Compare32BitValue(lhs, value);
9268 } else if (rhs.IsStackSlot()) {
9269 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
9270 } else {
9271 __ cmpl(lhs, rhs.AsRegister<Register>());
9272 }
9273 }
9274
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)9275 Address CodeGeneratorX86::ArrayAddress(Register obj,
9276 Location index,
9277 ScaleFactor scale,
9278 uint32_t data_offset) {
9279 return index.IsConstant()
9280 ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
9281 : Address(obj, index.AsRegister<Register>(), scale, data_offset);
9282 }
9283
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)9284 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
9285 Register reg,
9286 Register value) {
9287 // Create a fixup to be used to create and address the jump table.
9288 JumpTableRIPFixup* table_fixup =
9289 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
9290
9291 // We have to populate the jump tables.
9292 fixups_to_jump_tables_.push_back(table_fixup);
9293
9294 // We want a scaled address, as we are extracting the correct offset from the table.
9295 return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
9296 }
9297
9298 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)9299 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
9300 if (!target.IsValid()) {
9301 DCHECK_EQ(type, DataType::Type::kVoid);
9302 return;
9303 }
9304
9305 DCHECK_NE(type, DataType::Type::kVoid);
9306
9307 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
9308 if (target.Equals(return_loc)) {
9309 return;
9310 }
9311
9312 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9313 // with the else branch.
9314 if (type == DataType::Type::kInt64) {
9315 HParallelMove parallel_move(GetGraph()->GetAllocator());
9316 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
9317 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
9318 GetMoveResolver()->EmitNativeCode(¶llel_move);
9319 } else {
9320 // Let the parallel move resolver take care of all of this.
9321 HParallelMove parallel_move(GetGraph()->GetAllocator());
9322 parallel_move.AddMove(return_loc, target, type, nullptr);
9323 GetMoveResolver()->EmitNativeCode(¶llel_move);
9324 }
9325 }
9326
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const9327 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
9328 const uint8_t* roots_data,
9329 const PatchInfo<Label>& info,
9330 uint64_t index_in_table) const {
9331 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
9332 uintptr_t address =
9333 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9334 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
9335 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
9336 dchecked_integral_cast<uint32_t>(address);
9337 }
9338
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)9339 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9340 for (const PatchInfo<Label>& info : jit_string_patches_) {
9341 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
9342 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9343 PatchJitRootUse(code, roots_data, info, index_in_table);
9344 }
9345
9346 for (const PatchInfo<Label>& info : jit_class_patches_) {
9347 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
9348 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9349 PatchJitRootUse(code, roots_data, info, index_in_table);
9350 }
9351 }
9352
VisitIntermediateAddress(HIntermediateAddress * instruction)9353 void LocationsBuilderX86::VisitIntermediateAddress(
9354 [[maybe_unused]] HIntermediateAddress* instruction) {
9355 LOG(FATAL) << "Unreachable";
9356 }
9357
VisitIntermediateAddress(HIntermediateAddress * instruction)9358 void InstructionCodeGeneratorX86::VisitIntermediateAddress(
9359 [[maybe_unused]] HIntermediateAddress* instruction) {
9360 LOG(FATAL) << "Unreachable";
9361 }
9362
CpuHasAvxFeatureFlag()9363 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
9364 return codegen_->GetInstructionSetFeatures().HasAVX();
9365 }
CpuHasAvx2FeatureFlag()9366 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
9367 return codegen_->GetInstructionSetFeatures().HasAVX2();
9368 }
CpuHasAvxFeatureFlag()9369 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
9370 return codegen_->GetInstructionSetFeatures().HasAVX();
9371 }
CpuHasAvx2FeatureFlag()9372 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
9373 return codegen_->GetInstructionSetFeatures().HasAVX2();
9374 }
9375
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)9376 void LocationsBuilderX86::VisitBitwiseNegatedRight(
9377 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9378 LOG(FATAL) << "Unimplemented";
9379 }
9380
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instruction)9381 void InstructionCodeGeneratorX86::VisitBitwiseNegatedRight(
9382 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9383 LOG(FATAL) << "Unimplemented";
9384 }
9385
9386 #undef __
9387
9388 } // namespace x86
9389 } // namespace art
9390