xref: /aosp_15_r20/art/compiler/optimizing/intrinsics_x86_64.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86_64.h"
18 
19 #include <limits>
20 
21 #include "arch/x86_64/instruction_set_features_x86_64.h"
22 #include "arch/x86_64/registers_x86_64.h"
23 #include "art_method.h"
24 #include "base/bit_utils.h"
25 #include "code_generator_x86_64.h"
26 #include "dex/modifiers.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "heap_poisoning.h"
30 #include "intrinsic_objects.h"
31 #include "intrinsics.h"
32 #include "intrinsics_utils.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/object_array-inl.h"
36 #include "mirror/reference.h"
37 #include "mirror/string.h"
38 #include "optimizing/code_generator.h"
39 #include "optimizing/data_type.h"
40 #include "optimizing/locations.h"
41 #include "scoped_thread_state_change-inl.h"
42 #include "thread-current-inl.h"
43 #include "utils/x86_64/assembler_x86_64.h"
44 #include "utils/x86_64/constants_x86_64.h"
45 #include "well_known_classes.h"
46 
47 namespace art HIDDEN {
48 
49 namespace x86_64 {
50 
51 IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
52   : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
53 }
54 
55 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
56   return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
57 }
58 
59 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
60   return codegen_->GetGraph()->GetAllocator();
61 }
62 
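// TryDispatch() runs the matching locations-builder visitor; the invoke is treated as an
// intrinsic only if that visitor attached an intrinsified LocationSummary.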
63 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
64   Dispatch(invoke);
65   LocationSummary* res = invoke->GetLocations();
66   if (res == nullptr) {
67     return false;
68   }
69   return res->Intrinsified();
70 }
71 
72 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
73 
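// Convenience shorthand used throughout the ART code generators: `__` expands to the
// local `assembler` variable in scope.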
74 #define __ assembler->
75 
76 static void GenArrayAddress(X86_64Assembler* assembler,
77                             CpuRegister dest,
78                             CpuRegister base,
79                             Location pos,
80                             DataType::Type type,
81                             uint32_t data_offset) {
82   // Note: The heap is in low 4GiB, so we're using LEAL rather than LEAQ to save on code size.
83   if (pos.IsConstant()) {
84     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
85     __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
86   } else {
87     const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
88     __ leal(dest, Address(base, pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
89   }
90 }
91 
92 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
93 class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
94  public:
95   explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
96       : SlowPathCode(instruction) {
97   }
98 
99   void EmitNativeCode(CodeGenerator* codegen) override {
100     DCHECK(codegen->EmitBakerReadBarrier());
101     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
102     X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
103     LocationSummary* locations = instruction_->GetLocations();
104     DCHECK(locations->CanCall());
105     DCHECK(instruction_->IsInvokeStaticOrDirect())
106         << "Unexpected instruction in read barrier arraycopy slow path: "
107         << instruction_->DebugName();
108     DCHECK(instruction_->GetLocations()->Intrinsified());
109     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
110     Location length = locations->InAt(4);
111 
112     const DataType::Type type = DataType::Type::kReference;
113     const int32_t element_size = DataType::Size(type);
114 
115     CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
116     CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
117     CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
118 
119     __ Bind(GetEntryLabel());
120     // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
121     GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
122 
123     NearLabel loop;
124     __ Bind(&loop);
125     __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
126     __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
127     // TODO: Inline the mark bit check before calling the runtime?
128     // TMP = ReadBarrier::Mark(TMP);
129     // No need to save live registers; it's taken care of by the
130     // entrypoint. Also, there is no need to update the stack mask,
131     // as this runtime call will not trigger a garbage collection.
132     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
133     // This runtime call does not require a stack map.
134     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
135     __ MaybePoisonHeapReference(CpuRegister(TMP));
136     __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
137     __ addl(src_curr_addr, Immediate(element_size));
138     __ addl(dst_curr_addr, Immediate(element_size));
139     __ cmpl(src_curr_addr, src_stop_addr);
140     __ j(kNotEqual, &loop);
141     __ jmp(GetExitLabel());
142   }
143 
144   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
145 
146  private:
147   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
148 };
149 
150 // The MethodHandle.invokeExact intrinsic sets up arguments to match the target method call. If we
151 // need to go to the slow path, we call art_quick_invoke_polymorphic_with_hidden_receiver, which
152 // expects the MethodHandle object in RDI (in place of the actual ArtMethod).
153 class InvokePolymorphicSlowPathX86_64 : public SlowPathCode {
154  public:
155   InvokePolymorphicSlowPathX86_64(HInstruction* instruction, CpuRegister method_handle)
156       : SlowPathCode(instruction), method_handle_(method_handle) {
157     DCHECK(instruction->IsInvokePolymorphic());
158   }
159 
160   void EmitNativeCode(CodeGenerator* codegen) override {
161     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
162     X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
163     __ Bind(GetEntryLabel());
164     SaveLiveRegisters(codegen, instruction_->GetLocations());
165 
166     // Passing `MethodHandle` object as hidden argument.
167     __ movl(CpuRegister(RDI), method_handle_);
168     x86_64_codegen->InvokeRuntime(QuickEntrypointEnum::kQuickInvokePolymorphicWithHiddenReceiver,
169                                   instruction_,
170                                   instruction_->GetDexPc());
171 
172     RestoreLiveRegisters(codegen, instruction_->GetLocations());
173     __ jmp(GetExitLabel());
174   }
175 
176   const char* GetDescription() const override { return "InvokePolymorphicSlowPathX86_64"; }
177 
178  private:
179   const CpuRegister method_handle_;
180   DISALLOW_COPY_AND_ASSIGN(InvokePolymorphicSlowPathX86_64);
181 };
182 
183 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
184   LocationSummary* locations =
185       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
186   locations->SetInAt(0, Location::RequiresFpuRegister());
187   locations->SetOut(Location::RequiresRegister());
188 }
189 
190 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
191   LocationSummary* locations =
192       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
193   locations->SetInAt(0, Location::RequiresRegister());
194   locations->SetOut(Location::RequiresFpuRegister());
195 }
196 
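// Bitwise moves between general-purpose and XMM registers; with `is64bit` set, the full
// 64 bits are transferred (the MOVQ form) for the double <-> long case.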
197 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
198   Location input = locations->InAt(0);
199   Location output = locations->Out();
200   __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
201 }
202 
203 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
204   Location input = locations->InAt(0);
205   Location output = locations->Out();
206   __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
207 }
208 
209 void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
210   CreateFPToIntLocations(allocator_, invoke);
211 }
212 void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
213   CreateIntToFPLocations(allocator_, invoke);
214 }
215 
216 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
217   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
218 }
219 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
220   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
221 }
222 
223 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
224   CreateFPToIntLocations(allocator_, invoke);
225 }
226 void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
227   CreateIntToFPLocations(allocator_, invoke);
228 }
229 
230 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
231   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
232 }
233 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
234   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
235 }
236 
237 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
238   LocationSummary* locations =
239       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
240   locations->SetInAt(0, Location::RequiresRegister());
241   locations->SetOut(Location::SameAsFirstInput());
242 }
243 
244 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
245   CreateIntToIntLocations(allocator_, invoke);
246 }
247 
248 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
249   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
250 }
251 
252 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
253   CreateIntToIntLocations(allocator_, invoke);
254 }
255 
256 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
257   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
258 }
259 
260 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
261   CreateIntToIntLocations(allocator_, invoke);
262 }
263 
264 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
265   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
266 }
267 
268 static void GenIsInfinite(LocationSummary* locations,
269                           bool is64bit,
270                           CodeGeneratorX86_64* codegen) {
271   X86_64Assembler* assembler = codegen->GetAssembler();
272 
273   XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
274   CpuRegister output = locations->Out().AsRegister<CpuRegister>();
275 
276   NearLabel done1, done2;
277 
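  // COMISS/COMISD sets ZF on equality and PF on an unordered (NaN) comparison, so the
  // kNotEqual and kParityEven branches below leave the result at 0 for NaN and for any
  // value other than positive or negative infinity.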
278   if (is64bit) {
279     double kPositiveInfinity = std::numeric_limits<double>::infinity();
280     double kNegativeInfinity = -1 * kPositiveInfinity;
281 
282     __ xorq(output, output);
283     __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
284     __ j(kNotEqual, &done1);
285     __ j(kParityEven, &done2);
286     __ movq(output, Immediate(1));
287     __ jmp(&done2);
288     __ Bind(&done1);
289     __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
290     __ j(kNotEqual, &done2);
291     __ j(kParityEven, &done2);
292     __ movq(output, Immediate(1));
293     __ Bind(&done2);
294   } else {
295     float kPositiveInfinity = std::numeric_limits<float>::infinity();
296     float kNegativeInfinity = -1 * kPositiveInfinity;
297 
298     __ xorl(output, output);
299     __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
300     __ j(kNotEqual, &done1);
301     __ j(kParityEven, &done2);
302     __ movl(output, Immediate(1));
303     __ jmp(&done2);
304     __ Bind(&done1);
305     __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
306     __ j(kNotEqual, &done2);
307     __ j(kParityEven, &done2);
308     __ movl(output, Immediate(1));
309     __ Bind(&done2);
310   }
311 }
312 
313 void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
314   CreateFPToIntLocations(allocator_, invoke);
315 }
316 
317 void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
318   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, codegen_);
319 }
320 
321 void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
322   CreateFPToIntLocations(allocator_, invoke);
323 }
324 
325 void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
326   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, codegen_);
327 }
328 
329 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
330   LocationSummary* locations =
331       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
332   locations->SetInAt(0, Location::RequiresFpuRegister());
333   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
334 }
335 
336 void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
337   CreateFPToFPLocations(allocator_, invoke);
338 }
339 
340 void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
341   LocationSummary* locations = invoke->GetLocations();
342   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
343   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
344 
345   GetAssembler()->sqrtsd(out, in);
346 }
347 
348 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
349                                        HInvoke* invoke,
350                                        CodeGeneratorX86_64* codegen) {
351   // Do we have instruction support?
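  // If not, leave the invoke un-intrinsified so it falls back to the regular call path.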
352   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
353     return;
354   }
355 
356   CreateFPToFPLocations(allocator, invoke);
357 }
358 
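// The ROUNDSS/ROUNDSD immediate selects the rounding mode: 0 = round to nearest (even),
// 1 = round toward negative infinity (floor), 2 = round toward positive infinity (ceil).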
359 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
360   LocationSummary* locations = invoke->GetLocations();
361   DCHECK(!locations->WillCall());
362   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
363   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
364   __ roundsd(out, in, Immediate(round_mode));
365 }
366 
367 void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
368   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
369 }
370 
371 void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
372   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
373 }
374 
375 void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
376   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
377 }
378 
379 void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
380   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
381 }
382 
383 void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
384   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
385 }
386 
387 void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
388   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
389 }
390 
391 static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
392                                         HInvoke* invoke,
393                                         CodeGeneratorX86_64* codegen) {
394   // Do we have instruction support?
395   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
396     return;
397   }
398 
399   LocationSummary* locations =
400       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
401   locations->SetInAt(0, Location::RequiresFpuRegister());
402   locations->SetOut(Location::RequiresRegister());
403   locations->AddTemp(Location::RequiresFpuRegister());
404   locations->AddTemp(Location::RequiresFpuRegister());
405 }
406 
407 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
408   CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
409 }
410 
411 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
412   LocationSummary* locations = invoke->GetLocations();
413   DCHECK(!locations->WillCall());
414 
415   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
416   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
417   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
418   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
419   NearLabel skip_incr, done;
420   X86_64Assembler* assembler = GetAssembler();
421 
422   // Since no direct x86 rounding instruction matches the required semantics,
423   // this intrinsic is implemented as follows:
424   //  result = floor(in);
425   //  if (in - result >= 0.5f)
426   //    result = result + 1.0f;
427   __ movss(t2, in);
428   __ roundss(t1, in, Immediate(1));
429   __ subss(t2, t1);
430   __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
431   __ j(kBelow, &skip_incr);
432   __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
433   __ Bind(&skip_incr);
434 
435   // Final conversion to an integer. Unfortunately this also does not have a
436   // direct x86 instruction, since NaN should map to 0 and large positive
437   // values need to be clipped to the extreme value.
438   codegen_->Load32BitValue(out, kPrimIntMax);
439   __ cvtsi2ss(t2, out);
440   __ comiss(t1, t2);
441   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
442   __ movl(out, Immediate(0));  // does not change flags
443   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
444   __ cvttss2si(out, t1);
445   __ Bind(&done);
446 }
447 
448 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
449   CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
450 }
451 
452 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
453   LocationSummary* locations = invoke->GetLocations();
454   DCHECK(!locations->WillCall());
455 
456   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
457   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
458   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
459   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
460   NearLabel skip_incr, done;
461   X86_64Assembler* assembler = GetAssembler();
462 
463   // Since no direct x86 rounding instruction matches the required semantics,
464   // this intrinsic is implemented as follows:
465   //  result = floor(in);
466   //  if (in - result >= 0.5)
467   //    result = result + 1.0f;
468   __ movsd(t2, in);
469   __ roundsd(t1, in, Immediate(1));
470   __ subsd(t2, t1);
471   __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
472   __ j(kBelow, &skip_incr);
473   __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
474   __ Bind(&skip_incr);
475 
476   // Final conversion to an integer. Unfortunately this also does not have a
477   // direct x86 instruction, since NaN should map to 0 and large positive
478   // values need to be clipped to the extreme value.
479   codegen_->Load64BitValue(out, kPrimLongMax);
480   __ cvtsi2sd(t2, out, /* is64bit= */ true);
481   __ comisd(t1, t2);
482   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
483   __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
484   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
485   __ cvttsd2si(out, t1, /* is64bit= */ true);
486   __ Bind(&done);
487 }
488 
489 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
490   LocationSummary* locations =
491       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
492   InvokeRuntimeCallingConvention calling_convention;
493   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
494   locations->SetOut(Location::FpuRegisterLocation(XMM0));
495 
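  // The runtime entrypoint is reached through the native ABI, which preserves no XMM
  // registers, so the XMM registers that are callee-saved in managed code are blocked
  // to keep no live FP values in them across the call.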
496   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
497 }
498 
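// The locations above already pin the argument(s) to the runtime calling convention
// registers and the result to XMM0, so the call itself needs no extra register moves.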
499 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
500                           QuickEntrypointEnum entry) {
501   LocationSummary* locations = invoke->GetLocations();
502   DCHECK(locations->WillCall());
503   DCHECK(invoke->IsInvokeStaticOrDirect());
504 
505   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
506 }
507 
508 void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
509   CreateFPToFPCallLocations(allocator_, invoke);
510 }
511 
512 void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
513   GenFPToFPCall(invoke, codegen_, kQuickCos);
514 }
515 
516 void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
517   CreateFPToFPCallLocations(allocator_, invoke);
518 }
519 
520 void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
521   GenFPToFPCall(invoke, codegen_, kQuickSin);
522 }
523 
524 void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
525   CreateFPToFPCallLocations(allocator_, invoke);
526 }
527 
528 void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
529   GenFPToFPCall(invoke, codegen_, kQuickAcos);
530 }
531 
532 void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
533   CreateFPToFPCallLocations(allocator_, invoke);
534 }
535 
536 void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
537   GenFPToFPCall(invoke, codegen_, kQuickAsin);
538 }
539 
540 void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
541   CreateFPToFPCallLocations(allocator_, invoke);
542 }
543 
544 void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
545   GenFPToFPCall(invoke, codegen_, kQuickAtan);
546 }
547 
548 void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
549   CreateFPToFPCallLocations(allocator_, invoke);
550 }
551 
552 void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
553   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
554 }
555 
556 void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
557   CreateFPToFPCallLocations(allocator_, invoke);
558 }
559 
560 void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
561   GenFPToFPCall(invoke, codegen_, kQuickCosh);
562 }
563 
564 void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
565   CreateFPToFPCallLocations(allocator_, invoke);
566 }
567 
568 void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
569   GenFPToFPCall(invoke, codegen_, kQuickExp);
570 }
571 
572 void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
573   CreateFPToFPCallLocations(allocator_, invoke);
574 }
575 
576 void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
577   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
578 }
579 
580 void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
581   CreateFPToFPCallLocations(allocator_, invoke);
582 }
583 
584 void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
585   GenFPToFPCall(invoke, codegen_, kQuickLog);
586 }
587 
588 void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
589   CreateFPToFPCallLocations(allocator_, invoke);
590 }
591 
592 void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
593   GenFPToFPCall(invoke, codegen_, kQuickLog10);
594 }
595 
596 void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
597   CreateFPToFPCallLocations(allocator_, invoke);
598 }
599 
600 void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
601   GenFPToFPCall(invoke, codegen_, kQuickSinh);
602 }
603 
604 void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
605   CreateFPToFPCallLocations(allocator_, invoke);
606 }
607 
608 void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
609   GenFPToFPCall(invoke, codegen_, kQuickTan);
610 }
611 
612 void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
613   CreateFPToFPCallLocations(allocator_, invoke);
614 }
615 
616 void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
617   GenFPToFPCall(invoke, codegen_, kQuickTanh);
618 }
619 
620 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
621   LocationSummary* locations =
622       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
623   InvokeRuntimeCallingConvention calling_convention;
624   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
625   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
626   locations->SetOut(Location::FpuRegisterLocation(XMM0));
627 
628   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
629 }
630 
631 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
632   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
633   LocationSummary* locations =
634       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
635   InvokeRuntimeCallingConvention calling_convention;
636   locations->SetInAt(0, Location::RequiresFpuRegister());
637   locations->SetInAt(1, Location::RequiresFpuRegister());
638   locations->SetInAt(2, Location::RequiresFpuRegister());
639   locations->SetOut(Location::SameAsFirstInput());
640 }
641 
642 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
643   CreateFPFPToFPCallLocations(allocator_, invoke);
644 }
645 
646 void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
647   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
648 }
649 
650 void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
651   CreateFPFPToFPCallLocations(allocator_, invoke);
652 }
653 
654 void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
655   GenFPToFPCall(invoke, codegen_, kQuickPow);
656 }
657 
658 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
659   CreateFPFPToFPCallLocations(allocator_, invoke);
660 }
661 
662 void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
663   GenFPToFPCall(invoke, codegen_, kQuickHypot);
664 }
665 
666 void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
667   CreateFPFPToFPCallLocations(allocator_, invoke);
668 }
669 
670 void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
671   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
672 }
673 
674 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
675   // Check to see if we have known failures that will cause us to have to bail out
676   // to the runtime, and just generate the runtime call directly.
677   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
678   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
679 
680   // The positions must be non-negative.
681   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
682       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
683     // We will have to fail anyways.
684     return;
685   }
686 
687   // The length must be >= 0.
688   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
689   if (length != nullptr) {
690     int32_t len = length->GetValue();
691     if (len < 0) {
692       // Just call as normal.
693       return;
694     }
695   }
696   LocationSummary* locations =
697       new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
698       (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
699   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
700   locations->SetInAt(0, Location::RequiresRegister());
701   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
702   locations->SetInAt(2, Location::RequiresRegister());
703   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
704   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
705 
706   // And we need some temporaries.  We will use REP MOVS{B,W,L}, so we need fixed registers.
707   locations->AddTemp(Location::RegisterLocation(RSI));
708   locations->AddTemp(Location::RegisterLocation(RDI));
709   locations->AddTemp(Location::RegisterLocation(RCX));
710 }
711 
712 template <typename LhsType>
713 static void EmitCmplJLess(X86_64Assembler* assembler,
714                           LhsType lhs,
715                           Location rhs,
716                           Label* label) {
717   static_assert(std::is_same_v<LhsType, CpuRegister> || std::is_same_v<LhsType, Address>);
718   if (rhs.IsConstant()) {
719     int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
720     __ cmpl(lhs, Immediate(rhs_constant));
721   } else {
722     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
723   }
724   __ j(kLess, label);
725 }
726 
727 static void CheckSystemArrayCopyPosition(X86_64Assembler* assembler,
728                                          CpuRegister array,
729                                          Location pos,
730                                          Location length,
731                                          SlowPathCode* slow_path,
732                                          CpuRegister temp,
733                                          bool length_is_array_length,
734                                          bool position_sign_checked) {
735   // Where is the length in the Array?
736   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
737 
738   if (pos.IsConstant()) {
739     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
740     if (pos_const == 0) {
741       if (!length_is_array_length) {
742         // Check that length(array) >= length.
743         EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
744       }
745     } else {
746       // Calculate length(array) - pos.
747       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
748       // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
749       __ movl(temp, Address(array, length_offset));
750       __ subl(temp, Immediate(pos_const));
751 
752       // Check that (length(array) - pos) >= length.
753       EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
754     }
755   } else if (length_is_array_length) {
756     // The only way the copy can succeed is if pos is zero.
757     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
758     __ testl(pos_reg, pos_reg);
759     __ j(kNotEqual, slow_path->GetEntryLabel());
760   } else {
761     // Check that pos >= 0.
762     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
763     if (!position_sign_checked) {
764       __ testl(pos_reg, pos_reg);
765       __ j(kLess, slow_path->GetEntryLabel());
766     }
767 
768     // Calculate length(array) - pos.
769     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
770     // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
771     __ movl(temp, Address(array, length_offset));
772     __ subl(temp, pos_reg);
773 
774     // Check that (length(array) - pos) >= length.
775     EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
776   }
777 }
778 
779 static void SystemArrayCopyPrimitive(HInvoke* invoke,
780                                      X86_64Assembler* assembler,
781                                      CodeGeneratorX86_64* codegen,
782                                      DataType::Type type) {
783   LocationSummary* locations = invoke->GetLocations();
784   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
785   Location src_pos = locations->InAt(1);
786   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
787   Location dest_pos = locations->InAt(3);
788   Location length = locations->InAt(4);
789 
790   // Temporaries that we need for MOVSB/W/L.
791   CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
792   DCHECK_EQ(src_base.AsRegister(), RSI);
793   CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
794   DCHECK_EQ(dest_base.AsRegister(), RDI);
795   CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
796   DCHECK_EQ(count.AsRegister(), RCX);
797 
798   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
799   codegen->AddSlowPath(slow_path);
800 
801   // Bail out if the source and destination are the same.
802   __ cmpl(src, dest);
803   __ j(kEqual, slow_path->GetEntryLabel());
804 
805   // Bail out if the source is null.
806   __ testl(src, src);
807   __ j(kEqual, slow_path->GetEntryLabel());
808 
809   // Bail out if the destination is null.
810   __ testl(dest, dest);
811   __ j(kEqual, slow_path->GetEntryLabel());
812 
813   // If the length is negative, bail out.
814   // We have already checked in the LocationsBuilder for the constant case.
815   if (!length.IsConstant()) {
816     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
817     __ j(kLess, slow_path->GetEntryLabel());
818   }
819 
820   // Validity checks: source. Use src_base as a temporary register.
821   CheckSystemArrayCopyPosition(assembler,
822                                src,
823                                src_pos,
824                                length,
825                                slow_path,
826                                src_base,
827                                /*length_is_array_length=*/ false,
828                                /*position_sign_checked=*/ false);
829 
830   // Validity checks: dest. Use src_base as a temporary register.
831   CheckSystemArrayCopyPosition(assembler,
832                                dest,
833                                dest_pos,
834                                length,
835                                slow_path,
836                                src_base,
837                                /*length_is_array_length=*/ false,
838                                /*position_sign_checked=*/ false);
839 
840   // We need the count in RCX.
841   if (length.IsConstant()) {
842     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
843   } else {
844     __ movl(count, length.AsRegister<CpuRegister>());
845   }
846 
847   // Okay, everything checks out.  Finally time to do the copy.
848   // The element size of the copied type determines the data offset and the address scaling below.
849   const size_t data_size = DataType::Size(type);
850   const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
851 
852   GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
853   GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
854 
855   // Do the move.
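  // REP MOVSB/MOVSW/MOVSL copies RCX elements of the corresponding width from [RSI] to
  // [RDI], advancing both pointers as it goes.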
856   switch (type) {
857     case DataType::Type::kInt8:
858        __ rep_movsb();
859        break;
860     case DataType::Type::kUint16:
861        __ rep_movsw();
862        break;
863     case DataType::Type::kInt32:
864        __ rep_movsl();
865        break;
866     default:
867        LOG(FATAL) << "Unexpected data type for intrinsic";
868   }
869   __ Bind(slow_path->GetExitLabel());
870 }
871 
872 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
873   CreateSystemArrayCopyLocations(invoke);
874 }
875 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
876   X86_64Assembler* assembler = GetAssembler();
877   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
878 }
879 
880 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
881   X86_64Assembler* assembler = GetAssembler();
882   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
883 }
884 
885 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
886   CreateSystemArrayCopyLocations(invoke);
887 }
888 
889 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
890   X86_64Assembler* assembler = GetAssembler();
891   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
892 }
893 
894 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
895   CreateSystemArrayCopyLocations(invoke);
896 }
897 
898 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
899   // The only read barrier implementation supporting the
900   // SystemArrayCopy intrinsic is the Baker-style read barriers.
901   if (codegen_->EmitNonBakerReadBarrier()) {
902     return;
903   }
904 
905   constexpr int32_t kLengthThreshold = -1;  // No cut-off - handle large arrays in intrinsic code.
906   constexpr size_t kInitialNumTemps = 0u;  // We shall allocate temps explicitly.
907   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
908       invoke, kLengthThreshold, kInitialNumTemps);
909   if (locations != nullptr) {
910     // Add temporaries.  We will use REP MOVSL, so we need fixed registers.
911     DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
912     locations->AddTemp(Location::RegisterLocation(RSI));
913     locations->AddTemp(Location::RegisterLocation(RDI));
914     locations->AddTemp(Location::RegisterLocation(RCX));
915   }
916 }
917 
918 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
919   // The only read barrier implementation supporting the
920   // SystemArrayCopy intrinsic is the Baker-style read barriers.
921   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
922 
923   X86_64Assembler* assembler = GetAssembler();
924   LocationSummary* locations = invoke->GetLocations();
925 
926   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
927   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
928   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
929   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
930   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
931 
932   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
933   Location src_pos = locations->InAt(1);
934   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
935   Location dest_pos = locations->InAt(3);
936   Location length = locations->InAt(4);
937   Location temp1_loc = locations->GetTemp(0);
938   CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
939   Location temp2_loc = locations->GetTemp(1);
940   CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
941   Location temp3_loc = locations->GetTemp(2);
942   CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
943 
944   SlowPathCode* intrinsic_slow_path =
945       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
946   codegen_->AddSlowPath(intrinsic_slow_path);
947 
948   NearLabel conditions_on_positions_validated;
949   SystemArrayCopyOptimizations optimizations(invoke);
950 
951   // If source and destination are the same, we go to slow path if we need to do forward copying.
952   // We do not need to do this check if the source and destination positions are the same.
953   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
954     if (src_pos.IsConstant()) {
955       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
956       if (dest_pos.IsConstant()) {
957         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
958         if (optimizations.GetDestinationIsSource()) {
959           // Checked when building locations.
960           DCHECK_GE(src_pos_constant, dest_pos_constant);
961         } else if (src_pos_constant < dest_pos_constant) {
962           __ cmpl(src, dest);
963           __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
964         }
965       } else {
966         if (!optimizations.GetDestinationIsSource()) {
967           __ cmpl(src, dest);
968           __ j(kNotEqual, &conditions_on_positions_validated);
969         }
970         __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
971         __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
972       }
973     } else {
974       if (!optimizations.GetDestinationIsSource()) {
975         __ cmpl(src, dest);
976         __ j(kNotEqual, &conditions_on_positions_validated);
977       }
978       CpuRegister src_pos_reg = src_pos.AsRegister<CpuRegister>();
979       EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
980     }
981   }
982 
983   __ Bind(&conditions_on_positions_validated);
984 
985   if (!optimizations.GetSourceIsNotNull()) {
986     // Bail out if the source is null.
987     __ testl(src, src);
988     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
989   }
990 
991   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
992     // Bail out if the destination is null.
993     __ testl(dest, dest);
994     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
995   }
996 
997   // If the length is negative, bail out.
998   // We have already checked in the LocationsBuilder for the constant case.
999   if (!length.IsConstant() &&
1000       !optimizations.GetCountIsSourceLength() &&
1001       !optimizations.GetCountIsDestinationLength()) {
1002     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1003     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1004   }
1005 
1006   // Validity checks: source.
1007   CheckSystemArrayCopyPosition(assembler,
1008                                src,
1009                                src_pos,
1010                                length,
1011                                intrinsic_slow_path,
1012                                temp1,
1013                                optimizations.GetCountIsSourceLength(),
1014                                /*position_sign_checked=*/ false);
1015 
1016   // Validity checks: dest.
1017   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1018   CheckSystemArrayCopyPosition(assembler,
1019                                dest,
1020                                dest_pos,
1021                                length,
1022                                intrinsic_slow_path,
1023                                temp1,
1024                                optimizations.GetCountIsDestinationLength(),
1025                                dest_position_sign_checked);
1026 
1027   auto check_non_primitive_array_class = [&](CpuRegister klass, CpuRegister temp) {
1028     // No read barrier is needed for reading a chain of constant references for comparing
1029     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1030     // /* HeapReference<Class> */ temp = klass->component_type_
1031     __ movl(temp, Address(klass, component_offset));
1032     __ MaybeUnpoisonHeapReference(temp);
1033     // Check that the component type is not null.
1034     __ testl(temp, temp);
1035     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1036     // Check that the component type is not a primitive.
1037     __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
1038     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1039   };
1040 
1041   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1042     // Check whether all elements of the source array are assignable to the component
1043     // type of the destination array. We do two checks: the classes are the same,
1044     // or the destination is Object[]. If none of these checks succeed, we go to the
1045     // slow path.
1046 
1047     if (codegen_->EmitBakerReadBarrier()) {
1048       // /* HeapReference<Class> */ temp1 = dest->klass_
1049       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1050           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
1051       // Register `temp1` is not trashed by the read barrier emitted
1052       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1053       // method produces a call to a ReadBarrierMarkRegX entry point,
1054       // which saves all potentially live registers, including
1055       // temporaries such as `temp1`.
1056       // /* HeapReference<Class> */ temp2 = src->klass_
1057       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1058           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
1059       // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
1060       // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
1061     } else {
1062       // /* HeapReference<Class> */ temp1 = dest->klass_
1063       __ movl(temp1, Address(dest, class_offset));
1064       __ MaybeUnpoisonHeapReference(temp1);
1065       // /* HeapReference<Class> */ temp2 = src->klass_
1066       __ movl(temp2, Address(src, class_offset));
1067       __ MaybeUnpoisonHeapReference(temp2);
1068     }
1069 
1070     __ cmpl(temp1, temp2);
1071     if (optimizations.GetDestinationIsTypedObjectArray()) {
1072       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1073       NearLabel do_copy;
1074       // For class match, we can skip the source type check regardless of the optimization flag.
1075       __ j(kEqual, &do_copy);
1076       // No read barrier is needed for reading a chain of constant references
1077       // for comparing with null, see `ReadBarrierOption`.
1078       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1079       __ movl(temp1, Address(temp1, component_offset));
1080       __ MaybeUnpoisonHeapReference(temp1);
1081       // No need to unpoison the following heap reference load, as
1082       // we're comparing against null.
1083       __ cmpl(Address(temp1, super_offset), Immediate(0));
1084       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1085       // Bail out if the source is not a non primitive array.
1086       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1087         check_non_primitive_array_class(temp2, CpuRegister(TMP));
1088       }
1089       __ Bind(&do_copy);
1090     } else {
1091       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1092       // For class match, we can skip the array type check completely if at least one of source
1093       // and destination is known to be a non primitive array, otherwise one check is enough.
1094       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1095       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1096           !optimizations.GetSourceIsNonPrimitiveArray()) {
1097         check_non_primitive_array_class(temp2, CpuRegister(TMP));
1098       }
1099     }
1100   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1101     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1102     // Bail out if the source is not a non primitive array.
1103     // No read barrier is needed for reading a chain of constant references for comparing
1104     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1105     // /* HeapReference<Class> */ temp1 = src->klass_
1106     __ movl(temp1, Address(src, class_offset));
1107     __ MaybeUnpoisonHeapReference(temp1);
1108     check_non_primitive_array_class(temp1, CpuRegister(TMP));
1109   }
1110 
1111   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1112     // Zero constant length: no need to emit the loop code at all.
1113   } else {
1114     const DataType::Type type = DataType::Type::kReference;
1115     const int32_t element_size = DataType::Size(type);
1116     const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1117 
1118     // Don't enter copy loop if `length == 0`.
1119     NearLabel skip_copy_and_write_barrier;
1120     if (!length.IsConstant()) {
1121       __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1122       __ j(kEqual, &skip_copy_and_write_barrier);
1123     }
1124 
1125     // Compute base source address, base destination address, and end
1126     // source address in `temp1`, `temp2` and `temp3` respectively.
1127     GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
1128     GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
1129 
1130     SlowPathCode* read_barrier_slow_path = nullptr;
1131     if (codegen_->EmitBakerReadBarrier()) {
1132       // SystemArrayCopy implementation for Baker read barriers (see
1133       // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1134       //
1135       //   if (src_ptr != end_ptr) {
1136       //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1137       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1138       //     bool is_gray = (rb_state == ReadBarrier::GrayState());
1139       //     if (is_gray) {
1140       //       // Slow-path copy.
1141       //       do {
1142       //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1143       //       } while (src_ptr != end_ptr)
1144       //     } else {
1145       //       // Fast-path copy.
1146       //       do {
1147       //         *dest_ptr++ = *src_ptr++;
1148       //       } while (src_ptr != end_ptr)
1149       //     }
1150       //   }
1151 
1152       // Given the numeric representation, it's enough to check the low bit of the rb_state.
1153       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1154       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1155       constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1156       constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1157       constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
1158 
1159       // if (rb_state == ReadBarrier::GrayState())
1160       //   goto slow_path;
1161       // At this point, just do the "if" and make sure that flags are preserved until the branch.
1162       __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1163 
1164       // Load fence to prevent load-load reordering.
1165       // Note that this is a no-op, thanks to the x86-64 memory model.
1166       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1167 
1168       // Slow path used to copy array when `src` is gray.
1169       read_barrier_slow_path =
1170           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1171       codegen_->AddSlowPath(read_barrier_slow_path);
1172 
1173       // We have done the "if" of the gray bit check above, now branch based on the flags.
1174       __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1175     }
1176 
1177     if (length.IsConstant()) {
1178       __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1179     } else {
1180       __ movl(temp3, length.AsRegister<CpuRegister>());
1181     }
1182 
1183     // Iterate over the arrays and do a raw copy of the references. We don't need to poison/unpoison.
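         // Conceptually, the REP MOVSL below performs (illustrative sketch only):
         //
         //   while (count-- != 0) {
         //     *dest_ptr++ = *src_ptr++;  // Raw 32-bit copy of a heap reference.
         //   }
         //
         // with `src_ptr` in RSI, `dest_ptr` in RDI and `count` in RCX.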
1184     DCHECK_EQ(temp1.AsRegister(), RSI);
1185     DCHECK_EQ(temp2.AsRegister(), RDI);
1186     DCHECK_EQ(temp3.AsRegister(), RCX);
1187     __ rep_movsl();
1188 
1189     if (read_barrier_slow_path != nullptr) {
1190       DCHECK(codegen_->EmitBakerReadBarrier());
1191       __ Bind(read_barrier_slow_path->GetExitLabel());
1192     }
1193 
1194     // We only need one card marking on the destination array.
1195     codegen_->MarkGCCard(temp1, temp2, dest);
1196 
1197     __ Bind(&skip_copy_and_write_barrier);
1198   }
1199 
1200   __ Bind(intrinsic_slow_path->GetExitLabel());
1201 }
1202 
VisitStringCompareTo(HInvoke * invoke)1203 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1204   LocationSummary* locations = new (allocator_) LocationSummary(
1205       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1206   InvokeRuntimeCallingConvention calling_convention;
1207   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1208   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1209   locations->SetOut(Location::RegisterLocation(RAX));
1210 }
1211 
VisitStringCompareTo(HInvoke * invoke)1212 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1213   X86_64Assembler* assembler = GetAssembler();
1214   LocationSummary* locations = invoke->GetLocations();
1215 
1216   // Note that the null check must have been done earlier.
1217   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1218 
1219   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1220   __ testl(argument, argument);
1221   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1222   codegen_->AddSlowPath(slow_path);
1223   __ j(kEqual, slow_path->GetEntryLabel());
1224 
1225   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1226   __ Bind(slow_path->GetExitLabel());
1227 }
1228 
VisitStringEquals(HInvoke * invoke)1229 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1230   LocationSummary* locations =
1231       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1232   locations->SetInAt(0, Location::RequiresRegister());
1233   locations->SetInAt(1, Location::RequiresRegister());
1234 
1235   // Request temporary registers: RCX and RDI are needed for the repe_cmpsq instruction.
1236   locations->AddTemp(Location::RegisterLocation(RCX));
1237   locations->AddTemp(Location::RegisterLocation(RDI));
1238 
1239   // Set the output; RSI is needed for the repe_cmpsq instruction anyway.
1240   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1241 }
1242 
VisitStringEquals(HInvoke * invoke)1243 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1244   X86_64Assembler* assembler = GetAssembler();
1245   LocationSummary* locations = invoke->GetLocations();
1246 
1247   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1248   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1249   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1250   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1251   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1252 
1253   NearLabel end, return_true, return_false;
1254 
1255   // Get offsets of count, value, and class fields within a string object.
1256   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1257   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1258   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1259 
1260   // Note that the null check must have been done earlier.
1261   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1262 
1263   StringEqualsOptimizations optimizations(invoke);
1264   if (!optimizations.GetArgumentNotNull()) {
1265     // Check if input is null, return false if it is.
1266     __ testl(arg, arg);
1267     __ j(kEqual, &return_false);
1268   }
1269 
1270   if (!optimizations.GetArgumentIsString()) {
1271     // Instanceof check for the argument by comparing class fields.
1272     // All string objects must have the same type since String cannot be subclassed.
1273     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1274     // If the argument is a string object, its class field must be equal to receiver's class field.
1275     //
1276     // As the String class is expected to be non-movable, we can read the class
1277     // field from String.equals' arguments without read barriers.
1278     AssertNonMovableStringClass();
1279     // Also, because we use the loaded class references only to compare them, we
1280     // don't need to unpoison them.
1281     // /* HeapReference<Class> */ rcx = str->klass_
1282     __ movl(rcx, Address(str, class_offset));
1283     // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1284     __ cmpl(rcx, Address(arg, class_offset));
1285     __ j(kNotEqual, &return_false);
1286   }
1287 
1288   // Reference equality check, return true if same reference.
1289   __ cmpl(str, arg);
1290   __ j(kEqual, &return_true);
1291 
1292   // Load length and compression flag of receiver string.
1293   __ movl(rcx, Address(str, count_offset));
1294   // Check if the lengths and compression flags are equal; return false if they're not.
1295   // Two identical strings will always have the same compression style since
1296   // the compression style is decided at allocation time.
1297   __ cmpl(rcx, Address(arg, count_offset));
1298   __ j(kNotEqual, &return_false);
1299   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1300   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1301                 "Expecting 0=compressed, 1=uncompressed");
1302   __ jrcxz(&return_true);
1303 
1304   if (mirror::kUseStringCompression) {
1305     NearLabel string_uncompressed;
1306     // Extract the length and determine whether both strings are compressed or both uncompressed.
1307     // The case of differing compression styles has already been rejected above.
1308     __ shrl(rcx, Immediate(1));
1309     __ j(kCarrySet, &string_uncompressed);
1310     // Divide string length by 2, rounding up, and continue as if uncompressed.
1311     // Merge clearing the compression flag with +1 for rounding.
1312     __ addl(rcx, Immediate(1));
1313     __ shrl(rcx, Immediate(1));
1314     __ Bind(&string_uncompressed);
1315   }
1316   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1317   __ leal(rsi, Address(str, value_offset));
1318   __ leal(rdi, Address(arg, value_offset));
1319 
1320   // Divide the string length by 4, rounding up, to get the number of 8-byte blocks to compare.
1321   __ addl(rcx, Immediate(3));
1322   __ shrl(rcx, Immediate(2));
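       // Worked example of the arithmetic above, using the count encoding asserted earlier
       // (bit 0 is the compression flag, 0 = compressed): a compressed 5-char string has
       // count == (5 << 1) | 0 == 10; the first shift yields rcx == 5 with the carry clear,
       // (5 + 1) >> 1 == 3 char units, and (3 + 3) >> 2 == 1 quadword, so a single 8-byte
       // compare covers the 5 data bytes plus the zero padding guaranteed below.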
1323 
1324   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1325   // or 8 characters (compressed) at a time.
1326   DCHECK_ALIGNED(value_offset, 8);
1327   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1328 
1329   // Loop to compare strings four characters at a time starting at the beginning of the string.
1330   __ repe_cmpsq();
1331   // If the strings are not equal, the zero flag will be cleared.
1332   __ j(kNotEqual, &return_false);
1333 
1334   // Return true and exit the function.
1335   // If the loop above did not branch to return_false, the strings are equal.
1336   __ Bind(&return_true);
1337   __ movl(rsi, Immediate(1));
1338   __ jmp(&end);
1339 
1340   // Return false and exit the function.
1341   __ Bind(&return_false);
1342   __ xorl(rsi, rsi);
1343   __ Bind(&end);
1344 }
1345 
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1346 static void CreateStringIndexOfLocations(HInvoke* invoke,
1347                                          ArenaAllocator* allocator,
1348                                          bool start_at_zero) {
1349   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1350                                                                LocationSummary::kCallOnSlowPath,
1351                                                                kIntrinsified);
1352   // The data needs to be in RDI for scasw, so request that the string is there anyway.
1353   locations->SetInAt(0, Location::RegisterLocation(RDI));
1354   // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1355   // allocator to do that anyway. We can still do the constant check by checking the parameter
1356   // of the instruction explicitly.
1357   // Note: This works as we don't clobber RAX anywhere.
1358   locations->SetInAt(1, Location::RegisterLocation(RAX));
1359   if (!start_at_zero) {
1360     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1361   }
1362   // As we clobber RDI during execution anyway, also use it as the output.
1363   locations->SetOut(Location::SameAsFirstInput());
1364 
1365   // repne scasw uses RCX as the counter.
1366   locations->AddTemp(Location::RegisterLocation(RCX));
1367   // Need another temporary to be able to compute the result.
1368   locations->AddTemp(Location::RequiresRegister());
1369 }
1370 
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1371 static void GenerateStringIndexOf(HInvoke* invoke,
1372                                   X86_64Assembler* assembler,
1373                                   CodeGeneratorX86_64* codegen,
1374                                   bool start_at_zero) {
1375   LocationSummary* locations = invoke->GetLocations();
1376 
1377   // Note that the null check must have been done earlier.
1378   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1379 
1380   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1381   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1382   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1383   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1384   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1385 
1386   // Check our assumptions for registers.
1387   DCHECK_EQ(string_obj.AsRegister(), RDI);
1388   DCHECK_EQ(search_value.AsRegister(), RAX);
1389   DCHECK_EQ(counter.AsRegister(), RCX);
1390   DCHECK_EQ(out.AsRegister(), RDI);
1391 
1392   // Check for code points > 0xFFFF: emit a slow-path check when the value is not known statically,
1393   // dispatch directly to the slow path for a large constant, or omit it for a small constant or a char.
1394   SlowPathCode* slow_path = nullptr;
1395   HInstruction* code_point = invoke->InputAt(1);
1396   if (code_point->IsIntConstant()) {
1397     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1398         std::numeric_limits<uint16_t>::max()) {
1399       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1400       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1401       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1402       codegen->AddSlowPath(slow_path);
1403       __ jmp(slow_path->GetEntryLabel());
1404       __ Bind(slow_path->GetExitLabel());
1405       return;
1406     }
1407   } else if (code_point->GetType() != DataType::Type::kUint16) {
1408     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1409     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1410     codegen->AddSlowPath(slow_path);
1411     __ j(kAbove, slow_path->GetEntryLabel());
1412   }
1413 
1414   // From here down, we know that we are looking for a char that fits in
1415   // 16 bits (uncompressed) or 8 bits (compressed).
1416   // Location of reference to data array within the String object.
1417   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1418   // Location of count within the String object.
1419   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1420 
1421   // Load the count field of the string containing the length and compression flag.
1422   __ movl(string_length, Address(string_obj, count_offset));
1423 
1424   // Do a zero-length check. Even with string compression `count == 0` means empty.
1425   // TODO: Support jecxz.
1426   NearLabel not_found_label;
1427   __ testl(string_length, string_length);
1428   __ j(kEqual, &not_found_label);
1429 
1430   if (mirror::kUseStringCompression) {
1431     // Use TMP to keep string_length_flagged.
1432     __ movl(CpuRegister(TMP), string_length);
1433     // Shift out the low bit, which is used as the compression flag.
1434     __ shrl(string_length, Immediate(1));
1435   }
1436 
1437   if (start_at_zero) {
1438     // Number of chars to scan is the same as the string length.
1439     __ movl(counter, string_length);
1440     // Move to the start of the string.
1441     __ addq(string_obj, Immediate(value_offset));
1442   } else {
1443     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1444 
1445     // Do a start_index check.
1446     __ cmpl(start_index, string_length);
1447     __ j(kGreaterEqual, &not_found_label);
1448 
1449     // Ensure we have a start index >= 0.
1450     __ xorl(counter, counter);
1451     __ cmpl(start_index, Immediate(0));
1452     __ cmov(kGreater, counter, start_index, /* is64bit= */ false);  // 32-bit copy is enough.
1453 
1454     if (mirror::kUseStringCompression) {
1455       NearLabel modify_counter, offset_uncompressed_label;
1456       __ testl(CpuRegister(TMP), Immediate(1));
1457       __ j(kNotZero, &offset_uncompressed_label);
1458       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1459       __ jmp(&modify_counter);
1460       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1461       __ Bind(&offset_uncompressed_label);
1462       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1463       __ Bind(&modify_counter);
1464     } else {
1465       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1466     }
1467     // Now update RCX, the work counter: it will be string.length - start_index.
1468     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1469     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1470   }
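       // At this point RDI points at the first element to scan (a 16-bit char, or a byte for a
       // compressed string) and RCX holds the number of elements left to scan: string_length
       // when starting at index 0, or string_length - start_index otherwise.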
1471 
1472   if (mirror::kUseStringCompression) {
1473     NearLabel uncompressed_string_comparison;
1474     NearLabel comparison_done;
1475     __ testl(CpuRegister(TMP), Immediate(1));
1476     __ j(kNotZero, &uncompressed_string_comparison);
1477     // Check if RAX (search_value) is ASCII.
1478     __ cmpl(search_value, Immediate(127));
1479     __ j(kGreater, &not_found_label);
1480     // Comparing byte-per-byte.
1481     __ repne_scasb();
1482     __ jmp(&comparison_done);
1483     // Everything is set up for repne scasw:
1484     //   * Comparison address in RDI.
1485     //   * Counter in ECX.
1486     __ Bind(&uncompressed_string_comparison);
1487     __ repne_scasw();
1488     __ Bind(&comparison_done);
1489   } else {
1490     __ repne_scasw();
1491   }
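       // REPNE SCAS stops either on a match (zero flag set) or when RCX reaches zero without a
       // match (zero flag clear), so the zero flag tells us whether the value was found.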
1492   // Did we find a match?
1493   __ j(kNotEqual, &not_found_label);
1494 
1495   // Yes, we matched.  Compute the index of the result.
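       // After the scan, RCX holds the number of elements remaining past the match (the matching
       // element itself was consumed), so string_length - RCX is one past the match position;
       // subtract 1 to get the zero-based index.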
1496   __ subl(string_length, counter);
1497   __ leal(out, Address(string_length, -1));
1498 
1499   NearLabel done;
1500   __ jmp(&done);
1501 
1502   // Failed to match; return -1.
1503   __ Bind(&not_found_label);
1504   __ movl(out, Immediate(-1));
1505 
1506   // And join up at the end.
1507   __ Bind(&done);
1508   if (slow_path != nullptr) {
1509     __ Bind(slow_path->GetExitLabel());
1510   }
1511 }
1512 
VisitStringIndexOf(HInvoke * invoke)1513 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1514   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1515 }
1516 
VisitStringIndexOf(HInvoke * invoke)1517 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1518   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1519 }
1520 
VisitStringIndexOfAfter(HInvoke * invoke)1521 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1522   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1523 }
1524 
VisitStringIndexOfAfter(HInvoke * invoke)1525 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1526   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1527 }
1528 
VisitStringNewStringFromBytes(HInvoke * invoke)1529 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1530   LocationSummary* locations = new (allocator_) LocationSummary(
1531       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1532   InvokeRuntimeCallingConvention calling_convention;
1533   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1534   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1535   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1536   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1537   locations->SetOut(Location::RegisterLocation(RAX));
1538 }
1539 
VisitStringNewStringFromBytes(HInvoke * invoke)1540 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1541   X86_64Assembler* assembler = GetAssembler();
1542   LocationSummary* locations = invoke->GetLocations();
1543 
1544   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1545   __ testl(byte_array, byte_array);
1546   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1547   codegen_->AddSlowPath(slow_path);
1548   __ j(kEqual, slow_path->GetEntryLabel());
1549 
1550   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1551   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1552   __ Bind(slow_path->GetExitLabel());
1553 }
1554 
VisitStringNewStringFromChars(HInvoke * invoke)1555 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1556   LocationSummary* locations =
1557       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1558   InvokeRuntimeCallingConvention calling_convention;
1559   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1560   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1561   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1562   locations->SetOut(Location::RegisterLocation(RAX));
1563 }
1564 
VisitStringNewStringFromChars(HInvoke * invoke)1565 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1566   // No need to emit code checking whether `locations->InAt(2)` is a null
1567   // pointer, as callers of the native method
1568   //
1569   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1570   //
1571   // all include a null check on `data` before calling that method.
1572   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1573   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1574 }
1575 
VisitStringNewStringFromString(HInvoke * invoke)1576 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1577   LocationSummary* locations = new (allocator_) LocationSummary(
1578       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1579   InvokeRuntimeCallingConvention calling_convention;
1580   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1581   locations->SetOut(Location::RegisterLocation(RAX));
1582 }
1583 
VisitStringNewStringFromString(HInvoke * invoke)1584 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1585   X86_64Assembler* assembler = GetAssembler();
1586   LocationSummary* locations = invoke->GetLocations();
1587 
1588   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1589   __ testl(string_to_copy, string_to_copy);
1590   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1591   codegen_->AddSlowPath(slow_path);
1592   __ j(kEqual, slow_path->GetEntryLabel());
1593 
1594   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1595   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1596   __ Bind(slow_path->GetExitLabel());
1597 }
1598 
VisitStringGetCharsNoCheck(HInvoke * invoke)1599 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1600   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1601   LocationSummary* locations =
1602       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1603   locations->SetInAt(0, Location::RequiresRegister());
1604   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1605   locations->SetInAt(2, Location::RequiresRegister());
1606   locations->SetInAt(3, Location::RequiresRegister());
1607   locations->SetInAt(4, Location::RequiresRegister());
1608 
1609   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1610   locations->AddTemp(Location::RegisterLocation(RSI));
1611   locations->AddTemp(Location::RegisterLocation(RDI));
1612   locations->AddTemp(Location::RegisterLocation(RCX));
1613 }
1614 
VisitStringGetCharsNoCheck(HInvoke * invoke)1615 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1616   X86_64Assembler* assembler = GetAssembler();
1617   LocationSummary* locations = invoke->GetLocations();
1618 
1619   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1620   // Location of data in char array buffer.
1621   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1622   // Location of char array data in string.
1623   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1624 
1625   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1626   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1627   Location srcBegin = locations->InAt(1);
1628   int srcBegin_value =
1629       srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1630   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1631   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1632   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1633 
1634   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1635   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1636   DCHECK_EQ(char_size, 2u);
1637 
1638   NearLabel done;
1639   // Compute the number of chars (words) to move.
1640   __ movl(CpuRegister(RCX), srcEnd);
1641   if (srcBegin.IsConstant()) {
1642     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1643   } else {
1644     DCHECK(srcBegin.IsRegister());
1645     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1646   }
1647   if (mirror::kUseStringCompression) {
1648     NearLabel copy_uncompressed, copy_loop;
1649     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1650     DCHECK_EQ(c_char_size, 1u);
1651     // Location of count in string.
1652     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1653 
1654     __ testl(Address(obj, count_offset), Immediate(1));
1655     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1656                   "Expecting 0=compressed, 1=uncompressed");
1657     __ j(kNotZero, &copy_uncompressed);
1658     // Compute the address of the source data: the string's value offset plus
1659     // srcBegin bytes (one byte per compressed char).
1660     __ leaq(CpuRegister(RSI),
1661             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1662     // Compute the address of the destination buffer: dst + data_offset + 2 * dstBegin.
1663     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1664 
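         // The copy loop below widens one byte at a time (illustrative sketch only):
         //
         //   for (; rcx != 0; --rcx) {
         //     *dst16++ = static_cast<uint16_t>(*src8++);  // Zero-extend byte to char.
         //   }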
1665     __ Bind(&copy_loop);
1666     __ jrcxz(&done);
1667     // Use TMP as a temporary (zero-extend the byte loaded from [RSI] to a word).
1668     // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1669     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1670     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1671     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1672     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1673     // TODO: Add support for LOOP to X86_64Assembler.
1674     __ subl(CpuRegister(RCX), Immediate(1));
1675     __ jmp(&copy_loop);
1676 
1677     __ Bind(&copy_uncompressed);
1678   }
1679 
1680   __ leaq(CpuRegister(RSI),
1681           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1682   // Compute the address of the destination buffer.
1683   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1684   // Do the move.
1685   __ rep_movsw();
1686 
1687   __ Bind(&done);
1688 }
1689 
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1690 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1691   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1692   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1693   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1694   // to avoid a SIGBUS.
1695   switch (size) {
1696     case DataType::Type::kInt8:
1697       __ movsxb(out, Address(address, 0));
1698       break;
1699     case DataType::Type::kInt16:
1700       __ movsxw(out, Address(address, 0));
1701       break;
1702     case DataType::Type::kInt32:
1703       __ movl(out, Address(address, 0));
1704       break;
1705     case DataType::Type::kInt64:
1706       __ movq(out, Address(address, 0));
1707       break;
1708     default:
1709       LOG(FATAL) << "Type not recognized for peek: " << size;
1710       UNREACHABLE();
1711   }
1712 }
1713 
VisitMemoryPeekByte(HInvoke * invoke)1714 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1715   CreateIntToIntLocations(allocator_, invoke);
1716 }
1717 
VisitMemoryPeekByte(HInvoke * invoke)1718 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1719   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1720 }
1721 
VisitMemoryPeekIntNative(HInvoke * invoke)1722 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1723   CreateIntToIntLocations(allocator_, invoke);
1724 }
1725 
VisitMemoryPeekIntNative(HInvoke * invoke)1726 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1727   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1728 }
1729 
VisitMemoryPeekLongNative(HInvoke * invoke)1730 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1731   CreateIntToIntLocations(allocator_, invoke);
1732 }
1733 
VisitMemoryPeekLongNative(HInvoke * invoke)1734 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1735   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1736 }
1737 
VisitMemoryPeekShortNative(HInvoke * invoke)1738 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1739   CreateIntToIntLocations(allocator_, invoke);
1740 }
1741 
VisitMemoryPeekShortNative(HInvoke * invoke)1742 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1743   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1744 }
1745 
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1746 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1747   LocationSummary* locations =
1748       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1749   locations->SetInAt(0, Location::RequiresRegister());
1750   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1751 }
1752 
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1753 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1754   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1755   Location value = locations->InAt(1);
1756   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1757   // to avoid a SIGBUS.
1758   switch (size) {
1759     case DataType::Type::kInt8:
1760       if (value.IsConstant()) {
1761         __ movb(Address(address, 0),
1762                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1763       } else {
1764         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1765       }
1766       break;
1767     case DataType::Type::kInt16:
1768       if (value.IsConstant()) {
1769         __ movw(Address(address, 0),
1770                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1771       } else {
1772         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1773       }
1774       break;
1775     case DataType::Type::kInt32:
1776       if (value.IsConstant()) {
1777         __ movl(Address(address, 0),
1778                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1779       } else {
1780         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1781       }
1782       break;
1783     case DataType::Type::kInt64:
1784       if (value.IsConstant()) {
1785         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1786         DCHECK(IsInt<32>(v));
1787         int32_t v_32 = v;
1788         __ movq(Address(address, 0), Immediate(v_32));
1789       } else {
1790         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1791       }
1792       break;
1793     default:
1794       LOG(FATAL) << "Type not recognized for poke: " << size;
1795       UNREACHABLE();
1796   }
1797 }
1798 
VisitMemoryPokeByte(HInvoke * invoke)1799 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1800   CreateIntIntToVoidLocations(allocator_, invoke);
1801 }
1802 
VisitMemoryPokeByte(HInvoke * invoke)1803 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1804   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1805 }
1806 
VisitMemoryPokeIntNative(HInvoke * invoke)1807 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1808   CreateIntIntToVoidLocations(allocator_, invoke);
1809 }
1810 
VisitMemoryPokeIntNative(HInvoke * invoke)1811 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1812   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1813 }
1814 
VisitMemoryPokeLongNative(HInvoke * invoke)1815 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1816   CreateIntIntToVoidLocations(allocator_, invoke);
1817 }
1818 
VisitMemoryPokeLongNative(HInvoke * invoke)1819 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1820   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1821 }
1822 
VisitMemoryPokeShortNative(HInvoke * invoke)1823 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1824   CreateIntIntToVoidLocations(allocator_, invoke);
1825 }
1826 
VisitMemoryPokeShortNative(HInvoke * invoke)1827 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1828   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1829 }
1830 
VisitThreadCurrentThread(HInvoke * invoke)1831 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1832   LocationSummary* locations =
1833       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1834   locations->SetOut(Location::RequiresRegister());
1835 }
1836 
VisitThreadCurrentThread(HInvoke * invoke)1837 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1838   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
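       // The current thread's java.lang.Thread peer is cached in the thread-local block addressed
       // via the GS segment register, so a single GS-relative 32-bit load of the reference at
       // Thread::PeerOffset() yields the result.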
1839   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1840                                                     /* no_rip= */ true));
1841 }
1842 
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)1843 static void GenUnsafeGet(HInvoke* invoke,
1844                          DataType::Type type,
1845                          [[maybe_unused]] bool is_volatile,
1846                          CodeGeneratorX86_64* codegen) {
1847   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1848   LocationSummary* locations = invoke->GetLocations();
1849   Location base_loc = locations->InAt(1);
1850   CpuRegister base = base_loc.AsRegister<CpuRegister>();
1851   Location offset_loc = locations->InAt(2);
1852   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1853   Location output_loc = locations->Out();
1854   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1855 
1856   switch (type) {
1857     case DataType::Type::kInt8:
1858       __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1859       break;
1860 
1861     case DataType::Type::kInt32:
1862       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1863       break;
1864 
1865     case DataType::Type::kReference: {
1866       if (codegen->EmitReadBarrier()) {
1867         if (kUseBakerReadBarrier) {
1868           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1869           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1870               invoke, output_loc, base, src, /* needs_null_check= */ false);
1871         } else {
1872           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1873           codegen->GenerateReadBarrierSlow(
1874               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1875         }
1876       } else {
1877         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1878         __ MaybeUnpoisonHeapReference(output);
1879       }
1880       break;
1881     }
1882 
1883     case DataType::Type::kInt64:
1884       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1885       break;
1886 
1887     default:
1888       LOG(FATAL) << "Unsupported op size " << type;
1889       UNREACHABLE();
1890   }
1891 }
1892 
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen)1893 static void GenUnsafeGetAbsolute(HInvoke* invoke,
1894                                  DataType::Type type,
1895                                  CodeGeneratorX86_64* codegen) {
1896   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1897   LocationSummary* locations = invoke->GetLocations();
1898   Location address_loc = locations->InAt(1);
1899   Address address = Address(address_loc.AsRegister<CpuRegister>(), 0);
1900   Location output_loc = locations->Out();
1901   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1902 
1903   switch (type) {
1904     case DataType::Type::kInt8:
1905       __ movsxb(output, address);
1906       break;
1907 
1908     case DataType::Type::kInt32:
1909       __ movl(output, address);
1910       break;
1911 
1912     case DataType::Type::kInt64:
1913       __ movq(output, address);
1914       break;
1915 
1916     default:
1917       LOG(FATAL) << "Unsupported op size " << type;
1918       UNREACHABLE();
1919   }
1920 }
1921 
CreateIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)1922 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1923   LocationSummary* locations =
1924       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1925   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1926   locations->SetInAt(1, Location::RequiresRegister());
1927   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1928 }
1929 
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)1930 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1931                                           HInvoke* invoke,
1932                                           CodeGeneratorX86_64* codegen) {
1933   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1934   LocationSummary* locations =
1935       new (allocator) LocationSummary(invoke,
1936                                       can_call
1937                                           ? LocationSummary::kCallOnSlowPath
1938                                           : LocationSummary::kNoCall,
1939                                       kIntrinsified);
1940   if (can_call && kUseBakerReadBarrier) {
1941     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1942   }
1943   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1944   locations->SetInAt(1, Location::RequiresRegister());
1945   locations->SetInAt(2, Location::RequiresRegister());
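       // When a read barrier slow path may be invoked (see `can_call` above), the base and offset
       // inputs are still needed after the output is defined, so the output must not share their
       // registers (kOutputOverlap).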
1946   locations->SetOut(Location::RequiresRegister(),
1947                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1948 }
1949 
VisitUnsafeGet(HInvoke * invoke)1950 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1951   VisitJdkUnsafeGet(invoke);
1952 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1954   VisitJdkUnsafeGetAbsolute(invoke);
1955 }
VisitUnsafeGetVolatile(HInvoke * invoke)1956 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1957   VisitJdkUnsafeGetVolatile(invoke);
1958 }
VisitUnsafeGetLong(HInvoke * invoke)1959 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1960   VisitJdkUnsafeGetLong(invoke);
1961 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1962 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1963   VisitJdkUnsafeGetLongVolatile(invoke);
1964 }
VisitUnsafeGetObject(HInvoke * invoke)1965 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1966   VisitJdkUnsafeGetReference(invoke);
1967 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1968 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1969   VisitJdkUnsafeGetReferenceVolatile(invoke);
1970 }
VisitUnsafeGetByte(HInvoke * invoke)1971 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1972   VisitJdkUnsafeGetByte(invoke);
1973 }
1974 
VisitJdkUnsafeGet(HInvoke * invoke)1975 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1976   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1977 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1978 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1979   CreateIntIntToIntLocations(allocator_, invoke);
1980 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1981 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1982   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1983 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1984 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1985   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1986 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1987 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1988   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1989 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1990 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1991   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1992 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1993 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1994   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1995 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1996 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1997   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1998 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2000   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2001 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2003   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2004 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2006   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2007 }
2008 
VisitUnsafeGet(HInvoke * invoke)2009 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2010   VisitJdkUnsafeGet(invoke);
2011 }
VisitUnsafeGetAbsolute(HInvoke * invoke)2012 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2013   VisitJdkUnsafeGetAbsolute(invoke);
2014 }
VisitUnsafeGetVolatile(HInvoke * invoke)2015 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2016   VisitJdkUnsafeGetVolatile(invoke);
2017 }
VisitUnsafeGetLong(HInvoke * invoke)2018 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2019   VisitJdkUnsafeGetLong(invoke);
2020 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2021 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2022   VisitJdkUnsafeGetLongVolatile(invoke);
2023 }
VisitUnsafeGetObject(HInvoke * invoke)2024 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2025   VisitJdkUnsafeGetReference(invoke);
2026 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2027 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2028   VisitJdkUnsafeGetReferenceVolatile(invoke);
2029 }
VisitUnsafeGetByte(HInvoke * invoke)2030 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
2031   VisitJdkUnsafeGetByte(invoke);
2032 }
2033 
VisitJdkUnsafeGet(HInvoke * invoke)2034 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
2035   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2036 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2037 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2038   GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, codegen_);
2039 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2040 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2041   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2042 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2043 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2044   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2045 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2046 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2047   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2048 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2049 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2050   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2051 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2052 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2053   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2054 }
VisitJdkUnsafeGetReference(HInvoke * invoke)2055 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2056   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2057 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2058 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2059   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2060 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2061 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2062   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2063 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2064 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2065   GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2066 }
2067 
CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2068 static void CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2069                                                     [[maybe_unused]] DataType::Type type,
2070                                                     HInvoke* invoke) {
2071   LocationSummary* locations =
2072       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2073   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2074   locations->SetInAt(1, Location::RequiresRegister());
2075   locations->SetInAt(2, Location::RequiresRegister());
2076 }
2077 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2078 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2079                                                        DataType::Type type,
2080                                                        HInvoke* invoke) {
2081   LocationSummary* locations =
2082       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2083   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2084   locations->SetInAt(1, Location::RequiresRegister());
2085   locations->SetInAt(2, Location::RequiresRegister());
2086   locations->SetInAt(3, Location::RequiresRegister());
2087   if (type == DataType::Type::kReference) {
2088     // Need temp registers for card-marking.
2089     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2090     locations->AddTemp(Location::RequiresRegister());
2091   }
2092 }
2093 
VisitUnsafePut(HInvoke * invoke)2094 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2095   VisitJdkUnsafePut(invoke);
2096 }
VisitUnsafePutAbsolute(HInvoke * invoke)2097 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2098   VisitJdkUnsafePutAbsolute(invoke);
2099 }
VisitUnsafePutOrdered(HInvoke * invoke)2100 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2101   VisitJdkUnsafePutOrdered(invoke);
2102 }
VisitUnsafePutVolatile(HInvoke * invoke)2103 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2104   VisitJdkUnsafePutVolatile(invoke);
2105 }
VisitUnsafePutObject(HInvoke * invoke)2106 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2107   VisitJdkUnsafePutReference(invoke);
2108 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2109 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2110   VisitJdkUnsafePutObjectOrdered(invoke);
2111 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2112 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2113   VisitJdkUnsafePutReferenceVolatile(invoke);
2114 }
VisitUnsafePutLong(HInvoke * invoke)2115 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2116   VisitJdkUnsafePutLong(invoke);
2117 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2118 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2119   VisitJdkUnsafePutLongOrdered(invoke);
2120 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2121 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2122   VisitJdkUnsafePutLongVolatile(invoke);
2123 }
VisitUnsafePutByte(HInvoke * invoke)2124 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2125   VisitJdkUnsafePut(invoke);
2126 }
2127 
VisitJdkUnsafePut(HInvoke * invoke)2128 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2129   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2130 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2131 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2132   CreateIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2133 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2134 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2135   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2136 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2137 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2138   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2139 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2140 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2141   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2142 }
VisitJdkUnsafePutReference(HInvoke * invoke)2143 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2144   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2145 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2146 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2147   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2148 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2149 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2150   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2151 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2152 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2153   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2154 }
VisitJdkUnsafePutLong(HInvoke * invoke)2155 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2156   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2157 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2158 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2159   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2160 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2161 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2162   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2163 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2164 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2165   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2166 }
VisitJdkUnsafePutByte(HInvoke * invoke)2167 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2168   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt8, invoke);
2169 }
2170 
2171 // Ordered stores need no special handling here: they require an AnyStore barrier, which the
2172 // x86-64 memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2173 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2174                          CodeGeneratorX86_64* codegen) {
2175   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2176   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2177   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2178   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2179 
2180   if (type == DataType::Type::kInt64) {
2181     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2182   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
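         // Poison a copy of the reference in `temp` rather than `value` itself: poisoning mutates
         // the register in place, and the unpoisoned `value` may still be needed for the card mark
         // below.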
2183     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2184     __ movl(temp, value);
2185     __ PoisonHeapReference(temp);
2186     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2187   } else if (type == DataType::Type::kInt32 || type == DataType::Type::kReference) {
2188     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2189   } else {
2190     CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2191     __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2192   }
2193 
2194   if (is_volatile) {
2195     codegen->MemoryFence();
2196   }
2197 
2198   if (type == DataType::Type::kReference) {
2199     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2200     codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2201                              locations->GetTemp(1).AsRegister<CpuRegister>(),
2202                              base,
2203                              value,
2204                              value_can_be_null);
2205   }
2206 }
2207 
2208 // Ordered stores need no special handling here: they require an AnyStore barrier, which the
2209 // x86-64 memory model already provides.
GenUnsafePutAbsolute(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2210 static void GenUnsafePutAbsolute(LocationSummary* locations,
2211                                  DataType::Type type,
2212                                  bool is_volatile,
2213                                  CodeGeneratorX86_64* codegen) {
2214   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2215   CpuRegister address_reg = locations->InAt(1).AsRegister<CpuRegister>();
2216   Address address = Address(address_reg, 0);
2217   CpuRegister value = locations->InAt(2).AsRegister<CpuRegister>();
2218 
2219   if (type == DataType::Type::kInt64) {
2220     __ movq(address, value);
2221   } else if (type == DataType::Type::kInt32) {
2222     __ movl(address, value);
2223   } else {
2224     CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2225     __ movb(address, value);
2226   }
2227 
2228   if (is_volatile) {
2229     codegen->MemoryFence();
2230   }
2231 }
2232 
2233 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2234   VisitJdkUnsafePut(invoke);
2235 }
2236 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2237   VisitJdkUnsafePutAbsolute(invoke);
2238 }
2239 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2240   VisitJdkUnsafePutOrdered(invoke);
2241 }
2242 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2243   VisitJdkUnsafePutVolatile(invoke);
2244 }
2245 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2246   VisitJdkUnsafePutReference(invoke);
2247 }
2248 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2249   VisitJdkUnsafePutObjectOrdered(invoke);
2250 }
2251 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2252   VisitJdkUnsafePutReferenceVolatile(invoke);
2253 }
2254 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2255   VisitJdkUnsafePutLong(invoke);
2256 }
2257 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2258   VisitJdkUnsafePutLongOrdered(invoke);
2259 }
2260 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2261   VisitJdkUnsafePutLongVolatile(invoke);
2262 }
2263 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2264   VisitJdkUnsafePutByte(invoke);
2265 }
2266 
2267 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2268   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2269 }
2270 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2271   GenUnsafePutAbsolute(
2272       invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/false, codegen_);
2273 }
2274 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2275   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2276 }
2277 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2278   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2279 }
2280 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2281   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2282 }
2283 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2284   GenUnsafePut(
2285       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2286 }
2287 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2288   GenUnsafePut(
2289       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2290 }
2291 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2292   GenUnsafePut(
2293       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2294 }
2295 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2296   GenUnsafePut(
2297       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2298 }
2299 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2300   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2301 }
2302 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2303   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2304 }
2305 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2306   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2307 }
2308 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2309   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2310 }
2311 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2312   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2313 }
2314 
2315 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2316                                      HInvoke* invoke,
2317                                      CodeGeneratorX86_64* codegen,
2318                                      DataType::Type type) {
2319   const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2320   LocationSummary* locations =
2321       new (allocator) LocationSummary(invoke,
2322                                       can_call
2323                                           ? LocationSummary::kCallOnSlowPath
2324                                           : LocationSummary::kNoCall,
2325                                       kIntrinsified);
2326   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2327   locations->SetInAt(1, Location::RequiresRegister());
2328   locations->SetInAt(2, Location::RequiresRegister());
2329   // The expected value must be in EAX/RAX.
2330   locations->SetInAt(3, Location::RegisterLocation(RAX));
2331   locations->SetInAt(4, Location::RequiresRegister());
2332 
2333   // RAX is clobbered in CMPXCHG, but we set it as out so no need to add it as temporary.
2334   locations->SetOut(Location::RegisterLocation(RAX));
2335 
2336   if (type == DataType::Type::kReference) {
2337     // Need two temporaries for MarkGCCard.
2338     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2339     locations->AddTemp(Location::RequiresRegister());
2340     if (codegen->EmitReadBarrier()) {
2341       // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2342       DCHECK(kUseBakerReadBarrier);
2343       locations->AddTemp(Location::RequiresRegister());
2344     }
2345   }
2346 }
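// Editor's note (illustrative sketch, not part of ART): the location constraints above mirror
// the LOCK CMPXCHG contract: the expected value must live in (R/E)AX, the instruction writes
// the old field value back into (R/E)AX, and success is reported in ZF. The hypothetical helper
// below has the same compare-and-set semantics via a compiler builtin.
[[maybe_unused]] static inline bool SketchCompareAndSetInt32(int32_t* addr,
                                                             int32_t expected,
                                                             int32_t new_value) {
  // Compiles to LOCK CMPXCHG; on failure the builtin stores the old value back into `expected`.
  return __atomic_compare_exchange_n(
      addr, &expected, new_value, /*weak=*/ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}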
2347 
2348 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2349   VisitJdkUnsafeCASInt(invoke);
2350 }
2351 
2352 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2353   VisitJdkUnsafeCASLong(invoke);
2354 }
2355 
2356 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2357   VisitJdkUnsafeCASObject(invoke);
2358 }
2359 
2360 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2361   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2362   VisitJdkUnsafeCompareAndSetInt(invoke);
2363 }
2364 
2365 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2366   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2367   VisitJdkUnsafeCompareAndSetLong(invoke);
2368 }
2369 
2370 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2371   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2372   VisitJdkUnsafeCompareAndSetReference(invoke);
2373 }
2374 
2375 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2376   CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
2377 }
2378 
2379 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2380   CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
2381 }
2382 
2383 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2384   // The only supported read barrier implementation is the Baker-style read barriers.
2385   if (codegen_->EmitNonBakerReadBarrier()) {
2386     return;
2387   }
2388 
2389   CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
2390 }
2391 
2392 // Convert ZF into the Boolean result.
2393 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2394   __ setcc(kZero, out);
2395   __ movzxb(out, out);
2396 }
2397 
2398 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
2399 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2400                                           DataType::Type type,
2401                                           Address field_addr,
2402                                           Location value,
2403                                           bool is_cmpxchg,
2404                                           bool byte_swap) {
2405   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2406   InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2407 
2408   if (byte_swap) {
2409     instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2410     instr_codegen->Bswap(value, type);
2411   }
2412 
2413   switch (type) {
2414     case DataType::Type::kBool:
2415     case DataType::Type::kInt8:
2416       __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2417       break;
2418     case DataType::Type::kInt16:
2419     case DataType::Type::kUint16:
2420       __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2421       break;
2422     case DataType::Type::kInt32:
2423     case DataType::Type::kUint32:
2424       __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2425       break;
2426     case DataType::Type::kInt64:
2427     case DataType::Type::kUint64:
2428       __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2429       break;
2430     default:
2431       LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2432   }
2433   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2434 
2435   if (byte_swap) {
2436     // Restore byte order for value.
2437     instr_codegen->Bswap(value, type);
2438   }
2439 
2440   CpuRegister rax(RAX);
2441   if (is_cmpxchg) {
2442     if (byte_swap) {
2443       instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2444     }
2445     // Sign-extend or zero-extend the result as necessary.
2446     switch (type) {
2447       case DataType::Type::kBool:
2448         __ movzxb(rax, rax);
2449         break;
2450       case DataType::Type::kInt8:
2451         __ movsxb(rax, rax);
2452         break;
2453       case DataType::Type::kInt16:
2454         __ movsxw(rax, rax);
2455         break;
2456       case DataType::Type::kUint16:
2457         __ movzxw(rax, rax);
2458         break;
2459       default:
2460         break;  // No need to do anything.
2461     }
2462   } else {
2463     GenZFlagToResult(assembler, rax);
2464   }
2465 }
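// Editor's note (illustrative sketch, not part of ART): a single LOCK CMPXCHG produces both
// results used above -- the success flag (ZF, materialized by GenZFlagToResult for
// compare-and-set) and the old field value (left in RAX for compare-and-exchange). The
// hypothetical helper below returns both for a 32-bit field.
[[maybe_unused]] static inline int32_t SketchCompareAndExchangeInt32(int32_t* addr,
                                                                     int32_t expected,
                                                                     int32_t new_value,
                                                                     bool* success) {
  int32_t old_value = expected;
  *success = __atomic_compare_exchange_n(
      addr, &old_value, new_value, /*weak=*/ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return old_value;  // What CMPXCHG leaves in RAX: the prior contents of the field.
}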
2466 
2467 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2468                                          Address field_addr,
2469                                          CpuRegister temp,
2470                                          Location value,
2471                                          Location expected,
2472                                          Location out,
2473                                          bool is64bit,
2474                                          bool is_cmpxchg,
2475                                          bool byte_swap) {
2476   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2477   InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2478 
2479   Location rax_loc = Location::RegisterLocation(RAX);
2480   Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2481 
2482   DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2483 
2484   // Copy `expected` to RAX (required by the CMPXCHG instruction).
2485   codegen->Move(rax_loc, expected);
2486 
2487   // Copy value to some other register (ensure it's not RAX).
2488   DCHECK_NE(temp.AsRegister(), RAX);
2489   codegen->Move(temp_loc, value);
2490 
2491   if (byte_swap) {
2492     instr_codegen->Bswap(rax_loc, type);
2493     instr_codegen->Bswap(temp_loc, type);
2494   }
2495 
2496   if (is64bit) {
2497     __ LockCmpxchgq(field_addr, temp);
2498   } else {
2499     __ LockCmpxchgl(field_addr, temp);
2500   }
2501   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2502   // No need to restore byte order for temporary register.
2503 
2504   if (is_cmpxchg) {
2505     if (byte_swap) {
2506       instr_codegen->Bswap(rax_loc, type);
2507     }
2508     __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
2509   } else {
2510     GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2511   }
2512 }
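// Editor's note (illustrative sketch, not part of ART): the floating-point path above performs
// an integer CAS on the raw bit pattern and only converts to/from an XMM register at the
// boundaries (MOVD/MOVQ). The hypothetical helper below shows that shape for a float field,
// assuming a Clang-style __builtin_bit_cast; note the comparison is bitwise, so -0.0f and +0.0f
// do not match each other.
[[maybe_unused]] static inline bool SketchCompareAndSetFloat(uint32_t* field_bits,
                                                             float expected,
                                                             float new_value) {
  uint32_t expected_bits = __builtin_bit_cast(uint32_t, expected);
  uint32_t new_bits = __builtin_bit_cast(uint32_t, new_value);
  return __atomic_compare_exchange_n(
      field_bits, &expected_bits, new_bits, /*weak=*/ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}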
2513 
2514 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
2515 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2516                                           HInvoke* invoke,
2517                                           CpuRegister base,
2518                                           CpuRegister offset,
2519                                           CpuRegister value,
2520                                           CpuRegister temp1,
2521                                           CpuRegister temp2,
2522                                           CpuRegister temp3,
2523                                           bool is_cmpxchg) {
2524   // The only supported read barrier implementation is the Baker-style read barriers.
2525   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2526 
2527   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2528 
2529   // Mark card for object assuming new value is stored.
2530   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2531   codegen->MaybeMarkGCCard(temp1, temp2, base, value, value_can_be_null);
2532 
2533   Address field_addr(base, offset, TIMES_1, 0);
2534   if (codegen->EmitBakerReadBarrier()) {
2535     // Need to make sure the reference stored in the field is a to-space
2536     // one before attempting the CAS or the CAS could fail incorrectly.
2537     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2538         invoke,
2539         Location::RegisterLocation(temp3.AsRegister()),
2540         base,
2541         field_addr,
2542         /* needs_null_check= */ false,
2543         /* always_update_field= */ true,
2544         &temp1,
2545         &temp2);
2546   } else {
2547     // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2548   }
2549 
2550   bool base_equals_value = (base.AsRegister() == value.AsRegister());
2551   Register value_reg = value.AsRegister();
2552   if (kPoisonHeapReferences) {
2553     if (base_equals_value) {
2554       // If `base` and `value` are the same register location, move `value_reg` to a temporary
2555       // register.  This way, poisoning `value_reg` won't invalidate `base`.
2556       value_reg = temp1.AsRegister();
2557       __ movl(CpuRegister(value_reg), base);
2558     }
2559 
2560     // Check that the register allocator did not assign the location of expected value (RAX) to
2561     // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2562     // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2563     //   not be poisoned at all, as heap poisoning uses address negation.
2564     // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2565     DCHECK_NE(RAX, value_reg);
2566     DCHECK_NE(RAX, base.AsRegister());
2567 
2568     __ PoisonHeapReference(CpuRegister(RAX));
2569     __ PoisonHeapReference(CpuRegister(value_reg));
2570   }
2571 
2572   __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2573   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2574 
2575   if (is_cmpxchg) {
2576     // Output is in RAX, so we can rely on CMPXCHG and do nothing.
2577     __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2578   } else {
2579     GenZFlagToResult(assembler, CpuRegister(RAX));
2580   }
2581 
2582   // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2583   if (kPoisonHeapReferences) {
2584     if (base_equals_value) {
2585       // `value_reg` has been moved to a temporary register, no need to unpoison it.
2586     } else {
2587       // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2588       DCHECK_NE(RAX, value_reg);
2589       __ UnpoisonHeapReference(CpuRegister(value_reg));
2590     }
2591   }
2592 }
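// Editor's note (illustrative sketch, not part of ART): with kPoisonHeapReferences enabled, a
// stored reference is the arithmetic negation of the 32-bit compressed pointer, as the comments
// above describe. Poisoning is its own inverse, which is why poisoning the same register twice
// (the RAX aliasing cases ruled out by the DCHECKs above) would silently undo it. Hypothetical
// helpers:
[[maybe_unused]] static inline uint32_t SketchPoisonReference(uint32_t ref) {
  return 0u - ref;  // NEGL: two's-complement negation of the compressed reference.
}
[[maybe_unused]] static inline uint32_t SketchUnpoisonReference(uint32_t poisoned) {
  return 0u - poisoned;  // Same operation; poison(poison(x)) == x.
}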
2593 
2594 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2595 // and always return true.
2596 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2597   if (kIsDebugBuild) {
2598     for (size_t i = 0; i < regs.size(); ++i) {
2599       for (size_t j = 0; j < i; ++j) {
2600         if (regs[i].AsRegister() == regs[j].AsRegister()) {
2601           return false;
2602         }
2603       }
2604     }
2605   }
2606   return true;
2607 }
2608 
2609 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2610 // temporary indices that may not correspond to real registers for code paths that do not use them.
2611 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2612                                        HInvoke* invoke,
2613                                        DataType::Type type,
2614                                        CpuRegister base,
2615                                        CpuRegister offset,
2616                                        uint32_t temp1_index,
2617                                        uint32_t temp2_index,
2618                                        uint32_t temp3_index,
2619                                        Location new_value,
2620                                        Location expected,
2621                                        Location out,
2622                                        bool is_cmpxchg,
2623                                        bool byte_swap) {
2624   LocationSummary* locations = invoke->GetLocations();
2625   Address field_address(base, offset, TIMES_1, 0);
2626 
2627   if (DataType::IsFloatingPointType(type)) {
2628     bool is64bit = (type == DataType::Type::kFloat64);
2629     CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2630     DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2631 
2632     GenCompareAndSetOrExchangeFP(
2633         codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2634   } else {
2635     // Both the expected value for CMPXCHG and the output are in RAX.
2636     DCHECK_EQ(RAX, expected.AsRegister<Register>());
2637     DCHECK_EQ(RAX, out.AsRegister<Register>());
2638 
2639     if (type == DataType::Type::kReference) {
2640       CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2641       CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2642       CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2643       CpuRegister temp3 = codegen->EmitReadBarrier()
2644           ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2645           : CpuRegister(kNoRegister);
2646       DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2647 
2648       DCHECK(!byte_swap);
2649       GenCompareAndSetOrExchangeRef(
2650           codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2651     } else {
2652       GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2653     }
2654   }
2655 }
2656 
2657 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2658   LocationSummary* locations = invoke->GetLocations();
2659   GenCompareAndSetOrExchange(codegen,
2660                              invoke,
2661                              type,
2662                              /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2663                              /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2664                              /*temp1_index=*/ 0,
2665                              /*temp2_index=*/ 1,
2666                              /*temp3_index=*/ 2,
2667                              /*new_value=*/ locations->InAt(4),
2668                              /*expected=*/ locations->InAt(3),
2669                              locations->Out(),
2670                              /*is_cmpxchg=*/ false,
2671                              /*byte_swap=*/ false);
2672 }
2673 
2674 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2675   VisitJdkUnsafeCASInt(invoke);
2676 }
2677 
2678 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2679   VisitJdkUnsafeCASLong(invoke);
2680 }
2681 
2682 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2683   VisitJdkUnsafeCASObject(invoke);
2684 }
2685 
2686 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2687   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2688   VisitJdkUnsafeCompareAndSetInt(invoke);
2689 }
2690 
2691 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2692   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2693   VisitJdkUnsafeCompareAndSetLong(invoke);
2694 }
2695 
2696 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2697   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2698   VisitJdkUnsafeCompareAndSetReference(invoke);
2699 }
2700 
2701 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2702   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2703 }
2704 
2705 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2706   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2707 }
2708 
2709 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2710   // The only supported read barrier implementation is the Baker-style read barriers.
2711   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2712 
2713   GenCAS(DataType::Type::kReference, invoke, codegen_);
2714 }
2715 
2716 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2717                                               HInvoke* invoke,
2718                                               CodeGeneratorX86_64* codegen) {
2719   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2720   LocationSummary* locations =
2721       new (allocator) LocationSummary(invoke,
2722                                       can_call
2723                                           ? LocationSummary::kCallOnSlowPath
2724                                           : LocationSummary::kNoCall,
2725                                       kIntrinsified);
2726   if (can_call && kUseBakerReadBarrier) {
2727     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2728   }
2729   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2730   locations->SetInAt(1, Location::RequiresRegister());
2731   locations->SetInAt(2, Location::RequiresRegister());
2732   // Use the same register for both the output and the new value or addend
2733   // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
2734   locations->SetInAt(3, Location::RegisterLocation(RAX));
2735   // Only set the `out` register if it's needed. In the void case we can still use RAX in the
2736   // same manner as it is marked as a temp register.
2737   if (invoke->GetType() == DataType::Type::kVoid) {
2738     locations->AddTemp(Location::RegisterLocation(RAX));
2739   } else {
2740     locations->SetOut(Location::RegisterLocation(RAX));
2741   }
2742 }
2743 
2744 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2745   VisitJdkUnsafeGetAndAddInt(invoke);
2746 }
2747 
2748 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2749   VisitJdkUnsafeGetAndAddLong(invoke);
2750 }
2751 
2752 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2753   VisitJdkUnsafeGetAndSetInt(invoke);
2754 }
2755 
2756 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2757   VisitJdkUnsafeGetAndSetLong(invoke);
2758 }
2759 
2760 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2761   VisitJdkUnsafeGetAndSetReference(invoke);
2762 }
2763 
2764 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2765   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2766 }
2767 
2768 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2769   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2770 }
2771 
2772 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2773   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2774 }
2775 
2776 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2777   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2778 }
2779 
2780 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2781   // The only supported read barrier implementation is the Baker-style read barriers.
2782   if (codegen_->EmitNonBakerReadBarrier()) {
2783     return;
2784   }
2785 
2786   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2787   invoke->GetLocations()->AddRegisterTemps(3);
2788 }
2789 
2790 enum class GetAndUpdateOp {
2791   kSet,
2792   kAdd,
2793   kBitwiseAnd,
2794   kBitwiseOr,
2795   kBitwiseXor
2796 };
2797 
2798 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2799                                   DataType::Type type,
2800                                   CodeGeneratorX86_64* codegen,
2801                                   GetAndUpdateOp get_and_update_op) {
2802   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2803   LocationSummary* locations = invoke->GetLocations();
2804 
2805   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2806   Location rax_loc = Location::RegisterLocation(RAX);
2807   // We requested RAX to use as a temporary for void methods, as we don't return the value.
2808   DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
2809   CpuRegister out_or_temp = rax_loc.AsRegister<CpuRegister>();           // Result.
2810   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();       // Object pointer.
2811   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();     // Long offset.
2812   DCHECK_EQ(out_or_temp, locations->InAt(3).AsRegister<CpuRegister>());  // New value or addend.
2813   Address field_address(base, offset, TIMES_1, 0);
2814 
2815   if (type == DataType::Type::kInt32) {
2816     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2817       __ LockXaddl(field_address, out_or_temp);
2818     } else {
2819       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2820       __ xchgl(out_or_temp, field_address);
2821     }
2822   } else if (type == DataType::Type::kInt64) {
2823     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2824       __ LockXaddq(field_address, out_or_temp);
2825     } else {
2826       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2827       __ xchgq(out_or_temp, field_address);
2828     }
2829   } else {
2830     DCHECK_EQ(type, DataType::Type::kReference);
2831     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2832 
2833     // In the void case, we have an extra temp register, which is used to signal the register
2834     // allocator that we are clobbering RAX.
2835     const uint32_t extra_temp = is_void ? 1u : 0u;
2836     DCHECK_EQ(locations->GetTempCount(), 3u + extra_temp);
2837     DCHECK_IMPLIES(is_void, locations->GetTemp(0u).Equals(Location::RegisterLocation(RAX)));
2838 
2839     CpuRegister temp1 = locations->GetTemp(0u + extra_temp).AsRegister<CpuRegister>();
2840     CpuRegister temp2 = locations->GetTemp(1u + extra_temp).AsRegister<CpuRegister>();
2841     CpuRegister temp3 = locations->GetTemp(2u + extra_temp).AsRegister<CpuRegister>();
2842 
2843     if (codegen->EmitReadBarrier()) {
2844       DCHECK(kUseBakerReadBarrier);
2845       // Ensure that the field contains a to-space reference.
2846       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2847           invoke,
2848           Location::RegisterLocation(temp3.AsRegister()),
2849           base,
2850           field_address,
2851           /*needs_null_check=*/ false,
2852           /*always_update_field=*/ true,
2853           &temp1,
2854           &temp2);
2855     }
2856 
2857     // Mark card for object as a new value shall be stored.
2858     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
2859     codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_or_temp, new_value_can_be_null);
2860 
2861     if (kPoisonHeapReferences) {
2862       // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2863       // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2864       __ movl(temp1, out_or_temp);
2865       __ PoisonHeapReference(temp1);
2866       __ xchgl(temp1, field_address);
2867       if (!is_void) {
2868         __ UnpoisonHeapReference(temp1);
2869         __ movl(out_or_temp, temp1);
2870       }
2871     } else {
2872       __ xchgl(out_or_temp, field_address);
2873     }
2874   }
2875 }
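// Editor's note (illustrative sketch, not part of ART): the two integer shapes above expressed
// with compiler builtins. On x86-64, __atomic_fetch_add lowers to LOCK XADD and
// __atomic_exchange_n to XCHG (implicitly locked), which is why the same RAX register can hold
// the addend/new value on entry and the old value on exit. Hypothetical helpers:
[[maybe_unused]] static inline int32_t SketchGetAndAddInt32(int32_t* addr, int32_t delta) {
  return __atomic_fetch_add(addr, delta, __ATOMIC_SEQ_CST);  // LOCK XADD
}
[[maybe_unused]] static inline int64_t SketchGetAndSetInt64(int64_t* addr, int64_t value) {
  return __atomic_exchange_n(addr, value, __ATOMIC_SEQ_CST);  // XCHG
}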
2876 
2877 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2878   VisitJdkUnsafeGetAndAddInt(invoke);
2879 }
2880 
2881 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2882   VisitJdkUnsafeGetAndAddLong(invoke);
2883 }
2884 
2885 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2886   VisitJdkUnsafeGetAndSetInt(invoke);
2887 }
2888 
2889 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2890   VisitJdkUnsafeGetAndSetLong(invoke);
2891 }
2892 
2893 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2894   VisitJdkUnsafeGetAndSetReference(invoke);
2895 }
2896 
2897 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2898   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2899 }
2900 
2901 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2902   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2903 }
2904 
2905 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2906   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2907 }
2908 
2909 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2910   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2911 }
2912 
2913 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2914   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2915 }
2916 
2917 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2918   LocationSummary* locations =
2919       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2920   locations->SetInAt(0, Location::RequiresRegister());
2921   locations->SetOut(Location::SameAsFirstInput());
2922   locations->AddTemp(Location::RequiresRegister());
2923 }
2924 
2925 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2926                      X86_64Assembler* assembler) {
2927   Immediate imm_shift(shift);
2928   Immediate imm_mask(mask);
2929   __ movl(temp, reg);
2930   __ shrl(reg, imm_shift);
2931   __ andl(temp, imm_mask);
2932   __ andl(reg, imm_mask);
2933   __ shll(temp, imm_shift);
2934   __ orl(reg, temp);
2935 }
2936 
2937 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2938   X86_64Assembler* assembler = GetAssembler();
2939   LocationSummary* locations = invoke->GetLocations();
2940 
2941   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2942   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2943 
2944   /*
2945    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2946    * swapping bits to reverse bits in a number x. Using bswap to save instructions
2947    * compared to generic luni implementation which has 5 rounds of swapping bits.
2948    * x = bswap x
2949    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2950    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2951    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2952    */
2953   __ bswapl(reg);
2954   SwapBits(reg, temp, 1, 0x55555555, assembler);
2955   SwapBits(reg, temp, 2, 0x33333333, assembler);
2956   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2957 }
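// Editor's note (illustrative sketch, not part of ART): the same Integer.reverse recipe in
// plain C++ -- one byte swap followed by the three SwapBits rounds emitted above. The three
// rounds commute, so the 1/2/4 order is just a convention. Hypothetical helper:
[[maybe_unused]] static inline uint32_t SketchReverseBits32(uint32_t x) {
  x = __builtin_bswap32(x);                                  // BSWAP: reverse byte order.
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);   // Swap adjacent bits.
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);   // Swap 2-bit groups.
  x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);   // Swap nibbles within each byte.
  return x;
}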
2958 
2959 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2960   LocationSummary* locations =
2961       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2962   locations->SetInAt(0, Location::RequiresRegister());
2963   locations->SetOut(Location::SameAsFirstInput());
2964   locations->AddRegisterTemps(2);
2965 }
2966 
2967 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2968                        int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2969   Immediate imm_shift(shift);
2970   __ movq(temp_mask, Immediate(mask));
2971   __ movq(temp, reg);
2972   __ shrq(reg, imm_shift);
2973   __ andq(temp, temp_mask);
2974   __ andq(reg, temp_mask);
2975   __ shlq(temp, imm_shift);
2976   __ orq(reg, temp);
2977 }
2978 
2979 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2980   X86_64Assembler* assembler = GetAssembler();
2981   LocationSummary* locations = invoke->GetLocations();
2982 
2983   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2984   CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2985   CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2986 
2987   /*
2988    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2989    * swapping bits to reverse bits in a long number x. Using bswap to save instructions
2990    * compared to generic luni implementation which has 5 rounds of swapping bits.
2991    * x = bswap x
2992    * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2993    * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2994    * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2995    */
2996   __ bswapq(reg);
2997   SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2998   SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2999   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
3000 }
3001 
3002 static void CreateBitCountLocations(
3003     ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
3004   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
3005     // Do nothing if there is no popcnt support. This results in generating
3006     // a call for the intrinsic rather than direct code.
3007     return;
3008   }
3009   LocationSummary* locations =
3010       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3011   locations->SetInAt(0, Location::Any());
3012   locations->SetOut(Location::RequiresRegister());
3013 }
3014 
3015 static void GenBitCount(X86_64Assembler* assembler,
3016                         CodeGeneratorX86_64* codegen,
3017                         HInvoke* invoke,
3018                         bool is_long) {
3019   LocationSummary* locations = invoke->GetLocations();
3020   Location src = locations->InAt(0);
3021   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3022 
3023   if (invoke->InputAt(0)->IsConstant()) {
3024     // Evaluate this at compile time.
3025     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3026     int32_t result = is_long
3027         ? POPCOUNT(static_cast<uint64_t>(value))
3028         : POPCOUNT(static_cast<uint32_t>(value));
3029     codegen->Load32BitValue(out, result);
3030     return;
3031   }
3032 
3033   if (src.IsRegister()) {
3034     if (is_long) {
3035       __ popcntq(out, src.AsRegister<CpuRegister>());
3036     } else {
3037       __ popcntl(out, src.AsRegister<CpuRegister>());
3038     }
3039   } else if (is_long) {
3040     DCHECK(src.IsDoubleStackSlot());
3041     __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3042   } else {
3043     DCHECK(src.IsStackSlot());
3044     __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3045   }
3046 }
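// Editor's note (illustrative sketch, not part of ART): with POPCNT support, bitCount is a
// single instruction on a register or memory operand, and constant inputs are folded at
// compile time as above. The hypothetical helper below emits the same POPCNT when compiled for
// a CPU with popcnt support (and a bit-twiddling fallback otherwise).
[[maybe_unused]] static inline int SketchBitCount64(uint64_t x) {
  return __builtin_popcountll(x);
}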
3047 
3048 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
3049   CreateBitCountLocations(allocator_, codegen_, invoke);
3050 }
3051 
3052 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
3053   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3054 }
3055 
3056 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
3057   CreateBitCountLocations(allocator_, codegen_, invoke);
3058 }
3059 
3060 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
3061   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3062 }
3063 
3064 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
3065   LocationSummary* locations =
3066       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3067   locations->SetInAt(0, Location::Any());
3068   locations->SetOut(Location::RequiresRegister());
3069   locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
3070                              : Location::RequiresRegister());  // any will do
3071 }
3072 
3073 static void GenOneBit(X86_64Assembler* assembler,
3074                       CodeGeneratorX86_64* codegen,
3075                       HInvoke* invoke,
3076                       bool is_high, bool is_long) {
3077   LocationSummary* locations = invoke->GetLocations();
3078   Location src = locations->InAt(0);
3079   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3080 
3081   if (invoke->InputAt(0)->IsConstant()) {
3082     // Evaluate this at compile time.
3083     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3084     if (value == 0) {
3085       __ xorl(out, out);  // Clears upper bits too.
3086       return;
3087     }
3088     // Nonzero value.
3089     if (is_high) {
3090       value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
3091                       : 31 - CLZ(static_cast<uint32_t>(value));
3092     } else {
3093       value = is_long ? CTZ(static_cast<uint64_t>(value))
3094                       : CTZ(static_cast<uint32_t>(value));
3095     }
3096     if (is_long) {
3097       codegen->Load64BitValue(out, 1ULL << value);
3098     } else {
3099       codegen->Load32BitValue(out, 1 << value);
3100     }
3101     return;
3102   }
3103 
3104   // Handle the non-constant cases.
3105   if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
3106       src.IsRegister()) {
3107       __ blsi(out, src.AsRegister<CpuRegister>());
3108   } else {
3109     CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3110     if (is_high) {
3111       // Use architectural support: basically 1 << bsr.
3112       if (src.IsRegister()) {
3113         if (is_long) {
3114           __ bsrq(tmp, src.AsRegister<CpuRegister>());
3115         } else {
3116           __ bsrl(tmp, src.AsRegister<CpuRegister>());
3117         }
3118       } else if (is_long) {
3119         DCHECK(src.IsDoubleStackSlot());
3120         __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3121       } else {
3122         DCHECK(src.IsStackSlot());
3123         __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3124       }
3125       // BSR sets ZF if the input was zero.
3126       NearLabel is_zero, done;
3127       __ j(kEqual, &is_zero);
3128       __ movl(out, Immediate(1));  // Clears upper bits too.
3129       if (is_long) {
3130         __ shlq(out, tmp);
3131       } else {
3132         __ shll(out, tmp);
3133       }
3134       __ jmp(&done);
3135       __ Bind(&is_zero);
3136       __ xorl(out, out);  // Clears upper bits too.
3137       __ Bind(&done);
3138     } else  {
3139       // Copy input into temporary.
3140       if (src.IsRegister()) {
3141         if (is_long) {
3142           __ movq(tmp, src.AsRegister<CpuRegister>());
3143         } else {
3144           __ movl(tmp, src.AsRegister<CpuRegister>());
3145         }
3146       } else if (is_long) {
3147         DCHECK(src.IsDoubleStackSlot());
3148         __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3149       } else {
3150         DCHECK(src.IsStackSlot());
3151         __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3152       }
3153       // Do the bit twiddling: basically tmp & -tmp;
3154       if (is_long) {
3155         __ movq(out, tmp);
3156         __ negq(tmp);
3157         __ andq(out, tmp);
3158       } else {
3159         __ movl(out, tmp);
3160         __ negl(tmp);
3161         __ andl(out, tmp);
3162       }
3163     }
3164   }
3165 }
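// Editor's note (illustrative sketch, not part of ART): the two non-constant paths above in
// plain C++. The zero check mirrors the BSR ZF handling (BSR leaves its output undefined for a
// zero input), and the lowest-one-bit expression is exactly what BLSI computes. Hypothetical
// helpers:
[[maybe_unused]] static inline uint64_t SketchHighestOneBit64(uint64_t x) {
  if (x == 0u) {
    return 0u;
  }
  int msb_index = 63 - __builtin_clzll(x);  // Equivalent to BSR for non-zero inputs.
  return uint64_t{1} << msb_index;          // "1 << bsr".
}
[[maybe_unused]] static inline uint64_t SketchLowestOneBit64(uint64_t x) {
  return x & (0u - x);                      // x & -x, i.e. BLSI.
}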
3166 
3167 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3168   CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3169 }
3170 
3171 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3172   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
3173 }
3174 
3175 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3176   CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3177 }
3178 
3179 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3180   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
3181 }
3182 
3183 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3184   CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3185 }
3186 
3187 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3188   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
3189 }
3190 
3191 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3192   CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3193 }
3194 
3195 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3196   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
3197 }
3198 
3199 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3200   LocationSummary* locations =
3201       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3202   locations->SetInAt(0, Location::Any());
3203   locations->SetOut(Location::RequiresRegister());
3204 }
3205 
3206 static void GenLeadingZeros(X86_64Assembler* assembler,
3207                             CodeGeneratorX86_64* codegen,
3208                             HInvoke* invoke, bool is_long) {
3209   LocationSummary* locations = invoke->GetLocations();
3210   Location src = locations->InAt(0);
3211   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3212 
3213   int zero_value_result = is_long ? 64 : 32;
3214   if (invoke->InputAt(0)->IsConstant()) {
3215     // Evaluate this at compile time.
3216     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3217     if (value == 0) {
3218       value = zero_value_result;
3219     } else {
3220       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3221     }
3222     codegen->Load32BitValue(out, value);
3223     return;
3224   }
3225 
3226   // Handle the non-constant cases.
3227   if (src.IsRegister()) {
3228     if (is_long) {
3229       __ bsrq(out, src.AsRegister<CpuRegister>());
3230     } else {
3231       __ bsrl(out, src.AsRegister<CpuRegister>());
3232     }
3233   } else if (is_long) {
3234     DCHECK(src.IsDoubleStackSlot());
3235     __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3236   } else {
3237     DCHECK(src.IsStackSlot());
3238     __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3239   }
3240 
3241   // BSR sets ZF if the input was zero, and the output is undefined.
3242   NearLabel is_zero, done;
3243   __ j(kEqual, &is_zero);
3244 
3245   // Correct the result from BSR to get the CLZ result.
3246   __ xorl(out, Immediate(zero_value_result - 1));
3247   __ jmp(&done);
3248 
3249   // Fix the zero case with the expected result.
3250   __ Bind(&is_zero);
3251   __ movl(out, Immediate(zero_value_result));
3252 
3253   __ Bind(&done);
3254 }
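// Editor's note (illustrative sketch, not part of ART): the BSR-based numberOfLeadingZeros
// recipe in plain C++. For a bit index b in [0, 31], (31 ^ b) == (31 - b), which is why the
// code above corrects the BSR result with a single XOR; the zero input is special-cased because
// Java defines the answer as 32 while BSR leaves its output undefined. Hypothetical helper:
[[maybe_unused]] static inline int SketchNumberOfLeadingZeros32(uint32_t x) {
  if (x == 0u) {
    return 32;
  }
  int bsr = 31 - __builtin_clz(x);  // Index of the highest set bit, as BSR returns it.
  return bsr ^ 31;                  // Same as 31 - bsr for bsr in [0, 31].
}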
3255 
3256 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3257   CreateLeadingZeroLocations(allocator_, invoke);
3258 }
3259 
3260 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3261   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3262 }
3263 
3264 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3265   CreateLeadingZeroLocations(allocator_, invoke);
3266 }
3267 
3268 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3269   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3270 }
3271 
3272 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3273   LocationSummary* locations =
3274       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3275   locations->SetInAt(0, Location::Any());
3276   locations->SetOut(Location::RequiresRegister());
3277 }
3278 
3279 static void GenTrailingZeros(X86_64Assembler* assembler,
3280                              CodeGeneratorX86_64* codegen,
3281                              HInvoke* invoke, bool is_long) {
3282   LocationSummary* locations = invoke->GetLocations();
3283   Location src = locations->InAt(0);
3284   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3285 
3286   int zero_value_result = is_long ? 64 : 32;
3287   if (invoke->InputAt(0)->IsConstant()) {
3288     // Evaluate this at compile time.
3289     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3290     if (value == 0) {
3291       value = zero_value_result;
3292     } else {
3293       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3294     }
3295     codegen->Load32BitValue(out, value);
3296     return;
3297   }
3298 
3299   // Handle the non-constant cases.
3300   if (src.IsRegister()) {
3301     if (is_long) {
3302       __ bsfq(out, src.AsRegister<CpuRegister>());
3303     } else {
3304       __ bsfl(out, src.AsRegister<CpuRegister>());
3305     }
3306   } else if (is_long) {
3307     DCHECK(src.IsDoubleStackSlot());
3308     __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3309   } else {
3310     DCHECK(src.IsStackSlot());
3311     __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3312   }
3313 
3314   // BSF sets ZF if the input was zero, and the output is undefined.
3315   NearLabel done;
3316   __ j(kNotEqual, &done);
3317 
3318   // Fix the zero case with the expected result.
3319   __ movl(out, Immediate(zero_value_result));
3320 
3321   __ Bind(&done);
3322 }
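// Editor's note (illustrative sketch, not part of ART): BSF already returns the trailing-zero
// count for non-zero inputs, so the only fix-up above is the zero case, where Java defines the
// result as 32/64 but BSF leaves its output undefined. Hypothetical helper:
[[maybe_unused]] static inline int SketchNumberOfTrailingZeros32(uint32_t x) {
  if (x == 0u) {
    return 32;
  }
  return __builtin_ctz(x);  // BSF: index of the lowest set bit == number of trailing zeros.
}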
3323 
3324 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3325   CreateTrailingZeroLocations(allocator_, invoke);
3326 }
3327 
3328 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3329   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3330 }
3331 
3332 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3333   CreateTrailingZeroLocations(allocator_, invoke);
3334 }
3335 
3336 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3337   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3338 }
3339 
3340 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
3341   void IntrinsicLocationsBuilderX86_64::Visit##name##ValueOf(HInvoke* invoke) {          \
3342     InvokeRuntimeCallingConvention calling_convention;                                   \
3343     IntrinsicVisitor::ComputeValueOfLocations(                                           \
3344         invoke,                                                                          \
3345         codegen_,                                                                        \
3346         low,                                                                             \
3347         (high) - (low) + 1,                                                              \
3348         Location::RegisterLocation(RAX),                                                 \
3349         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
3350   }                                                                                      \
3351   void IntrinsicCodeGeneratorX86_64::Visit##name##ValueOf(HInvoke* invoke) {             \
3352     IntrinsicVisitor::ValueOfInfo info =                                                 \
3353         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
3354                                              codegen_->GetCompilerOptions(),             \
3355                                              WellKnownClasses::java_lang_##name##_value, \
3356                                              low,                                        \
3357                                              (high) - (low) + 1,                         \
3358                                              start_index);                               \
3359     HandleValueOf(invoke, info, type);                                                   \
3360   }
BOXED_TYPES(VISIT_INTRINSIC)3361   BOXED_TYPES(VISIT_INTRINSIC)
3362 #undef VISIT_INTRINSIC
3363 
3364 template <typename T>
3365 static void Store(X86_64Assembler* assembler,
3366                   DataType::Type primitive_type,
3367                   const Address& address,
3368                   const T& operand) {
3369   switch (primitive_type) {
3370     case DataType::Type::kInt8:
3371     case DataType::Type::kUint8: {
3372       __ movb(address, operand);
3373       break;
3374     }
3375     case DataType::Type::kInt16:
3376     case DataType::Type::kUint16: {
3377       __ movw(address, operand);
3378       break;
3379     }
3380     case DataType::Type::kInt32: {
3381       __ movl(address, operand);
3382       break;
3383     }
3384     default: {
3385       LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
3386     }
3387   }
3388 }
3389 
HandleValueOf(HInvoke * invoke,const IntrinsicVisitor::ValueOfInfo & info,DataType::Type type)3390 void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
3391                                                  const IntrinsicVisitor::ValueOfInfo& info,
3392                                                  DataType::Type type) {
3393   LocationSummary* locations = invoke->GetLocations();
3394   X86_64Assembler* assembler = GetAssembler();
3395 
3396   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3397   InvokeRuntimeCallingConvention calling_convention;
3398   CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3399   auto allocate_instance = [&]() {
3400     codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3401     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3402     CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
3403   };
3404   if (invoke->InputAt(0)->IsIntConstant()) {
3405     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3406     if (static_cast<uint32_t>(value - info.low) < info.length) {
3407       // Just embed the object in the code.
3408       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3409       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3410     } else {
3411       DCHECK(locations->CanCall());
3412       // Allocate and initialize a new object.
3413       // TODO: If we JIT, we could allocate the boxed value now, and store it in the
3414       // JIT object table.
3415       allocate_instance();
3416       Store(assembler, type, Address(out, info.value_offset), Immediate(value));
3417     }
3418   } else {
3419     DCHECK(locations->CanCall());
3420     CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3421     // Check bounds of our cache.
3422     __ leal(out, Address(in, -info.low));
3423     __ cmpl(out, Immediate(info.length));
3424     NearLabel allocate, done;
3425     __ j(kAboveEqual, &allocate);
3426     // If the value is within the bounds, load the boxed value directly from the array.
3427     DCHECK_NE(out.AsRegister(), argument.AsRegister());
3428     codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3429     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3430                   "Check heap reference size.");
3431     __ movl(out, Address(argument, out, TIMES_4, 0));
3432     __ MaybeUnpoisonHeapReference(out);
3433     __ jmp(&done);
3434     __ Bind(&allocate);
3435     // Otherwise allocate and initialize a new object.
3436     allocate_instance();
3437     Store(assembler, type, Address(out, info.value_offset), in);
3438     __ Bind(&done);
3439   }
3440 }
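
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The leal/cmpl/j(kAboveEqual) sequence above is the usual unsigned-subtraction range
// check for the boxed-value cache. The helper name is hypothetical; it only restates
// the test in plain C++:
//
//   static bool InBoxCacheSketch(int32_t value, int32_t low, uint32_t length) {
//     // If value < low the subtraction wraps to a large unsigned number, so a single
//     // unsigned comparison rejects both out-of-range directions.
//     return static_cast<uint32_t>(value - low) < length;
//   }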
3441 
VisitReferenceGetReferent(HInvoke * invoke)3442 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3443   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3444 }
3445 
VisitReferenceGetReferent(HInvoke * invoke)3446 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3447   X86_64Assembler* assembler = GetAssembler();
3448   LocationSummary* locations = invoke->GetLocations();
3449 
3450   Location obj = locations->InAt(0);
3451   Location out = locations->Out();
3452 
3453   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3454   codegen_->AddSlowPath(slow_path);
3455 
3456   if (codegen_->EmitReadBarrier()) {
3457     // Check self->GetWeakRefAccessEnabled().
3458     ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3459     __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3460                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3461     __ j(kNotEqual, slow_path->GetEntryLabel());
3462   }
3463 
3464   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3465   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3466 
3467   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3468   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3469   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3470   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3471             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3472   __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3473           Immediate(0));
3474   __ j(kNotEqual, slow_path->GetEntryLabel());
3475 
3476   // Load the value from the field.
3477   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3478   if (codegen_->EmitBakerReadBarrier()) {
3479     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3480                                                     out,
3481                                                     obj.AsRegister<CpuRegister>(),
3482                                                     referent_offset,
3483                                                     /*needs_null_check=*/ true);
3484     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3485     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3486   } else {
3487     __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3488     codegen_->MaybeRecordImplicitNullCheck(invoke);
3489     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3490     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3491     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3492   }
3493   __ Bind(slow_path->GetExitLabel());
3494 }
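
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The single cmpw above checks the two adjacent byte-sized static fields
// (disableIntrinsic and slowPathEnabled) in one 16-bit comparison. A hypothetical
// plain-C++ equivalent, assuming the two flags are laid out back to back:
//
//   #include <cstdint>
//   #include <cstring>
//   static bool ReferenceNeedsSlowPathSketch(const uint8_t* disable_intrinsic_field) {
//     uint16_t packed;  // disableIntrinsic at offset 0, slowPathEnabled at offset 1.
//     std::memcpy(&packed, disable_intrinsic_field, sizeof(packed));
//     return packed != 0u;  // Either flag being set forces the slow path.
//   }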
3495 
VisitReferenceRefersTo(HInvoke * invoke)3496 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3497   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3498 }
3499 
VisitReferenceRefersTo(HInvoke * invoke)3500 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3501   X86_64Assembler* assembler = GetAssembler();
3502   LocationSummary* locations = invoke->GetLocations();
3503 
3504   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3505   CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3506   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3507 
3508   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3509   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3510 
3511   __ movl(out, Address(obj, referent_offset));
3512   codegen_->MaybeRecordImplicitNullCheck(invoke);
3513   __ MaybeUnpoisonHeapReference(out);
3514   // Note that the fence is a no-op, thanks to the x86-64 memory model.
3515   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3516 
3517   __ cmpl(out, other);
3518 
3519   if (codegen_->EmitReadBarrier()) {
3520     DCHECK(kUseBakerReadBarrier);
3521 
3522     NearLabel calculate_result;
3523     __ j(kEqual, &calculate_result);  // ZF set if taken.
3524 
3525     // Check if the loaded reference is null in a way that leaves ZF clear for null.
3526     __ cmpl(out, Immediate(1));
3527     __ j(kBelow, &calculate_result);  // ZF clear if taken.
3528 
3529     // For correct memory visibility, we need a barrier before loading the lock word,
3530     // but the barrier already emitted for the volatile load above is sufficient.
3531 
3532     // Load the lockword and check if it is a forwarding address.
3533     static_assert(LockWord::kStateShift == 30u);
3534     static_assert(LockWord::kStateForwardingAddress == 3u);
3535     __ movl(out, Address(out, monitor_offset));
3536     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3537     __ j(kBelow, &calculate_result);   // ZF clear if taken.
3538 
3539     // Extract the forwarding address and compare with `other`.
3540     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3541     __ cmpl(out, other);
3542 
3543     __ Bind(&calculate_result);
3544   }
3545 
3546   // Convert ZF into the Boolean result.
3547   __ setcc(kEqual, out);
3548   __ movzxb(out, out);
3549 }
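
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The 0xc0000000 comparison above tests whether the top two lock-word bits hold
// kStateForwardingAddress (3); if so, the referent may have been moved and the
// forwarding address is recovered by the shll. Hypothetical helpers restating that:
//
//   static bool IsForwardingAddressSketch(uint32_t lock_word) {
//     return lock_word >= 0xc0000000u;  // State bits (31:30) == 3.
//   }
//   static uint32_t DecodeForwardingAddressSketch(uint32_t lock_word) {
//     return lock_word << LockWord::kForwardingAddressShift;  // Mirrors the shll above.
//   }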
3550 
VisitThreadInterrupted(HInvoke * invoke)3551 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3552   LocationSummary* locations =
3553       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3554   locations->SetOut(Location::RequiresRegister());
3555 }
3556 
VisitThreadInterrupted(HInvoke * invoke)3557 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3558   X86_64Assembler* assembler = GetAssembler();
3559   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3560   Address address = Address::Absolute(
3561       Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3562   NearLabel done;
3563   __ gs()->movl(out, address);
3564   __ testl(out, out);
3565   __ j(kEqual, &done);
3566   __ gs()->movl(address, Immediate(0));
3567   codegen_->MemoryFence();
3568   __ Bind(&done);
3569 }
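
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// Thread.interrupted() is a read-and-clear of the per-thread flag; the code above only
// stores and fences when the flag was set. A rough std::atomic analogue (hypothetical
// helper, requires <atomic>):
//
//   static bool InterruptedSketch(std::atomic<int32_t>* interrupted_flag) {
//     if (interrupted_flag->load(std::memory_order_relaxed) == 0) {
//       return false;  // Common case: no store, no fence.
//     }
//     interrupted_flag->store(0, std::memory_order_relaxed);
//     std::atomic_thread_fence(std::memory_order_seq_cst);  // Mirrors codegen_->MemoryFence().
//     return true;
//   }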
3570 
VisitReachabilityFence(HInvoke * invoke)3571 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3572   LocationSummary* locations =
3573       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3574   locations->SetInAt(0, Location::Any());
3575 }
3576 
VisitReachabilityFence(HInvoke * invoke)3577 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3578 
CreateDivideUnsignedLocations(HInvoke * invoke,ArenaAllocator * allocator)3579 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3580   LocationSummary* locations =
3581       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3582   locations->SetInAt(0, Location::RegisterLocation(RAX));
3583   locations->SetInAt(1, Location::RequiresRegister());
3584   locations->SetOut(Location::SameAsFirstInput());
3585   // The DIV instruction uses rdx:rax (edx:eax for 32-bit) as the dividend, so RDX is clobbered.
3586   locations->AddTemp(Location::RegisterLocation(RDX));
3587 }
3588 
GenerateDivideUnsigned(HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type data_type)3589 static void GenerateDivideUnsigned(HInvoke* invoke,
3590                                    CodeGeneratorX86_64* codegen,
3591                                    DataType::Type data_type) {
3592   LocationSummary* locations = invoke->GetLocations();
3593   Location out = locations->Out();
3594   Location first = locations->InAt(0);
3595   Location second = locations->InAt(1);
3596   CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3597   CpuRegister second_reg = second.AsRegister<CpuRegister>();
3598 
3599   DCHECK_EQ(RAX, first.AsRegister<Register>());
3600   DCHECK_EQ(RAX, out.AsRegister<Register>());
3601   DCHECK_EQ(RDX, rdx.AsRegister());
3602 
3603   // Check whether the divisor is zero and bail out to the slow path if so.
3604   auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3605   codegen->AddSlowPath(slow_path);
3606 
3607   X86_64Assembler* assembler = codegen->GetAssembler();
3608   if (data_type == DataType::Type::kInt32) {
3609     __ testl(second_reg, second_reg);
3610     __ j(kEqual, slow_path->GetEntryLabel());
3611     __ xorl(rdx, rdx);
3612     __ divl(second_reg);
3613   } else {
3614     DCHECK(data_type == DataType::Type::kInt64);
3615     __ testq(second_reg, second_reg);
3616     __ j(kEqual, slow_path->GetEntryLabel());
3617     __ xorq(rdx, rdx);
3618     __ divq(second_reg);
3619   }
3620   __ Bind(slow_path->GetExitLabel());
3621 }
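
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// A zero divisor is diverted to the slow path above (which ends up throwing
// ArithmeticException), so the fast path is a plain unsigned division. Hypothetical
// helper restating the 32-bit fast path:
//
//   static uint32_t DivideUnsignedFastPathSketch(uint32_t dividend, uint32_t divisor) {
//     // Caller guarantees divisor != 0; the testl/j(kEqual) pair filters zero out.
//     return dividend / divisor;  // xorl %edx, %edx; divl divisor -> quotient in EAX.
//   }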
3622 
VisitIntegerDivideUnsigned(HInvoke * invoke)3623 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3624   CreateDivideUnsignedLocations(invoke, allocator_);
3625 }
3626 
VisitIntegerDivideUnsigned(HInvoke * invoke)3627 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3628   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3629 }
3630 
VisitLongDivideUnsigned(HInvoke * invoke)3631 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3632   CreateDivideUnsignedLocations(invoke, allocator_);
3633 }
3634 
VisitLongDivideUnsigned(HInvoke * invoke)3635 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3636   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3637 }
3638 
VisitMathMultiplyHigh(HInvoke * invoke)3639 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3640   LocationSummary* locations =
3641       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3642   locations->SetInAt(0, Location::RegisterLocation(RAX));
3643   locations->SetInAt(1, Location::RequiresRegister());
3644   locations->SetOut(Location::RegisterLocation(RDX));
3645   locations->AddTemp(Location::RegisterLocation(RAX));
3646 }
3647 
VisitMathMultiplyHigh(HInvoke * invoke)3648 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3649   X86_64Assembler* assembler = GetAssembler();
3650   LocationSummary* locations = invoke->GetLocations();
3651 
3652   CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3653 
3654   DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3655   DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3656 
3657   __ imulq(y);
3658 }
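
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// One-operand IMUL multiplies RAX by the operand and leaves the 128-bit product in
// RDX:RAX; Math.multiplyHigh only needs the RDX half. Hypothetical helper using the
// GCC/Clang __int128 extension:
//
//   static int64_t MultiplyHighSketch(int64_t x, int64_t y) {
//     __int128 product = static_cast<__int128>(x) * y;  // Full signed 128-bit product.
//     return static_cast<int64_t>(product >> 64);       // The half that imulq leaves in RDX.
//   }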
3659 
3660 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3661  public:
VarHandleSlowPathX86_64(HInvoke * invoke)3662   explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3663       : IntrinsicSlowPathX86_64(invoke) {
3664   }
3665 
SetVolatile(bool is_volatile)3666   void SetVolatile(bool is_volatile) {
3667     is_volatile_ = is_volatile;
3668   }
3669 
SetAtomic(bool is_atomic)3670   void SetAtomic(bool is_atomic) {
3671     is_atomic_ = is_atomic;
3672   }
3673 
SetNeedAnyStoreBarrier(bool need_any_store_barrier)3674   void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3675     need_any_store_barrier_ = need_any_store_barrier;
3676   }
3677 
SetNeedAnyAnyBarrier(bool need_any_any_barrier)3678   void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3679     need_any_any_barrier_ = need_any_any_barrier;
3680   }
3681 
SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op)3682   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3683     get_and_update_op_ = get_and_update_op;
3684   }
3685 
GetByteArrayViewCheckLabel()3686   Label* GetByteArrayViewCheckLabel() {
3687     return &byte_array_view_check_label_;
3688   }
3689 
GetNativeByteOrderLabel()3690   Label* GetNativeByteOrderLabel() {
3691     return &native_byte_order_label_;
3692   }
3693 
EmitNativeCode(CodeGenerator * codegen)3694   void EmitNativeCode(CodeGenerator* codegen) override {
3695     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3696       EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3697     }
3698     IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3699   }
3700 
3701  private:
GetInvoke() const3702   HInvoke* GetInvoke() const {
3703     return GetInstruction()->AsInvoke();
3704   }
3705 
GetAccessModeTemplate() const3706   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3707     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3708   }
3709 
3710   void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3711 
3712   Label byte_array_view_check_label_;
3713   Label native_byte_order_label_;
3714 
3715   // Arguments forwarded to specific methods.
3716   bool is_volatile_;
3717   bool is_atomic_;
3718   bool need_any_store_barrier_;
3719   bool need_any_any_barrier_;
3720   GetAndUpdateOp get_and_update_op_;
3721 };
3722 
GenerateMathFma(HInvoke * invoke,CodeGeneratorX86_64 * codegen)3723 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3724   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3725   X86_64Assembler* assembler = codegen->GetAssembler();
3726   LocationSummary* locations = invoke->GetLocations();
3727   DCHECK(locations->InAt(0).Equals(locations->Out()));
3728   XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3729   XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3730   XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
3731   if (invoke->GetType() == DataType::Type::kFloat32) {
3732     __ vfmadd213ss(left, right, accumulator);
3733   } else {
3734     DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3735     __ vfmadd213sd(left, right, accumulator);
3736   }
3737 }
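
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// VFMADD213 computes dst = dst * src2 + src3 with a single rounding, which is why the
// first input must alias the output above. The semantics match std::fma from <cmath>:
//
//   static double FmaSketch(double left, double right, double accumulator) {
//     return std::fma(left, right, accumulator);  // One rounding, like vfmadd213sd.
//   }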
3738 
VisitMathFmaDouble(HInvoke * invoke)3739 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3740   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3741   GenerateMathFma(invoke, codegen_);
3742 }
3743 
VisitMathFmaDouble(HInvoke * invoke)3744 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3745   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3746     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3747   }
3748 }
3749 
VisitMathFmaFloat(HInvoke * invoke)3750 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3751   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3752   GenerateMathFma(invoke, codegen_);
3753 }
3754 
VisitMathFmaFloat(HInvoke * invoke)3755 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3756   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3757     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3758   }
3759 }
3760 
3761 // Generate subtype check without read barriers.
GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64 * codegen,SlowPathCode * slow_path,CpuRegister object,CpuRegister temp,Address type_address,bool object_can_be_null=true)3762 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3763                                                     SlowPathCode* slow_path,
3764                                                     CpuRegister object,
3765                                                     CpuRegister temp,
3766                                                     Address type_address,
3767                                                     bool object_can_be_null = true) {
3768   X86_64Assembler* assembler = codegen->GetAssembler();
3769 
3770   const MemberOffset class_offset = mirror::Object::ClassOffset();
3771   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3772 
3773   NearLabel check_type_compatibility, type_matched;
3774 
3775   // If the object is null, there is no need to check the type.
3776   if (object_can_be_null) {
3777     __ testl(object, object);
3778     __ j(kZero, &type_matched);
3779   }
3780 
3781   // Do not unpoison for in-memory comparison.
3782   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3783   __ movl(temp, Address(object, class_offset));
3784   __ Bind(&check_type_compatibility);
3785   __ cmpl(temp, type_address);
3786   __ j(kEqual, &type_matched);
3787   // Load the super class.
3788   __ MaybeUnpoisonHeapReference(temp);
3789   __ movl(temp, Address(temp, super_class_offset));
3790   // If the super class is null, we reached the root of the hierarchy without a match.
3791   // We let the slow path handle uncovered cases (e.g. interfaces).
3792   __ testl(temp, temp);
3793   __ j(kEqual, slow_path->GetEntryLabel());
3794   __ jmp(&check_type_compatibility);
3795   __ Bind(&type_matched);
3796 }
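
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The loop above walks the superclass chain without a read barrier; a null superclass
// means "not proven" and falls back to the slow path. A hypothetical restatement with
// an assumed class-node type:
//
//   struct KlassSketch { const KlassSketch* super_class; };
//   static bool IsSubTypeNoReadBarrierSketch(const KlassSketch* klass,
//                                            const KlassSketch* expected) {
//     for (const KlassSketch* k = klass; k != nullptr; k = k->super_class) {
//       if (k == expected) {
//         return true;  // Matched somewhere along the chain.
//       }
//     }
//     return false;  // Interfaces and stale references are resolved by the slow path.
//   }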
3797 
3798 // Check access mode and the primitive type from VarHandle.varType.
3799 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3800 // check without read barrier, so it can have false negatives which we handle in the slow path.
GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path,DataType::Type type)3801 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3802                                                         CodeGeneratorX86_64* codegen,
3803                                                         VarHandleSlowPathX86_64* slow_path,
3804                                                         DataType::Type type) {
3805   X86_64Assembler* assembler = codegen->GetAssembler();
3806 
3807   LocationSummary* locations = invoke->GetLocations();
3808   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3809   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3810 
3811   mirror::VarHandle::AccessMode access_mode =
3812       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3813   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3814 
3815   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3816   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3817   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3818 
3819   // Check that the operation is permitted.
3820   __ testl(Address(varhandle, access_mode_bit_mask_offset),
3821            Immediate(1u << static_cast<uint32_t>(access_mode)));
3822   __ j(kZero, slow_path->GetEntryLabel());
3823 
3824   // For primitive types, we do not need a read barrier when loading the varType reference,
3825   // as we only use it to load a constant primitive field. For reference types, we deliberately
3826   // avoid the read barrier, letting the slow path handle the false negatives.
3827   __ movl(temp, Address(varhandle, var_type_offset));
3828   __ MaybeUnpoisonHeapReference(temp);
3829 
3830   // Check the varType.primitiveType field against the type we're trying to use.
3831   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3832   __ j(kNotEqual, slow_path->GetEntryLabel());
3833 
3834   if (type == DataType::Type::kReference) {
3835     // Check reference arguments against the varType.
3836     // False negatives due to varType being an interface or array type
3837     // or due to the missing read barrier are handled by the slow path.
3838     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3839     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3840     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3841     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3842       HInstruction* arg = invoke->InputAt(arg_index);
3843       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3844       if (!arg->IsNullConstant()) {
3845         CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3846         Address type_addr(varhandle, var_type_offset);
3847         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3848       }
3849     }
3850   }
3851 }
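
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The access-mode check above is a single bit test against the VarHandle's precomputed
// bitmask. Hypothetical helper restating it:
//
//   static bool AccessModeSupportedSketch(uint32_t access_modes_bit_mask, uint32_t access_mode) {
//     return (access_modes_bit_mask & (1u << access_mode)) != 0u;  // testl; jz -> slow path.
//   }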
3852 
GenerateVarHandleStaticFieldCheck(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3853 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3854                                               CodeGeneratorX86_64* codegen,
3855                                               VarHandleSlowPathX86_64* slow_path) {
3856   X86_64Assembler* assembler = codegen->GetAssembler();
3857 
3858   LocationSummary* locations = invoke->GetLocations();
3859   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3860 
3861   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3862 
3863   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3864   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3865   __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3866   __ j(kNotEqual, slow_path->GetEntryLabel());
3867 }
3868 
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3869 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3870                                                  CodeGeneratorX86_64* codegen,
3871                                                  VarHandleSlowPathX86_64* slow_path) {
3872   VarHandleOptimizations optimizations(invoke);
3873   X86_64Assembler* assembler = codegen->GetAssembler();
3874 
3875   LocationSummary* locations = invoke->GetLocations();
3876   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3877   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3878   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3879 
3880   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3881   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3882 
3883   // Null-check the object.
3884   if (!optimizations.GetSkipObjectNullCheck()) {
3885     __ testl(object, object);
3886     __ j(kZero, slow_path->GetEntryLabel());
3887   }
3888 
3889   if (!optimizations.GetUseKnownImageVarHandle()) {
3890     // Check that the VarHandle references an instance field by checking that
3891     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3892     // type compatibility check against the source object's type, which will fail for null.
3893     __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3894     __ j(kNotEqual, slow_path->GetEntryLabel());
3895 
3896     // Check that the object has the correct type.
3897     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3898     GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3899                                             slow_path,
3900                                             object,
3901                                             temp,
3902                                             Address(varhandle, coordinate_type0_offset),
3903                                             /*object_can_be_null=*/ false);
3904   }
3905 }
3906 
GenerateVarHandleArrayChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3907 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3908                                          CodeGeneratorX86_64* codegen,
3909                                          VarHandleSlowPathX86_64* slow_path) {
3910   VarHandleOptimizations optimizations(invoke);
3911   X86_64Assembler* assembler = codegen->GetAssembler();
3912   LocationSummary* locations = invoke->GetLocations();
3913 
3914   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3915   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3916   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3917   DataType::Type value_type =
3918       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3919   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3920 
3921   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3922   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3923   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3924   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3925   const MemberOffset class_offset = mirror::Object::ClassOffset();
3926   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3927 
3928   // Null-check the object.
3929   if (!optimizations.GetSkipObjectNullCheck()) {
3930     __ testl(object, object);
3931     __ j(kZero, slow_path->GetEntryLabel());
3932   }
3933 
3934   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3935 
3936   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3937   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3938   // coordinateType0 shall not be null, but we do not explicitly verify that.
3939   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3940   __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3941   __ j(kEqual, slow_path->GetEntryLabel());
3942 
3943   // Check object class against componentType0.
3944   //
3945   // This is an exact check and we defer other cases to the runtime. This includes
3946   // conversion to array of superclass references, which is valid but subsequently
3947   // requires all update operations to check that the value can indeed be stored.
3948   // We do not want to perform such extra checks in the intrinsified code.
3949   //
3950   // We do this check without read barrier, so there can be false negatives which we
3951   // defer to the slow path. There shall be no false negatives for array classes in the
3952   // boot image (including Object[] and primitive arrays) because they are non-movable.
3953   __ movl(temp, Address(object, class_offset.Int32Value()));
3954   __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3955   __ j(kNotEqual, slow_path->GetEntryLabel());
3956 
3957   // Check that the coordinateType0 is an array type. We do not need a read barrier
3958   // for loading constant reference fields (or chains of them) for comparison with null,
3959   // nor for finally loading a constant primitive field (primitive type) below.
3960   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3961   __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3962   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3963   __ testl(temp, temp);
3964   __ j(kZero, slow_path->GetEntryLabel());
3965 
3966   // Check that the array component type matches the primitive type.
3967   Label* slow_path_label;
3968   if (primitive_type == Primitive::kPrimNot) {
3969     slow_path_label = slow_path->GetEntryLabel();
3970   } else {
3971     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3972     // we shall check for a byte array view in the slow path.
3973     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3974     // so we cannot emit that if we're JITting without boot image.
3975     bool boot_image_available =
3976         codegen->GetCompilerOptions().IsBootImage() ||
3977         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3978     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3979     slow_path_label =
3980         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3981   }
3982   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3983   __ j(kNotEqual, slow_path_label);
3984 
3985   // Check for array index out of bounds.
3986   __ cmpl(index, Address(object, array_length_offset.Int32Value()));
3987   __ j(kAboveEqual, slow_path->GetEntryLabel());
3988 }
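
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The final cmpl/j(kAboveEqual) above is an unsigned bounds check: a negative index
// becomes a huge unsigned value, so one branch rejects both bad cases. Hypothetical
// helper:
//
//   static bool IndexOutOfBoundsSketch(int32_t index, int32_t length) {
//     return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
//   }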
3989 
GenerateVarHandleCoordinateChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3990 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3991                                               CodeGeneratorX86_64* codegen,
3992                                               VarHandleSlowPathX86_64* slow_path) {
3993   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3994   if (expected_coordinates_count == 0u) {
3995     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3996   } else if (expected_coordinates_count == 1u) {
3997     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3998   } else {
3999     DCHECK_EQ(expected_coordinates_count, 2u);
4000     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4001   }
4002 }
4003 
GenerateVarHandleChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type type)4004 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
4005                                                         CodeGeneratorX86_64* codegen,
4006                                                         DataType::Type type) {
4007   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4008   VarHandleOptimizations optimizations(invoke);
4009   if (optimizations.GetUseKnownImageVarHandle()) {
4010     DCHECK_NE(expected_coordinates_count, 2u);
4011     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4012       return nullptr;
4013     }
4014   }
4015 
4016   VarHandleSlowPathX86_64* slow_path =
4017       new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
4018   codegen->AddSlowPath(slow_path);
4019 
4020   if (!optimizations.GetUseKnownImageVarHandle()) {
4021     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4022   }
4023   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4024 
4025   return slow_path;
4026 }
4027 
4028 struct VarHandleTarget {
4029   Register object;  // The object holding the value to operate on.
4030   Register offset;  // The offset of the value to operate on.
4031 };
4032 
GetVarHandleTarget(HInvoke * invoke)4033 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4034   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4035   LocationSummary* locations = invoke->GetLocations();
4036 
4037   VarHandleTarget target;
4038   // The temporary allocated for loading the offset.
4039   target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
4040   // The reference to the object that holds the value to operate on.
4041   target.object = (expected_coordinates_count == 0u)
4042       ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
4043       : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
4044   return target;
4045 }
4046 
GenerateVarHandleTarget(HInvoke * invoke,const VarHandleTarget & target,CodeGeneratorX86_64 * codegen)4047 static void GenerateVarHandleTarget(HInvoke* invoke,
4048                                     const VarHandleTarget& target,
4049                                     CodeGeneratorX86_64* codegen) {
4050   LocationSummary* locations = invoke->GetLocations();
4051   X86_64Assembler* assembler = codegen->GetAssembler();
4052   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4053 
4054   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
4055 
4056   if (expected_coordinates_count <= 1u) {
4057     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4058       ScopedObjectAccess soa(Thread::Current());
4059       ArtField* target_field = GetBootImageVarHandleField(invoke);
4060       if (expected_coordinates_count == 0u) {
4061         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4062         __ movl(CpuRegister(target.object),
4063                 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
4064         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4065           codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
4066         } else {
4067           codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
4068                                             declaring_class->GetDexTypeIndex());
4069         }
4070       }
4071       __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
4072     } else {
4073       // For static fields, we need to fill `target.object` with the declaring class, so we
4074       // can use `target.object` as a temporary for the `ArtField*`. For instance fields, we
4075       // do not need the declaring class and can forget the `ArtField*` once `target.offset`
4076       // is loaded, so we use `target.offset` to hold the `ArtField*`.
4077       CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
4078 
4079       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4080       const MemberOffset offset_offset = ArtField::OffsetOffset();
4081 
4082       // Load the ArtField*, the offset and, if needed, declaring class.
4083       __ movq(field, Address(varhandle, art_field_offset));
4084       __ movl(CpuRegister(target.offset), Address(field, offset_offset));
4085       if (expected_coordinates_count == 0u) {
4086         InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
4087         instr_codegen->GenerateGcRootFieldLoad(invoke,
4088                                                Location::RegisterLocation(target.object),
4089                                                Address(field, ArtField::DeclaringClassOffset()),
4090                                                /*fixup_label=*/nullptr,
4091                                                codegen->GetCompilerReadBarrierOption());
4092       }
4093     }
4094   } else {
4095     DCHECK_EQ(expected_coordinates_count, 2u);
4096 
4097     DataType::Type value_type =
4098         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4099     ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
4100     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4101     CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
4102 
4103     // The effect of LEA is `target.offset = index * scale + data_offset`.
4104     __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
4105   }
4106 }
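
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// For array coordinates the target is the pair (object, offset), where the leal above
// folds the scaled index and the data offset into one value. Hypothetical helper:
//
//   static size_t ElementOffsetSketch(size_t data_offset, size_t index, size_t element_size) {
//     return data_offset + index * element_size;  // element_size == (1u << scale).
//   }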
4107 
HasVarHandleIntrinsicImplementation(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4108 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4109   // The only supported read barrier implementation is the Baker-style read barriers.
4110   if (codegen->EmitNonBakerReadBarrier()) {
4111     return false;
4112   }
4113 
4114   VarHandleOptimizations optimizations(invoke);
4115   if (optimizations.GetDoNotIntrinsify()) {
4116     return false;
4117   }
4118 
4119   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4120   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
4121   return true;
4122 }
4123 
CreateVarHandleCommonLocations(HInvoke * invoke)4124 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4125   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4126   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4127   LocationSummary* locations = new (allocator) LocationSummary(
4128       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4129 
4130   locations->SetInAt(0, Location::RequiresRegister());
4131   // Require coordinates in registers. These are the object holding the value
4132   // to operate on (except for static fields) and index (for arrays and views).
4133   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4134     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4135   }
4136 
4137   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4138   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4139   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4140     HInstruction* arg = invoke->InputAt(arg_index);
4141     if (DataType::IsFloatingPointType(arg->GetType())) {
4142       locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
4143     } else {
4144       locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
4145     }
4146   }
4147 
4148   // Add a temporary for offset.
4149   locations->AddTemp(Location::RequiresRegister());
4150 
4151   if (expected_coordinates_count == 0u) {
4152     // Add a temporary to hold the declaring class.
4153     locations->AddTemp(Location::RequiresRegister());
4154   }
4155 
4156   return locations;
4157 }
4158 
CreateVarHandleGetLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4159 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4160   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4161     return;
4162   }
4163 
4164   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4165   if (DataType::IsFloatingPointType(invoke->GetType())) {
4166     locations->SetOut(Location::RequiresFpuRegister());
4167   } else {
4168     locations->SetOut(Location::RequiresRegister());
4169   }
4170 }
4171 
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorX86_64 * codegen,bool byte_swap=false)4172 static void GenerateVarHandleGet(HInvoke* invoke,
4173                                  CodeGeneratorX86_64* codegen,
4174                                  bool byte_swap = false) {
4175   DataType::Type type = invoke->GetType();
4176   DCHECK_NE(type, DataType::Type::kVoid);
4177 
4178   LocationSummary* locations = invoke->GetLocations();
4179   X86_64Assembler* assembler = codegen->GetAssembler();
4180 
4181   VarHandleTarget target = GetVarHandleTarget(invoke);
4182   VarHandleSlowPathX86_64* slow_path = nullptr;
4183   if (!byte_swap) {
4184     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4185     GenerateVarHandleTarget(invoke, target, codegen);
4186     if (slow_path != nullptr) {
4187       __ Bind(slow_path->GetNativeByteOrderLabel());
4188     }
4189   }
4190 
4191   // Load the value from the field
4192   Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4193   Location out = locations->Out();
4194 
4195   if (type == DataType::Type::kReference) {
4196     if (codegen->EmitReadBarrier()) {
4197       DCHECK(kUseBakerReadBarrier);
4198       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4199           invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
4200     } else {
4201       __ movl(out.AsRegister<CpuRegister>(), src);
4202       __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
4203     }
4204     DCHECK(!byte_swap);
4205   } else {
4206     codegen->LoadFromMemoryNoReference(type, out, src);
4207     if (byte_swap) {
4208       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4209       codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
4210     }
4211   }
4212 
4213   if (slow_path != nullptr) {
4214     DCHECK(!byte_swap);
4215     __ Bind(slow_path->GetExitLabel());
4216   }
4217 }
4218 
VisitMethodHandleInvokeExact(HInvoke * invoke)4219 void IntrinsicLocationsBuilderX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4220   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4221   LocationSummary* locations = new (allocator)
4222       LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
4223 
4224   InvokeDexCallingConventionVisitorX86_64 calling_convention;
4225   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
4226 
4227   locations->SetInAt(0, Location::RequiresRegister());
4228 
4229   // Accommodate the LocationSummary for the underlying invoke-* call.
4230   uint32_t number_of_args = invoke->GetNumberOfArguments();
4231   for (uint32_t i = 1; i < number_of_args; ++i) {
4232     locations->SetInAt(i, calling_convention.GetNextLocation(invoke->InputAt(i)->GetType()));
4233   }
4234 
4235   // The last input is the MethodType object corresponding to the call site.
4236   locations->SetInAt(number_of_args, Location::RequiresRegister());
4237 
4238   locations->AddTemp(Location::RequiresRegister());
4239   // Hidden arg for invoke-interface.
4240   locations->AddTemp(Location::RegisterLocation(RAX));
4241 }
4242 
VisitMethodHandleInvokeExact(HInvoke * invoke)4243 void IntrinsicCodeGeneratorX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4244   LocationSummary* locations = invoke->GetLocations();
4245 
4246   CpuRegister method_handle = locations->InAt(0).AsRegister<CpuRegister>();
4247 
4248   SlowPathCode* slow_path =
4249       new (codegen_->GetScopedAllocator()) InvokePolymorphicSlowPathX86_64(invoke, method_handle);
4250   codegen_->AddSlowPath(slow_path);
4251   X86_64Assembler* assembler = codegen_->GetAssembler();
4252 
4253   CpuRegister call_site_type =
4254       locations->InAt(invoke->GetNumberOfArguments()).AsRegister<CpuRegister>();
4255 
4256   // The call-site type must match the MethodHandle's type.
4257   __ MaybePoisonHeapReference(call_site_type);
4258   __ cmpl(call_site_type, Address(method_handle, mirror::MethodHandle::MethodTypeOffset()));
4259   __ j(kNotEqual, slow_path->GetEntryLabel());
4260 
4261   CpuRegister method = CpuRegister(kMethodRegisterArgument);
4262   __ movq(method, Address(method_handle, mirror::MethodHandle::ArtFieldOrMethodOffset()));
4263 
4264   Label static_dispatch;
4265   Label execute_target_method;
4266 
4267   Address method_handle_kind = Address(method_handle, mirror::MethodHandle::HandleKindOffset());
4268   if (invoke->AsInvokePolymorphic()->CanTargetInstanceMethod()) {
4269     CpuRegister receiver = locations->InAt(1).AsRegister<CpuRegister>();
4270 
4271     // The receiver must not be null for any of the following cases.
4272     __ testl(receiver, receiver);
4273     __ j(kEqual, slow_path->GetEntryLabel());
4274 
4275     __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeDirect));
4276     // No dispatch is needed for invoke-direct.
4277     __ j(kEqual, &execute_target_method);
4278 
4279     Label non_virtual_dispatch;
4280     // Handle invoke-virtual case.
4281     __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeVirtual));
4282     __ j(kNotEqual, &non_virtual_dispatch);
4283 
4284     // Skip virtual dispatch if `method` is private.
4285     __ testl(Address(method, ArtMethod::AccessFlagsOffset()), Immediate(kAccPrivate));
4286     __ j(kNotZero, &execute_target_method);
4287 
4288     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4289 
4290     __ movl(temp, Address(method, ArtMethod::DeclaringClassOffset()));
4291     __ cmpl(temp, Address(receiver, mirror::Object::ClassOffset()));
4292     // If method is defined in the receiver's class, execute it as it is.
4293     __ j(kEqual, &execute_target_method);
4294 
4295     // MethodIndex is uint16_t.
4296     __ movzxw(temp, Address(method, ArtMethod::MethodIndexOffset()));
4297 
4298     constexpr uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4299     // Re-using method register for receiver class.
4300     __ movl(method, Address(receiver, class_offset));
4301     __ MaybeUnpoisonHeapReference(method);
4302 
4303     constexpr uint32_t vtable_offset =
4304         mirror::Class::EmbeddedVTableOffset(art::PointerSize::k64).Int32Value();
4305     __ movq(method, Address(method, temp, TIMES_8, vtable_offset));
4306     __ Jump(&execute_target_method);
4307 
4308     __ Bind(&non_virtual_dispatch);
4309     __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeInterface));
4310     __ j(kNotEqual, &static_dispatch);
4311 
4312     __ movl(temp, Address(method, ArtMethod::AccessFlagsOffset()));
4313 
4314     __ testl(temp, Immediate(kAccPrivate));
4315     __ j(kNotZero, &execute_target_method);
4316 
4317     CpuRegister hidden_arg = locations->GetTemp(1).AsRegister<CpuRegister>();
4318     // Set the hidden argument.
4319     DCHECK_EQ(RAX, hidden_arg.AsRegister());
4320     __ movq(hidden_arg, method);
4321 
4322     Label get_imt_index_from_method_index;
4323     Label do_imt_dispatch;
4324 
4325     // Get IMT index.
4326     // No default-conflict check is needed, as the IMT index is set for all methods that
4327     // have the kAccAbstract bit.
4328     __ testl(temp, Immediate(kAccAbstract));
4329     __ j(kZero, &get_imt_index_from_method_index);
4330 
4331     // imt_index_ is uint16_t
4332     __ movzxw(temp, Address(method, ArtMethod::ImtIndexOffset()));
4333     __ Jump(&do_imt_dispatch);
4334 
4335     // For a default method, compute method->GetMethodIndex() & (ImTable::kSizeTruncToPowerOfTwo - 1).
4336     __ Bind(&get_imt_index_from_method_index);
4337     __ movl(temp, Address(method, ArtMethod::MethodIndexOffset()));
4338     __ andl(temp, Immediate(ImTable::kSizeTruncToPowerOfTwo - 1));
4339 
4340     __ Bind(&do_imt_dispatch);
4341     // Re-using `method` to store receiver class and ImTableEntry.
4342     __ movl(method, Address(receiver, mirror::Object::ClassOffset()));
4343     __ MaybeUnpoisonHeapReference(method);
4344 
4345     __ movq(method, Address(method, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4346     // method = receiver->GetClass()->embedded_imtable_->Get(method_offset);
4347     __ movq(method, Address(method, temp, TIMES_8, /* disp= */ 0));
4348 
4349     __ Jump(&execute_target_method);
4350   }
4351   __ Bind(&static_dispatch);
4352   __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeStatic));
4353   __ j(kNotEqual, slow_path->GetEntryLabel());
4354   // MH's kind is invoke-static. The method can be called directly, hence fall-through.
4355 
4356   __ Bind(&execute_target_method);
4357   __ call(Address(
4358       method,
4359       ArtMethod::EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).SizeValue()));
4360   codegen_->RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4361   __ Bind(slow_path->GetExitLabel());
4362 }
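
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// The interface-dispatch branch above selects the IMT slot in two ways: abstract
// interface methods carry a precomputed imt_index_, while default methods mask their
// method index down to the IMT size. Hypothetical helper restating the selection:
//
//   static uint32_t ImtSlotSketch(bool is_abstract, uint16_t imt_index, uint32_t method_index) {
//     return is_abstract
//         ? imt_index                                                  // movzxw of ImtIndexOffset().
//         : (method_index & (ImTable::kSizeTruncToPowerOfTwo - 1u));   // andl in the default path.
//   }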
4363 
VisitVarHandleGet(HInvoke * invoke)4364 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
4365   CreateVarHandleGetLocations(invoke, codegen_);
4366 }
4367 
VisitVarHandleGet(HInvoke * invoke)4368 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
4369   GenerateVarHandleGet(invoke, codegen_);
4370 }
4371 
VisitVarHandleGetAcquire(HInvoke * invoke)4372 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4373   CreateVarHandleGetLocations(invoke, codegen_);
4374 }
4375 
VisitVarHandleGetAcquire(HInvoke * invoke)4376 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4377   // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
4378   GenerateVarHandleGet(invoke, codegen_);
4379 }
4380 
VisitVarHandleGetOpaque(HInvoke * invoke)4381 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4382   CreateVarHandleGetLocations(invoke, codegen_);
4383 }
4384 
VisitVarHandleGetOpaque(HInvoke * invoke)4385 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4386   // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
4387   GenerateVarHandleGet(invoke, codegen_);
4388 }
4389 
VisitVarHandleGetVolatile(HInvoke * invoke)4390 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4391   CreateVarHandleGetLocations(invoke, codegen_);
4392 }
4393 
VisitVarHandleGetVolatile(HInvoke * invoke)4394 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4395   // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
4396   GenerateVarHandleGet(invoke, codegen_);
4397 }
4398 
CreateVarHandleSetLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4399 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4400   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4401     return;
4402   }
4403 
4404   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4405 
4406   // An extra temporary is used for the card in MarkGCCard and to move 64-bit constants to memory.
4407   locations->AddTemp(Location::RequiresRegister());
4408 }
4409 
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorX86_64 * codegen,bool is_volatile,bool is_atomic,bool byte_swap=false)4410 static void GenerateVarHandleSet(HInvoke* invoke,
4411                                  CodeGeneratorX86_64* codegen,
4412                                  bool is_volatile,
4413                                  bool is_atomic,
4414                                  bool byte_swap = false) {
4415   X86_64Assembler* assembler = codegen->GetAssembler();
4416 
4417   LocationSummary* locations = invoke->GetLocations();
4418   const uint32_t last_temp_index = locations->GetTempCount() - 1;
4419 
4420   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4421   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4422 
4423   VarHandleTarget target = GetVarHandleTarget(invoke);
4424   VarHandleSlowPathX86_64* slow_path = nullptr;
4425   if (!byte_swap) {
4426     slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
4427     GenerateVarHandleTarget(invoke, target, codegen);
4428     if (slow_path != nullptr) {
4429       slow_path->SetVolatile(is_volatile);
4430       slow_path->SetAtomic(is_atomic);
4431       __ Bind(slow_path->GetNativeByteOrderLabel());
4432     }
4433   }
4434 
4435   switch (invoke->GetIntrinsic()) {
4436     case Intrinsics::kVarHandleSetRelease:
4437       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4438       break;
4439     case Intrinsics::kVarHandleSetVolatile:
4440       // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
4441       break;
4442     default:
4443       // Other intrinsics don't need a barrier.
4444       break;
4445   }
4446 
4447   Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4448 
4449   // Store the value to the field.
4450   codegen->GetInstructionCodegen()->HandleFieldSet(
4451       invoke,
4452       value_index,
4453       last_temp_index,
4454       value_type,
4455       dst,
4456       CpuRegister(target.object),
4457       is_volatile,
4458       is_atomic,
4459       /*value_can_be_null=*/true,
4460       byte_swap,
4461       // Value can be null, and this write barrier is not being relied on for other sets.
4462       value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4463                                                  WriteBarrierKind::kDontEmit);
4464 
4465   // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4466 
4467   if (slow_path != nullptr) {
4468     DCHECK(!byte_swap);
4469     __ Bind(slow_path->GetExitLabel());
4470   }
4471 }
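
// --- Editor's note: illustrative sketch, not part of the original ART source. ---
// On x86-64 (TSO), only setVolatile needs an explicit trailing barrier; set, setOpaque
// and setRelease map to a plain store. A rough std::atomic analogue (hypothetical
// helper, not how HandleFieldSet is implemented):
//
//   template <typename T>
//   static void VarHandleSetSketch(std::atomic<T>* field, T value, std::memory_order order) {
//     // relaxed ~ set()/setOpaque(), release ~ setRelease(), seq_cst ~ setVolatile().
//     field->store(value, order);
//   }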
4472 
VisitVarHandleSet(HInvoke * invoke)4473 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4474   CreateVarHandleSetLocations(invoke, codegen_);
4475 }
4476 
VisitVarHandleSet(HInvoke * invoke)4477 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4478   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4479 }
4480 
VisitVarHandleSetOpaque(HInvoke * invoke)4481 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4482   CreateVarHandleSetLocations(invoke, codegen_);
4483 }
4484 
VisitVarHandleSetOpaque(HInvoke * invoke)4485 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4486   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4487 }
4488 
VisitVarHandleSetRelease(HInvoke * invoke)4489 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4490   CreateVarHandleSetLocations(invoke, codegen_);
4491 }
4492 
VisitVarHandleSetRelease(HInvoke * invoke)4493 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4494   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4495 }
4496 
VisitVarHandleSetVolatile(HInvoke * invoke)4497 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4498   CreateVarHandleSetLocations(invoke, codegen_);
4499 }
4500 
VisitVarHandleSetVolatile(HInvoke * invoke)4501 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4502   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4503 }
4504 
CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4505 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4506                                                             CodeGeneratorX86_64* codegen) {
4507   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4508     return;
4509   }
4510 
4511   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4512   uint32_t expected_value_index = number_of_arguments - 2;
4513   uint32_t new_value_index = number_of_arguments - 1;
4514   DataType::Type return_type = invoke->GetType();
4515   DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4516   DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4517 
4518   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4519 
4520   if (DataType::IsFloatingPointType(return_type)) {
4521     locations->SetOut(Location::RequiresFpuRegister());
4522   } else {
4523     // Take advantage of the fact that CMPXCHG writes result to RAX.
4524     locations->SetOut(Location::RegisterLocation(RAX));
4525   }
4526 
4527   if (DataType::IsFloatingPointType(expected_type)) {
4528     // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4529     locations->AddTemp(Location::RegisterLocation(RAX));
4530     // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4531     locations->AddTemp(Location::RequiresRegister());
4532   } else {
4533     // Ensure that expected value is in RAX, as required by CMPXCHG.
4534     locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4535     locations->SetInAt(new_value_index, Location::RequiresRegister());
4536     if (expected_type == DataType::Type::kReference) {
4537       // Need two temporaries for MarkGCCard.
4538       locations->AddRegisterTemps(2);
4539       if (codegen->EmitReadBarrier()) {
4540         // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4541         DCHECK(kUseBakerReadBarrier);
4542         locations->AddTemp(Location::RequiresRegister());
4543       }
4544     }
4545     // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4546     DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4547   }
4548 }
4549 
GenerateVarHandleCompareAndSetOrExchange(HInvoke * invoke,CodeGeneratorX86_64 * codegen,bool is_cmpxchg,bool byte_swap=false)4550 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4551                                                      CodeGeneratorX86_64* codegen,
4552                                                      bool is_cmpxchg,
4553                                                      bool byte_swap = false) {
4554   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4555 
4556   X86_64Assembler* assembler = codegen->GetAssembler();
4557   LocationSummary* locations = invoke->GetLocations();
4558 
4559   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4560   uint32_t expected_value_index = number_of_arguments - 2;
4561   uint32_t new_value_index = number_of_arguments - 1;
4562   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4563 
4564   VarHandleSlowPathX86_64* slow_path = nullptr;
4565   VarHandleTarget target = GetVarHandleTarget(invoke);
4566   if (!byte_swap) {
4567     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4568     GenerateVarHandleTarget(invoke, target, codegen);
4569     if (slow_path != nullptr) {
4570       __ Bind(slow_path->GetNativeByteOrderLabel());
4571     }
4572   }
4573 
4574   uint32_t temp_count = locations->GetTempCount();
4575   GenCompareAndSetOrExchange(codegen,
4576                              invoke,
4577                              type,
4578                              CpuRegister(target.object),
4579                              CpuRegister(target.offset),
4580                              /*temp1_index=*/ temp_count - 1,
4581                              /*temp2_index=*/ temp_count - 2,
4582                              /*temp3_index=*/ temp_count - 3,
4583                              locations->InAt(new_value_index),
4584                              locations->InAt(expected_value_index),
4585                              locations->Out(),
4586                              is_cmpxchg,
4587                              byte_swap);
4588 
4589   // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4590   // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
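  // As a rough sketch (assuming a 32-bit int field, no read barrier and no byte swap), the code
  // generated by GenCompareAndSetOrExchange above comes down to a single
  //   lock cmpxchgl <new_value_reg>, (obj, offset, 1)
  // with the expected value pinned in RAX by the locations; CMPXCHG leaves the old value in RAX
  // for compareAndExchange and sets ZF iff the swap happened, which yields the boolean result
  // for compareAndSet.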
4591 
4592   if (slow_path != nullptr) {
4593     DCHECK(!byte_swap);
4594     __ Bind(slow_path->GetExitLabel());
4595   }
4596 }
4597 
VisitVarHandleCompareAndSet(HInvoke * invoke)4598 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4599   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4600 }
4601 
VisitVarHandleCompareAndSet(HInvoke * invoke)4602 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4603   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4604 }
4605 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4606 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4607   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4608 }
4609 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4610 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4611   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4612 }
4613 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4614 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4615   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4616 }
4617 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4618 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4619   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4620 }
4621 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4622 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4623   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4624 }
4625 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4626 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4627   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4628 }
4629 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4630 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4631   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4632 }
4633 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4634 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4635   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4636 }
4637 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4638 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4639   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4640 }
4641 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4642 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4643   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4644 }
4645 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4646 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4647   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4648 }
4649 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4650 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4651   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4652 }
4653 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4654 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4655   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4656 }
4657 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4658 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4659   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4660 }
4661 
CreateVarHandleGetAndSetLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4662 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4663   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4664     return;
4665   }
4666 
4667   // Get the type from the shorty as the invokes may not return a value.
4668   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4669   uint32_t new_value_index = number_of_arguments - 1;
4670   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4671   DataType::Type return_type = invoke->GetType();
4672   const bool is_void = return_type == DataType::Type::kVoid;
4673   DCHECK_IMPLIES(!is_void, return_type == value_type);
4674 
4675   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4676 
4677   if (DataType::IsFloatingPointType(value_type)) {
4678     // Only set the `out` register if it's needed. In the void case we don't use `out`.
4679     if (!is_void) {
4680       locations->SetOut(Location::RequiresFpuRegister());
4681     }
4682     // A temporary is needed to load the new floating-point value into a register for XCHG.
4683     locations->AddTemp(Location::RequiresRegister());
4684   } else {
4685     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4686     if (value_type == DataType::Type::kReference) {
4687       // Need two temporaries for MarkGCCard.
4688       locations->AddRegisterTemps(2);
4689       if (codegen->EmitReadBarrier()) {
4690         // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4691         DCHECK(kUseBakerReadBarrier);
4692         locations->AddTemp(Location::RequiresRegister());
4693       }
4694     }
4695     // Only set the `out` register if it's needed. In the void case we can still use RAX in the
4696     // same manner, since it is marked as a temp register.
4697     if (is_void) {
4698       locations->AddTemp(Location::RegisterLocation(RAX));
4699     } else {
4700       // Use the same register for both the new value and output to take advantage of XCHG.
4701       // It doesn't have to be RAX, but we need to pick one register and use it for both.
4702       locations->SetOut(Location::RegisterLocation(RAX));
4703     }
4704   }
4705 }
4706 
GenerateVarHandleGetAndSet(HInvoke * invoke,CodeGeneratorX86_64 * codegen,Location value,DataType::Type type,Address field_addr,CpuRegister ref,bool byte_swap)4707 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4708                                        CodeGeneratorX86_64* codegen,
4709                                        Location value,
4710                                        DataType::Type type,
4711                                        Address field_addr,
4712                                        CpuRegister ref,
4713                                        bool byte_swap) {
4714   X86_64Assembler* assembler = codegen->GetAssembler();
4715   LocationSummary* locations = invoke->GetLocations();
4716   Location out = locations->Out();
4717   uint32_t temp_count = locations->GetTempCount();
4718   DataType::Type return_type = invoke->GetType();
4719   const bool is_void = return_type == DataType::Type::kVoid;
4720   DCHECK_IMPLIES(!is_void, return_type == type);
4721 
4722   if (DataType::IsFloatingPointType(type)) {
4723     // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4724     // exchange it with the field, and move the old value into the output FP register.
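    // Roughly, the steps emitted below are:
    //   temp <- bit pattern of the new FP value          (Move)
    //   xchgl/xchgq temp, [object + offset]              (atomic exchange with the field)
    //   out  <- bit pattern left in temp                 (movd back into the XMM output)
    // with BSWAPs around the exchange when byte_swap is set.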
4725     Location temp = locations->GetTemp(temp_count - 1);
4726     codegen->Move(temp, value);
4727     bool is64bit = (type == DataType::Type::kFloat64);
4728     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4729     if (byte_swap) {
4730       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4731     }
4732     if (is64bit) {
4733       __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4734     } else {
4735       __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4736     }
4737     if (byte_swap) {
4738       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4739     }
4740     if (!is_void) {
4741       __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
4742     }
4743   } else if (type == DataType::Type::kReference) {
4744     // `getAndSet` for references: load reference and atomically exchange it with the field.
4745     // Output register is the same as the one holding new value, so no need to move the result.
4746     DCHECK(!byte_swap);
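    // (Byte-swapped accesses only arise for byte array views, which never hold references,
    // hence the DCHECK above.)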
4747 
4748     // In the void case, we have an extra temp register, which is used to signal the register
4749     // allocator that we are clobering RAX.
4750     // allocator that we are clobbering RAX.
4751     DCHECK_IMPLIES(is_void,
4752                    locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4753 
4754     CpuRegister temp1 = locations->GetTemp(temp_count - extra_temp - 1u).AsRegister<CpuRegister>();
4755     CpuRegister temp2 = locations->GetTemp(temp_count - extra_temp - 2u).AsRegister<CpuRegister>();
4756     CpuRegister valreg = value.AsRegister<CpuRegister>();
4757 
4758     if (codegen->EmitBakerReadBarrier()) {
4759       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4760           invoke,
4761           locations->GetTemp(temp_count - extra_temp - 3u),
4762           ref,
4763           field_addr,
4764           /*needs_null_check=*/false,
4765           /*always_update_field=*/true,
4766           &temp1,
4767           &temp2);
4768     }
4769     codegen->MarkGCCard(temp1, temp2, ref);
4770 
4771     DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4772     if (kPoisonHeapReferences) {
4773       // Use a temp to avoid poisoning the base of the field address, which might happen if `valreg` is
4774       // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4775       __ movl(temp1, valreg);
4776       __ PoisonHeapReference(temp1);
4777       __ xchgl(temp1, field_addr);
4778       if (!is_void) {
4779         __ UnpoisonHeapReference(temp1);
4780         __ movl(valreg, temp1);
4781       }
4782     } else {
4783       __ xchgl(valreg, field_addr);
4784     }
4785   } else {
4786     // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4787     // register is the same as the one holding new value. Do sign extend / zero extend as needed.
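    // Note: XCHG with a memory operand is implicitly locked on x86, so no explicit LOCK prefix
    // is needed for atomicity here.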
4788     if (byte_swap) {
4789       codegen->GetInstructionCodegen()->Bswap(value, type);
4790     }
4791     CpuRegister valreg = value.AsRegister<CpuRegister>();
4792     DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4793     switch (type) {
4794       case DataType::Type::kBool:
4795       case DataType::Type::kUint8:
4796         __ xchgb(valreg, field_addr);
4797         if (!is_void) {
4798           __ movzxb(valreg, valreg);
4799         }
4800         break;
4801       case DataType::Type::kInt8:
4802         __ xchgb(valreg, field_addr);
4803         if (!is_void) {
4804           __ movsxb(valreg, valreg);
4805         }
4806         break;
4807       case DataType::Type::kUint16:
4808         __ xchgw(valreg, field_addr);
4809         if (!is_void) {
4810           __ movzxw(valreg, valreg);
4811         }
4812         break;
4813       case DataType::Type::kInt16:
4814         __ xchgw(valreg, field_addr);
4815         if (!is_void) {
4816           __ movsxw(valreg, valreg);
4817         }
4818         break;
4819       case DataType::Type::kInt32:
4820       case DataType::Type::kUint32:
4821         __ xchgl(valreg, field_addr);
4822         break;
4823       case DataType::Type::kInt64:
4824       case DataType::Type::kUint64:
4825         __ xchgq(valreg, field_addr);
4826         break;
4827       default:
4828         LOG(FATAL) << "unexpected type in getAndSet intrinsic: " << type;
4829         UNREACHABLE();
4830     }
4831     if (byte_swap) {
4832       codegen->GetInstructionCodegen()->Bswap(value, type);
4833     }
4834   }
4835 }
4836 
CreateVarHandleGetAndBitwiseOpLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4837 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4838   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4839     return;
4840   }
4841 
4842   // Get the type from the shorty as the invokes may not return a value.
4843   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4844   uint32_t new_value_index = number_of_arguments - 1;
4845   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4846   DataType::Type return_type = invoke->GetType();
4847   const bool is_void = return_type == DataType::Type::kVoid;
4848   DCHECK_IMPLIES(!is_void, return_type == value_type);
4849 
4850   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4851 
4852   DCHECK_NE(DataType::Type::kReference, value_type);
4853   DCHECK(!DataType::IsFloatingPointType(value_type));
4854   // A temporary to compute the bitwise operation on the old and the new values.
4855   locations->AddTemp(Location::RequiresRegister());
4856   // We need the value to be either in a register or a 32-bit constant (as there are no
4857   // arithmetic instructions that accept a 64-bit immediate on x86_64).
4858   locations->SetInAt(new_value_index,
4859                      DataType::Is64BitType(value_type) ?
4860                          Location::RequiresRegister() :
4861                          Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
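  // (The 32-bit ANDL/ORL/XORL emitted by GenerateVarHandleGetAndOp can take such a constant as
  // an immediate, while the 64-bit forms always read their second operand from a register.)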
4862   if (is_void) {
4863     // RAX is used as a temporary even when we do not output it, so reserve it. It has to be
4864     // requested before the other temporary, since there is a variable number of temp registers
4865     // and the other temp register is expected to be the last one.
4866     locations->AddTemp(Location::RegisterLocation(RAX));
4867   } else {
4868     // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4869     locations->SetOut(Location::RegisterLocation(RAX));
4870   }
4871 }
4872 
GenerateVarHandleGetAndOp(HInvoke * invoke,CodeGeneratorX86_64 * codegen,Location value,DataType::Type type,Address field_addr,GetAndUpdateOp get_and_update_op,bool byte_swap)4873 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4874                                       CodeGeneratorX86_64* codegen,
4875                                       Location value,
4876                                       DataType::Type type,
4877                                       Address field_addr,
4878                                       GetAndUpdateOp get_and_update_op,
4879                                       bool byte_swap) {
4880   X86_64Assembler* assembler = codegen->GetAssembler();
4881   LocationSummary* locations = invoke->GetLocations();
4882   // In the void case, we have an extra temp register, which is used to signal the register
4883   // allocator that we are clobbering RAX.
4884   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4885   const uint32_t extra_temp = is_void ? 1u : 0u;
4886   const uint32_t temp_count = locations->GetTempCount();
4887   DCHECK_IMPLIES(is_void,
4888                  locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4889   Location temp_loc = locations->GetTemp(temp_count - extra_temp - 1u);
4890   Location rax_loc = Location::RegisterLocation(RAX);
4891   DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
4892   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4893   bool is64Bit = DataType::Is64BitType(type);
4894 
4895   NearLabel retry;
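  // The code below is a standard CMPXCHG retry loop; conceptually (sketch only):
  //   do {
  //     old  = *field;            // loaded into RAX
  //     temp = op(old, value);    // bitwise AND/OR/XOR, or ADD on the byte-swap path
  //   } while (!CAS(field, old, temp));
  // leaving the old value in RAX.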
4896   __ Bind(&retry);
4897 
4898   // Load field value into RAX and copy it into a temporary register for the operation.
4899   codegen->LoadFromMemoryNoReference(type, rax_loc, field_addr);
4900   codegen->Move(temp_loc, rax_loc);
4901   if (byte_swap) {
4902     // Byte swap the temporary, since we need to perform the operation in native endianness.
4903     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4904   }
4905 
4906   DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4907   int32_t const_value = value.IsConstant()
4908       ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4909       : 0;
4910 
4911   // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4912   switch (get_and_update_op) {
4913     case GetAndUpdateOp::kAdd:
4914       DCHECK(byte_swap);  // The non-byte-swapping path should use a faster XADD instruction.
4915       if (is64Bit) {
4916         __ addq(temp, value.AsRegister<CpuRegister>());
4917       } else if (value.IsConstant()) {
4918         __ addl(temp, Immediate(const_value));
4919       } else {
4920         __ addl(temp, value.AsRegister<CpuRegister>());
4921       }
4922       break;
4923     case GetAndUpdateOp::kBitwiseAnd:
4924       if (is64Bit) {
4925         __ andq(temp, value.AsRegister<CpuRegister>());
4926       } else if (value.IsConstant()) {
4927         __ andl(temp, Immediate(const_value));
4928       } else {
4929         __ andl(temp, value.AsRegister<CpuRegister>());
4930       }
4931       break;
4932     case GetAndUpdateOp::kBitwiseOr:
4933       if (is64Bit) {
4934         __ orq(temp, value.AsRegister<CpuRegister>());
4935       } else if (value.IsConstant()) {
4936         __ orl(temp, Immediate(const_value));
4937       } else {
4938         __ orl(temp, value.AsRegister<CpuRegister>());
4939       }
4940       break;
4941     case GetAndUpdateOp::kBitwiseXor:
4942       if (is64Bit) {
4943         __ xorq(temp, value.AsRegister<CpuRegister>());
4944       } else if (value.IsConstant()) {
4945         __ xorl(temp, Immediate(const_value));
4946       } else {
4947         __ xorl(temp, value.AsRegister<CpuRegister>());
4948       }
4949       break;
4950     default:
4951       LOG(FATAL) <<  "unexpected operation";
4952       UNREACHABLE();
4953   }
4954 
4955   if (byte_swap) {
4956     // RAX still contains the original value, but we need to byte swap the temporary back.
4957     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4958   }
4959 
4960   switch (type) {
4961     case DataType::Type::kBool:
4962     case DataType::Type::kUint8:
4963     case DataType::Type::kInt8:
4964       __ LockCmpxchgb(field_addr, temp);
4965       break;
4966     case DataType::Type::kUint16:
4967     case DataType::Type::kInt16:
4968       __ LockCmpxchgw(field_addr, temp);
4969       break;
4970     case DataType::Type::kInt32:
4971     case DataType::Type::kUint32:
4972       __ LockCmpxchgl(field_addr, temp);
4973       break;
4974     case DataType::Type::kInt64:
4975     case DataType::Type::kUint64:
4976       __ LockCmpxchgq(field_addr, temp);
4977       break;
4978     default:
4979       LOG(FATAL) << "unexpected type in getAndBitwiseOp intrinsic";
4980       UNREACHABLE();
4981   }
4982 
4983   __ j(kNotZero, &retry);
4984 
4985   // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
4986   // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
4987   if (byte_swap) {
4988     codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
4989   }
4990 }
4991 
CreateVarHandleGetAndAddLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4992 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4993   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4994     return;
4995   }
4996 
4997   // Get the type from the shorty as the invokes may not return a value.
4998   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4999   uint32_t new_value_index = number_of_arguments - 1;
5000   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5001   DataType::Type return_type = invoke->GetType();
5002   const bool is_void = return_type == DataType::Type::kVoid;
5003   DCHECK_IMPLIES(!is_void, return_type == value_type);
5004 
5005   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
5006 
5007   if (DataType::IsFloatingPointType(value_type)) {
5008     // Only set the `out` register if it's needed. In the void case we don't use `out`.
5009     if (!is_void) {
5010       locations->SetOut(Location::RequiresFpuRegister());
5011     }
5012     // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
5013     locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
5014     // CMPXCHG clobbers RAX.
5015     locations->AddTemp(Location::RegisterLocation(RAX));
5016     // An FP temporary to load the old value from the field and perform FP addition.
5017     locations->AddTemp(Location::RequiresFpuRegister());
5018     // A temporary to hold the new value for CMPXCHG.
5019     locations->AddTemp(Location::RequiresRegister());
5020   } else {
5021     DCHECK_NE(value_type, DataType::Type::kReference);
5022     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
5023     if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
5024       // For byte array views with non-native endianness we need extra BSWAP operations, so we
5025     // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In that
5026     // case we need two temporary registers: one to hold the value instead of RAX (which may get
5027       // clobbered by repeated CMPXCHG) and one for performing the operation. At compile time we
5028       // cannot distinguish this case from arrays or native-endian byte array views.
5029       locations->AddRegisterTemps(2);
5030     }
5031     // Only set the `out` register if it's needed. In the void case we can still use RAX in the
5032     // same manner, since it is marked as a temp register.
5033     if (is_void) {
5034       locations->AddTemp(Location::RegisterLocation(RAX));
5035     } else {
5036       // Use the same register for both the new value and output to take advantage of XADD.
5037       // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
5038       // back to GenerateVarHandleGetAndOp that expects out in RAX.
5039       locations->SetOut(Location::RegisterLocation(RAX));
5040     }
5041   }
5042 }
5043 
GenerateVarHandleGetAndAdd(HInvoke * invoke,CodeGeneratorX86_64 * codegen,Location value,DataType::Type type,Address field_addr,bool byte_swap)5044 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
5045                                        CodeGeneratorX86_64* codegen,
5046                                        Location value,
5047                                        DataType::Type type,
5048                                        Address field_addr,
5049                                        bool byte_swap) {
5050   X86_64Assembler* assembler = codegen->GetAssembler();
5051   LocationSummary* locations = invoke->GetLocations();
5052   Location out = locations->Out();
5053   uint32_t temp_count = locations->GetTempCount();
5054 
5055   DataType::Type return_type = invoke->GetType();
5056   const bool is_void = return_type == DataType::Type::kVoid;
5057   DCHECK_IMPLIES(!is_void, return_type == type);
5058 
5059   if (DataType::IsFloatingPointType(type)) {
5060     if (byte_swap) {
5061       // This code should never be executed: it is the case of a byte array view (since it requires
5062       // a byte swap), and varhandles for byte array views support numeric atomic update access modes
5063       // only for int and long, but not for floating-point types (see the javadoc comments for
5064       // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). However, the ART varhandle
5065       // implementation for byte array views treats floating-point types as numeric types in
5066       // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
5067       // fails the access mode check at runtime before reaching this point. The illegal instruction
5068       // UD2 ensures that if control flow gets here by mistake, we will notice.
5069       __ ud2();
5070     }
5071 
5072     // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register and
5073     // in RAX for CMPXCHG, add the new FP value to the old one, move it to a non-FP temporary for
5074     // CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
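    // Conceptually (sketch only):
    //   do {
    //     fptemp = *field;  rax = bits(fptemp);
    //     fptemp += new_value;  temp = bits(fptemp);
    //   } while (!CAS(field, rax, temp));
    //   out = as_fp(rax);   // the old value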
5075     bool is64bit = (type == DataType::Type::kFloat64);
5076     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
5077     XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
5078     Location rax_loc = Location::RegisterLocation(RAX);
5079     Location temp_loc = locations->GetTemp(temp_count - 1);
5080     CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5081 
5082     NearLabel retry;
5083     __ Bind(&retry);
5084 
5085     // Read the value from memory into an FP register and copy it into RAX.
5086     if (is64bit) {
5087       __ movsd(fptemp, field_addr);
5088     } else {
5089       __ movss(fptemp, field_addr);
5090     }
5091     __ movd(CpuRegister(RAX), fptemp, is64bit);
5092     // If necessary, byte swap RAX and update the value in the FP register to also be byte-swapped.
5093     if (byte_swap) {
5094       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5095       __ movd(fptemp, CpuRegister(RAX), is64bit);
5096     }
5097     // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
5098     if (is64bit) {
5099       __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
5100     } else {
5101       __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
5102     }
5103     __ movd(temp, fptemp, is64bit);
5104     // If necessary, byte swap RAX before CMPXCHG and the temporary before copying to the FP register.
5105     if (byte_swap) {
5106       codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
5107       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5108     }
5109     if (is64bit) {
5110       __ LockCmpxchgq(field_addr, temp);
5111     } else {
5112       __ LockCmpxchgl(field_addr, temp);
5113     }
5114 
5115     __ j(kNotZero, &retry);
5116 
5117     // The old value is in RAX, byte swap if necessary.
5118     if (byte_swap) {
5119       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5120     }
5121     if (!is_void) {
5122       __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
5123     }
5124   } else {
5125     if (byte_swap) {
5126       // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
5127       // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
5128       // implementation that is also used for bitwise operations.
5129       // Move value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
5130       DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
5131       // In the void case, we have an extra temp register, which is used to signal the register
5132       // allocator that we are clobbering RAX.
5133       const uint32_t extra_temp = is_void ? 1u : 0u;
5134       DCHECK_IMPLIES(is_void,
5135                      locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
5136       Location temp = locations->GetTemp(temp_count - extra_temp - 2u);
5137       codegen->Move(temp, value);
5138       GenerateVarHandleGetAndOp(
5139           invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
5140     } else {
5141       // `getAndAdd` for integral types: atomically add the new value to the field and load the
5142       // old field value into the register that held the new value, which is also the output
5143       // register. Do sign extend / zero extend as needed.
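      // A minimal sketch of the LOCK XADD semantics relied on below (dst = field, src = valreg):
      //   temp <- dst + src;  src <- dst;  dst <- temp;   // performed as one atomic step
      // so `valreg` ends up holding the old field value and no retry loop is needed.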
5144       CpuRegister valreg = value.AsRegister<CpuRegister>();
5145       DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
5146       switch (type) {
5147         case DataType::Type::kBool:
5148         case DataType::Type::kUint8:
5149           __ LockXaddb(field_addr, valreg);
5150           if (!is_void) {
5151             __ movzxb(valreg, valreg);
5152           }
5153           break;
5154         case DataType::Type::kInt8:
5155           __ LockXaddb(field_addr, valreg);
5156           if (!is_void) {
5157             __ movsxb(valreg, valreg);
5158           }
5159           break;
5160         case DataType::Type::kUint16:
5161           __ LockXaddw(field_addr, valreg);
5162           if (!is_void) {
5163             __ movzxw(valreg, valreg);
5164           }
5165           break;
5166         case DataType::Type::kInt16:
5167           __ LockXaddw(field_addr, valreg);
5168           if (!is_void) {
5169             __ movsxw(valreg, valreg);
5170           }
5171           break;
5172         case DataType::Type::kInt32:
5173         case DataType::Type::kUint32:
5174           __ LockXaddl(field_addr, valreg);
5175           break;
5176         case DataType::Type::kInt64:
5177         case DataType::Type::kUint64:
5178           __ LockXaddq(field_addr, valreg);
5179           break;
5180         default:
5181           LOG(FATAL) << "unexpected type in getAndAdd intrinsic";
5182           UNREACHABLE();
5183       }
5184     }
5185   }
5186 }
5187 
GenerateVarHandleGetAndUpdate(HInvoke * invoke,CodeGeneratorX86_64 * codegen,GetAndUpdateOp get_and_update_op,bool need_any_store_barrier,bool need_any_any_barrier,bool byte_swap=false)5188 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5189                                           CodeGeneratorX86_64* codegen,
5190                                           GetAndUpdateOp get_and_update_op,
5191                                           bool need_any_store_barrier,
5192                                           bool need_any_any_barrier,
5193                                           bool byte_swap = false) {
5194   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
5195 
5196   X86_64Assembler* assembler = codegen->GetAssembler();
5197   LocationSummary* locations = invoke->GetLocations();
5198 
5199   // Get the type from the shorty as the invokes may not return a value.
5200   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5201   Location value = locations->InAt(number_of_arguments - 1);
5202   DataType::Type type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
5203 
5204   VarHandleSlowPathX86_64* slow_path = nullptr;
5205   VarHandleTarget target = GetVarHandleTarget(invoke);
5206   if (!byte_swap) {
5207     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
5208     GenerateVarHandleTarget(invoke, target, codegen);
5209     if (slow_path != nullptr) {
5210       slow_path->SetGetAndUpdateOp(get_and_update_op);
5211       slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
5212       slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
5213       __ Bind(slow_path->GetNativeByteOrderLabel());
5214     }
5215   }
5216 
5217   CpuRegister ref(target.object);
5218   Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
5219 
5220   if (need_any_store_barrier) {
5221     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5222   }
5223 
5224   switch (get_and_update_op) {
5225     case GetAndUpdateOp::kSet:
5226       GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
5227       break;
5228     case GetAndUpdateOp::kAdd:
5229       GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
5230       break;
5231     case GetAndUpdateOp::kBitwiseAnd:
5232     case GetAndUpdateOp::kBitwiseOr:
5233     case GetAndUpdateOp::kBitwiseXor:
5234       GenerateVarHandleGetAndOp(
5235           invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
5236       break;
5237   }
5238 
5239   if (need_any_any_barrier) {
5240     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5241   }
5242 
5243   if (slow_path != nullptr) {
5244     DCHECK(!byte_swap);
5245     __ Bind(slow_path->GetExitLabel());
5246   }
5247 }
5248 
VisitVarHandleGetAndSet(HInvoke * invoke)5249 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5250   CreateVarHandleGetAndSetLocations(invoke, codegen_);
5251 }
5252 
VisitVarHandleGetAndSet(HInvoke * invoke)5253 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5254   // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5255   GenerateVarHandleGetAndUpdate(invoke,
5256                                 codegen_,
5257                                 GetAndUpdateOp::kSet,
5258                                 /*need_any_store_barrier=*/ true,
5259                                 /*need_any_any_barrier=*/ true);
5260 }
5261 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)5262 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5263   CreateVarHandleGetAndSetLocations(invoke, codegen_);
5264 }
5265 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)5266 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5267   // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5268   GenerateVarHandleGetAndUpdate(invoke,
5269                                 codegen_,
5270                                 GetAndUpdateOp::kSet,
5271                                 /*need_any_store_barrier=*/ false,
5272                                 /*need_any_any_barrier=*/ false);
5273 }
5274 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)5275 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5276   CreateVarHandleGetAndSetLocations(invoke, codegen_);
5277 }
5278 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)5279 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5280   // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5281   GenerateVarHandleGetAndUpdate(invoke,
5282                                 codegen_,
5283                                 GetAndUpdateOp::kSet,
5284                                 /*need_any_store_barrier=*/ true,
5285                                 /*need_any_any_barrier=*/ false);
5286 }
5287 
VisitVarHandleGetAndAdd(HInvoke * invoke)5288 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5289   CreateVarHandleGetAndAddLocations(invoke, codegen_);
5290 }
5291 
VisitVarHandleGetAndAdd(HInvoke * invoke)5292 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5293   // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5294   GenerateVarHandleGetAndUpdate(invoke,
5295                                 codegen_,
5296                                 GetAndUpdateOp::kAdd,
5297                                 /*need_any_store_barrier=*/ true,
5298                                 /*need_any_any_barrier=*/ true);
5299 }
5300 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)5301 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5302   CreateVarHandleGetAndAddLocations(invoke, codegen_);
5303 }
5304 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)5305 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5306   // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5307   GenerateVarHandleGetAndUpdate(invoke,
5308                                 codegen_,
5309                                 GetAndUpdateOp::kAdd,
5310                                 /*need_any_store_barrier=*/ false,
5311                                 /*need_any_any_barrier=*/ false);
5312 }
5313 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)5314 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5315   CreateVarHandleGetAndAddLocations(invoke, codegen_);
5316 }
5317 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)5318 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5319   // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5320   GenerateVarHandleGetAndUpdate(invoke,
5321                                 codegen_,
5322                                 GetAndUpdateOp::kAdd,
5323                                 /*need_any_store_barrier=*/ true,
5324                                 /*need_any_any_barrier=*/ false);
5325 }
5326 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)5327 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5328   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5329 }
5330 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)5331 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5332   // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5333   GenerateVarHandleGetAndUpdate(invoke,
5334                                 codegen_,
5335                                 GetAndUpdateOp::kBitwiseAnd,
5336                                 /*need_any_store_barrier=*/ true,
5337                                 /*need_any_any_barrier=*/ true);
5338 }
5339 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)5340 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5341   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5342 }
5343 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)5344 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5345   // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5346   GenerateVarHandleGetAndUpdate(invoke,
5347                                 codegen_,
5348                                 GetAndUpdateOp::kBitwiseAnd,
5349                                 /*need_any_store_barrier=*/ false,
5350                                 /*need_any_any_barrier=*/ false);
5351 }
5352 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5353 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5354   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5355 }
5356 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5357 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5358   // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5359   GenerateVarHandleGetAndUpdate(invoke,
5360                                 codegen_,
5361                                 GetAndUpdateOp::kBitwiseAnd,
5362                                 /*need_any_store_barrier=*/ true,
5363                                 /*need_any_any_barrier=*/ false);
5364 }
5365 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5366 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5367   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5368 }
5369 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5370 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5371   // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5372   GenerateVarHandleGetAndUpdate(invoke,
5373                                 codegen_,
5374                                 GetAndUpdateOp::kBitwiseOr,
5375                                 /*need_any_store_barrier=*/ true,
5376                                 /*need_any_any_barrier=*/ true);
5377 }
5378 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5379 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5380   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5381 }
5382 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5383 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5384   // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5385   GenerateVarHandleGetAndUpdate(invoke,
5386                                 codegen_,
5387                                 GetAndUpdateOp::kBitwiseOr,
5388                                 /*need_any_store_barrier=*/ false,
5389                                 /*need_any_any_barrier=*/ false);
5390 }
5391 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5392 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5393   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5394 }
5395 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5396 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5397   // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5398   GenerateVarHandleGetAndUpdate(invoke,
5399                                 codegen_,
5400                                 GetAndUpdateOp::kBitwiseOr,
5401                                 /*need_any_store_barrier=*/ true,
5402                                 /*need_any_any_barrier=*/ false);
5403 }
5404 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5405 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5406   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5407 }
5408 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5409 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5410   // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5411   GenerateVarHandleGetAndUpdate(invoke,
5412                                 codegen_,
5413                                 GetAndUpdateOp::kBitwiseXor,
5414                                 /*need_any_store_barrier=*/ true,
5415                                 /*need_any_any_barrier=*/ true);
5416 }
5417 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5418 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5419   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5420 }
5421 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5422 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5423   // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5424   GenerateVarHandleGetAndUpdate(invoke,
5425                                 codegen_,
5426                                 GetAndUpdateOp::kBitwiseXor,
5427                                 /*need_any_store_barrier=*/ false,
5428                                 /*need_any_any_barrier=*/ false);
5429 }
5430 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5431 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5432   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5433 }
5434 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5435 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5436   // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5437   GenerateVarHandleGetAndUpdate(invoke,
5438                                 codegen_,
5439                                 GetAndUpdateOp::kBitwiseXor,
5440                                 /*need_any_store_barrier=*/ true,
5441                                 /*need_any_any_barrier=*/ false);
5442 }
5443 
EmitByteArrayViewCode(CodeGeneratorX86_64 * codegen)5444 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
5445   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5446   X86_64Assembler* assembler = codegen->GetAssembler();
5447 
5448   HInvoke* invoke = GetInvoke();
5449   LocationSummary* locations = invoke->GetLocations();
5450   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5451   DataType::Type value_type =
5452       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5453   DCHECK_NE(value_type, DataType::Type::kReference);
5454   size_t size = DataType::Size(value_type);
5455   DCHECK_GT(size, 1u);
5456 
5457   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
5458   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
5459   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
5460   CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1u).AsRegister<CpuRegister>();
5461 
5462   MemberOffset class_offset = mirror::Object::ClassOffset();
5463   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5464   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5465   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5466 
5467   VarHandleTarget target = GetVarHandleTarget(invoke);
5468 
5469   __ Bind(GetByteArrayViewCheckLabel());
5470 
5471   // The main path checked that coordinateType0 is an array class that matches
5472   // the class of the actual coordinate argument, but it does not match the value type.
5473   // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5474   codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5475   assembler->MaybePoisonHeapReference(temp);
5476   __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
5477   __ j(kNotEqual, GetEntryLabel());
5478 
5479   // Check for array index out of bounds.
5480   __ movl(temp, Address(object, array_length_offset.Int32Value()));
5481   // SUB sets flags in the same way as CMP.
5482   __ subl(temp, index);
5483   __ j(kBelowEqual, GetEntryLabel());
5484   // The difference between index and array length must be enough for the `value_type` size.
5485   __ cmpl(temp, Immediate(size));
5486   __ j(kBelow, GetEntryLabel());
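  // In other words: require index < length and (length - index) >= size, e.g. a 4-byte view
  // element needs index + 4 <= the array length.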
5487 
5488   // Construct the target.
5489   __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
5490 
5491   // Alignment check. For unaligned access, go to the runtime.
5492   DCHECK(IsPowerOfTwo(size));
5493   __ testl(CpuRegister(target.offset), Immediate(size - 1u));
5494   __ j(kNotZero, GetEntryLabel());
5495 
5496   // Byte order check. For native byte order return to the main path.
5497   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5498       IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5499     // There is no reason to differentiate between native byte order and byte-swap
5500     // for setting a zero bit pattern. Just return to the main path.
5501     __ jmp(GetNativeByteOrderLabel());
5502     return;
5503   }
5504   __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
5505   __ j(kNotEqual, GetNativeByteOrderLabel());
5506 
5507   switch (access_mode_template) {
5508     case mirror::VarHandle::AccessModeTemplate::kGet:
5509       GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
5510       break;
5511     case mirror::VarHandle::AccessModeTemplate::kSet:
5512       GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
5513       break;
5514     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5515       GenerateVarHandleCompareAndSetOrExchange(
5516           invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
5517       break;
5518     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5519       GenerateVarHandleCompareAndSetOrExchange(
5520           invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
5521       break;
5522     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5523       GenerateVarHandleGetAndUpdate(invoke,
5524                                     codegen,
5525                                     get_and_update_op_,
5526                                     need_any_store_barrier_,
5527                                     need_any_any_barrier_,
5528                                     /*byte_swap=*/ true);
5529       break;
5530   }
5531 
5532   __ jmp(GetExitLabel());
5533 }
5534 
5535 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
5536 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
5537 #undef MARK_UNIMPLEMENTED
5538 
5539 UNREACHABLE_INTRINSICS(X86_64)
5540 
5541 #undef __
5542 
5543 }  // namespace x86_64
5544 }  // namespace art
5545