1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86_64.h"
18
19 #include <limits>
20
21 #include "arch/x86_64/instruction_set_features_x86_64.h"
22 #include "arch/x86_64/registers_x86_64.h"
23 #include "art_method.h"
24 #include "base/bit_utils.h"
25 #include "code_generator_x86_64.h"
26 #include "dex/modifiers.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "heap_poisoning.h"
30 #include "intrinsic_objects.h"
31 #include "intrinsics.h"
32 #include "intrinsics_utils.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/object_array-inl.h"
36 #include "mirror/reference.h"
37 #include "mirror/string.h"
38 #include "optimizing/code_generator.h"
39 #include "optimizing/data_type.h"
40 #include "optimizing/locations.h"
41 #include "scoped_thread_state_change-inl.h"
42 #include "thread-current-inl.h"
43 #include "utils/x86_64/assembler_x86_64.h"
44 #include "utils/x86_64/constants_x86_64.h"
45 #include "well_known_classes.h"
46
47 namespace art HIDDEN {
48
49 namespace x86_64 {
50
IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64 * codegen)51 IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
52 : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
53 }
54
GetAssembler()55 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
56 return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
57 }
58
GetAllocator()59 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
60 return codegen_->GetGraph()->GetAllocator();
61 }
62
TryDispatch(HInvoke * invoke)63 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
64 Dispatch(invoke);
65 LocationSummary* res = invoke->GetLocations();
66 if (res == nullptr) {
67 return false;
68 }
69 return res->Intrinsified();
70 }
71
72 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
73
74 #define __ assembler->
75
GenArrayAddress(X86_64Assembler * assembler,CpuRegister dest,CpuRegister base,Location pos,DataType::Type type,uint32_t data_offset)76 static void GenArrayAddress(X86_64Assembler* assembler,
77 CpuRegister dest,
78 CpuRegister base,
79 Location pos,
80 DataType::Type type,
81 uint32_t data_offset) {
82 // Note: The heap is in low 4GiB, so we're using LEAL rather than LEAQ to save on code size.
83 if (pos.IsConstant()) {
84 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
85 __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
86 } else {
87 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
88 __ leal(dest, Address(base, pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
89 }
90 }
91
92 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
93 class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
94 public:
ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction * instruction)95 explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
96 : SlowPathCode(instruction) {
97 }
98
EmitNativeCode(CodeGenerator * codegen)99 void EmitNativeCode(CodeGenerator* codegen) override {
100 DCHECK(codegen->EmitBakerReadBarrier());
101 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
102 X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
103 LocationSummary* locations = instruction_->GetLocations();
104 DCHECK(locations->CanCall());
105 DCHECK(instruction_->IsInvokeStaticOrDirect())
106 << "Unexpected instruction in read barrier arraycopy slow path: "
107 << instruction_->DebugName();
108 DCHECK(instruction_->GetLocations()->Intrinsified());
109 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
110 Location length = locations->InAt(4);
111
112 const DataType::Type type = DataType::Type::kReference;
113 const int32_t element_size = DataType::Size(type);
114
115 CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
116 CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
117 CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
118
119 __ Bind(GetEntryLabel());
120 // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
121 GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
122
123 NearLabel loop;
124 __ Bind(&loop);
125 __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
126 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
127 // TODO: Inline the mark bit check before calling the runtime?
128 // TMP = ReadBarrier::Mark(TMP);
129 // No need to save live registers; it's taken care of by the
130 // entrypoint. Also, there is no need to update the stack mask,
131 // as this runtime call will not trigger a garbage collection.
132 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
133 // This runtime call does not require a stack map.
134 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
135 __ MaybePoisonHeapReference(CpuRegister(TMP));
136 __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
137 __ addl(src_curr_addr, Immediate(element_size));
138 __ addl(dst_curr_addr, Immediate(element_size));
139 __ cmpl(src_curr_addr, src_stop_addr);
140 __ j(kNotEqual, &loop);
141 __ jmp(GetExitLabel());
142 }
143
GetDescription() const144 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
145
146 private:
147 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
148 };
149
150 // The MethodHandle.invokeExact intrinsic sets up arguments to match the target method call. If we
151 // need to go to the slow path, we call art_quick_invoke_polymorphic_with_hidden_receiver, which
152 // expects the MethodHandle object in RDI (in place of the actual ArtMethod).
153 class InvokePolymorphicSlowPathX86_64 : public SlowPathCode {
154 public:
InvokePolymorphicSlowPathX86_64(HInstruction * instruction,CpuRegister method_handle)155 InvokePolymorphicSlowPathX86_64(HInstruction* instruction, CpuRegister method_handle)
156 : SlowPathCode(instruction), method_handle_(method_handle) {
157 DCHECK(instruction->IsInvokePolymorphic());
158 }
159
EmitNativeCode(CodeGenerator * codegen)160 void EmitNativeCode(CodeGenerator* codegen) override {
161 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
162 X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
163 __ Bind(GetEntryLabel());
164 SaveLiveRegisters(codegen, instruction_->GetLocations());
165
166 // Passing `MethodHandle` object as hidden argument.
167 __ movl(CpuRegister(RDI), method_handle_);
168 x86_64_codegen->InvokeRuntime(QuickEntrypointEnum::kQuickInvokePolymorphicWithHiddenReceiver,
169 instruction_,
170 instruction_->GetDexPc());
171
172 RestoreLiveRegisters(codegen, instruction_->GetLocations());
173 __ jmp(GetExitLabel());
174 }
175
GetDescription() const176 const char* GetDescription() const override { return "InvokePolymorphicSlowPathX86_64"; }
177
178 private:
179 const CpuRegister method_handle_;
180 DISALLOW_COPY_AND_ASSIGN(InvokePolymorphicSlowPathX86_64);
181 };
182
CreateFPToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)183 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
184 LocationSummary* locations =
185 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
186 locations->SetInAt(0, Location::RequiresFpuRegister());
187 locations->SetOut(Location::RequiresRegister());
188 }
189
CreateIntToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)190 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
191 LocationSummary* locations =
192 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
193 locations->SetInAt(0, Location::RequiresRegister());
194 locations->SetOut(Location::RequiresFpuRegister());
195 }
196
MoveFPToInt(LocationSummary * locations,bool is64bit,X86_64Assembler * assembler)197 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
198 Location input = locations->InAt(0);
199 Location output = locations->Out();
200 __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
201 }
202
MoveIntToFP(LocationSummary * locations,bool is64bit,X86_64Assembler * assembler)203 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
204 Location input = locations->InAt(0);
205 Location output = locations->Out();
206 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
207 }
208
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)209 void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
210 CreateFPToIntLocations(allocator_, invoke);
211 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)212 void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
213 CreateIntToFPLocations(allocator_, invoke);
214 }
215
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)216 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
217 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
218 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)219 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
220 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
221 }
222
VisitFloatFloatToRawIntBits(HInvoke * invoke)223 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
224 CreateFPToIntLocations(allocator_, invoke);
225 }
VisitFloatIntBitsToFloat(HInvoke * invoke)226 void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
227 CreateIntToFPLocations(allocator_, invoke);
228 }
229
VisitFloatFloatToRawIntBits(HInvoke * invoke)230 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
231 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
232 }
VisitFloatIntBitsToFloat(HInvoke * invoke)233 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
234 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
235 }
236
CreateIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)237 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
238 LocationSummary* locations =
239 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
240 locations->SetInAt(0, Location::RequiresRegister());
241 locations->SetOut(Location::SameAsFirstInput());
242 }
243
VisitIntegerReverseBytes(HInvoke * invoke)244 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
245 CreateIntToIntLocations(allocator_, invoke);
246 }
247
VisitIntegerReverseBytes(HInvoke * invoke)248 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
249 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
250 }
251
VisitLongReverseBytes(HInvoke * invoke)252 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
253 CreateIntToIntLocations(allocator_, invoke);
254 }
255
VisitLongReverseBytes(HInvoke * invoke)256 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
257 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
258 }
259
VisitShortReverseBytes(HInvoke * invoke)260 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
261 CreateIntToIntLocations(allocator_, invoke);
262 }
263
VisitShortReverseBytes(HInvoke * invoke)264 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
265 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
266 }
267
GenIsInfinite(LocationSummary * locations,bool is64bit,CodeGeneratorX86_64 * codegen)268 static void GenIsInfinite(LocationSummary* locations,
269 bool is64bit,
270 CodeGeneratorX86_64* codegen) {
271 X86_64Assembler* assembler = codegen->GetAssembler();
272
273 XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
274 CpuRegister output = locations->Out().AsRegister<CpuRegister>();
275
276 NearLabel done1, done2;
277
278 if (is64bit) {
279 double kPositiveInfinity = std::numeric_limits<double>::infinity();
280 double kNegativeInfinity = -1 * kPositiveInfinity;
281
282 __ xorq(output, output);
283 __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
284 __ j(kNotEqual, &done1);
285 __ j(kParityEven, &done2);
286 __ movq(output, Immediate(1));
287 __ jmp(&done2);
288 __ Bind(&done1);
289 __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
290 __ j(kNotEqual, &done2);
291 __ j(kParityEven, &done2);
292 __ movq(output, Immediate(1));
293 __ Bind(&done2);
294 } else {
295 float kPositiveInfinity = std::numeric_limits<float>::infinity();
296 float kNegativeInfinity = -1 * kPositiveInfinity;
297
298 __ xorl(output, output);
299 __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
300 __ j(kNotEqual, &done1);
301 __ j(kParityEven, &done2);
302 __ movl(output, Immediate(1));
303 __ jmp(&done2);
304 __ Bind(&done1);
305 __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
306 __ j(kNotEqual, &done2);
307 __ j(kParityEven, &done2);
308 __ movl(output, Immediate(1));
309 __ Bind(&done2);
310 }
311 }
312
VisitFloatIsInfinite(HInvoke * invoke)313 void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
314 CreateFPToIntLocations(allocator_, invoke);
315 }
316
VisitFloatIsInfinite(HInvoke * invoke)317 void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
318 GenIsInfinite(invoke->GetLocations(), /* is64bit=*/ false, codegen_);
319 }
320
VisitDoubleIsInfinite(HInvoke * invoke)321 void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
322 CreateFPToIntLocations(allocator_, invoke);
323 }
324
VisitDoubleIsInfinite(HInvoke * invoke)325 void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
326 GenIsInfinite(invoke->GetLocations(), /* is64bit=*/ true, codegen_);
327 }
328
CreateFPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)329 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
330 LocationSummary* locations =
331 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
332 locations->SetInAt(0, Location::RequiresFpuRegister());
333 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
334 }
335
VisitMathSqrt(HInvoke * invoke)336 void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
337 CreateFPToFPLocations(allocator_, invoke);
338 }
339
VisitMathSqrt(HInvoke * invoke)340 void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
341 LocationSummary* locations = invoke->GetLocations();
342 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
343 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
344
345 GetAssembler()->sqrtsd(out, in);
346 }
347
CreateSSE41FPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)348 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
349 HInvoke* invoke,
350 CodeGeneratorX86_64* codegen) {
351 // Do we have instruction support?
352 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
353 return;
354 }
355
356 CreateFPToFPLocations(allocator, invoke);
357 }
358
GenSSE41FPToFPIntrinsic(HInvoke * invoke,X86_64Assembler * assembler,int round_mode)359 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
360 LocationSummary* locations = invoke->GetLocations();
361 DCHECK(!locations->WillCall());
362 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
363 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
364 __ roundsd(out, in, Immediate(round_mode));
365 }
366
VisitMathCeil(HInvoke * invoke)367 void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
368 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
369 }
370
VisitMathCeil(HInvoke * invoke)371 void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
372 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
373 }
374
VisitMathFloor(HInvoke * invoke)375 void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
376 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
377 }
378
VisitMathFloor(HInvoke * invoke)379 void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
380 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
381 }
382
VisitMathRint(HInvoke * invoke)383 void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
384 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
385 }
386
VisitMathRint(HInvoke * invoke)387 void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
388 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
389 }
390
CreateSSE41FPToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)391 static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
392 HInvoke* invoke,
393 CodeGeneratorX86_64* codegen) {
394 // Do we have instruction support?
395 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
396 return;
397 }
398
399 LocationSummary* locations =
400 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
401 locations->SetInAt(0, Location::RequiresFpuRegister());
402 locations->SetOut(Location::RequiresRegister());
403 locations->AddTemp(Location::RequiresFpuRegister());
404 locations->AddTemp(Location::RequiresFpuRegister());
405 }
406
VisitMathRoundFloat(HInvoke * invoke)407 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
408 CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
409 }
410
VisitMathRoundFloat(HInvoke * invoke)411 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
412 LocationSummary* locations = invoke->GetLocations();
413 DCHECK(!locations->WillCall());
414
415 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
416 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
417 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
418 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
419 NearLabel skip_incr, done;
420 X86_64Assembler* assembler = GetAssembler();
421
422 // Since no direct x86 rounding instruction matches the required semantics,
423 // this intrinsic is implemented as follows:
424 // result = floor(in);
425 // if (in - result >= 0.5f)
426 // result = result + 1.0f;
427 __ movss(t2, in);
428 __ roundss(t1, in, Immediate(1));
429 __ subss(t2, t1);
430 __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
431 __ j(kBelow, &skip_incr);
432 __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
433 __ Bind(&skip_incr);
434
435 // Final conversion to an integer. Unfortunately this also does not have a
436 // direct x86 instruction, since NaN should map to 0 and large positive
437 // values need to be clipped to the extreme value.
438 codegen_->Load32BitValue(out, kPrimIntMax);
439 __ cvtsi2ss(t2, out);
440 __ comiss(t1, t2);
441 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
442 __ movl(out, Immediate(0)); // does not change flags
443 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
444 __ cvttss2si(out, t1);
445 __ Bind(&done);
446 }
447
VisitMathRoundDouble(HInvoke * invoke)448 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
449 CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
450 }
451
VisitMathRoundDouble(HInvoke * invoke)452 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
453 LocationSummary* locations = invoke->GetLocations();
454 DCHECK(!locations->WillCall());
455
456 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
457 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
458 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
459 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
460 NearLabel skip_incr, done;
461 X86_64Assembler* assembler = GetAssembler();
462
463 // Since no direct x86 rounding instruction matches the required semantics,
464 // this intrinsic is implemented as follows:
465 // result = floor(in);
466 // if (in - result >= 0.5)
467 // result = result + 1.0f;
468 __ movsd(t2, in);
469 __ roundsd(t1, in, Immediate(1));
470 __ subsd(t2, t1);
471 __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
472 __ j(kBelow, &skip_incr);
473 __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
474 __ Bind(&skip_incr);
475
476 // Final conversion to an integer. Unfortunately this also does not have a
477 // direct x86 instruction, since NaN should map to 0 and large positive
478 // values need to be clipped to the extreme value.
479 codegen_->Load64BitValue(out, kPrimLongMax);
480 __ cvtsi2sd(t2, out, /* is64bit= */ true);
481 __ comisd(t1, t2);
482 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
483 __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit
484 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
485 __ cvttsd2si(out, t1, /* is64bit= */ true);
486 __ Bind(&done);
487 }
488
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)489 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
490 LocationSummary* locations =
491 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
492 InvokeRuntimeCallingConvention calling_convention;
493 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
494 locations->SetOut(Location::FpuRegisterLocation(XMM0));
495
496 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
497 }
498
GenFPToFPCall(HInvoke * invoke,CodeGeneratorX86_64 * codegen,QuickEntrypointEnum entry)499 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
500 QuickEntrypointEnum entry) {
501 LocationSummary* locations = invoke->GetLocations();
502 DCHECK(locations->WillCall());
503 DCHECK(invoke->IsInvokeStaticOrDirect());
504
505 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
506 }
507
VisitMathCos(HInvoke * invoke)508 void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
509 CreateFPToFPCallLocations(allocator_, invoke);
510 }
511
VisitMathCos(HInvoke * invoke)512 void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
513 GenFPToFPCall(invoke, codegen_, kQuickCos);
514 }
515
VisitMathSin(HInvoke * invoke)516 void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
517 CreateFPToFPCallLocations(allocator_, invoke);
518 }
519
VisitMathSin(HInvoke * invoke)520 void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
521 GenFPToFPCall(invoke, codegen_, kQuickSin);
522 }
523
VisitMathAcos(HInvoke * invoke)524 void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
525 CreateFPToFPCallLocations(allocator_, invoke);
526 }
527
VisitMathAcos(HInvoke * invoke)528 void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
529 GenFPToFPCall(invoke, codegen_, kQuickAcos);
530 }
531
VisitMathAsin(HInvoke * invoke)532 void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
533 CreateFPToFPCallLocations(allocator_, invoke);
534 }
535
VisitMathAsin(HInvoke * invoke)536 void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
537 GenFPToFPCall(invoke, codegen_, kQuickAsin);
538 }
539
VisitMathAtan(HInvoke * invoke)540 void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
541 CreateFPToFPCallLocations(allocator_, invoke);
542 }
543
VisitMathAtan(HInvoke * invoke)544 void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
545 GenFPToFPCall(invoke, codegen_, kQuickAtan);
546 }
547
VisitMathCbrt(HInvoke * invoke)548 void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
549 CreateFPToFPCallLocations(allocator_, invoke);
550 }
551
VisitMathCbrt(HInvoke * invoke)552 void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
553 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
554 }
555
VisitMathCosh(HInvoke * invoke)556 void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
557 CreateFPToFPCallLocations(allocator_, invoke);
558 }
559
VisitMathCosh(HInvoke * invoke)560 void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
561 GenFPToFPCall(invoke, codegen_, kQuickCosh);
562 }
563
VisitMathExp(HInvoke * invoke)564 void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
565 CreateFPToFPCallLocations(allocator_, invoke);
566 }
567
VisitMathExp(HInvoke * invoke)568 void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
569 GenFPToFPCall(invoke, codegen_, kQuickExp);
570 }
571
VisitMathExpm1(HInvoke * invoke)572 void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
573 CreateFPToFPCallLocations(allocator_, invoke);
574 }
575
VisitMathExpm1(HInvoke * invoke)576 void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
577 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
578 }
579
VisitMathLog(HInvoke * invoke)580 void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
581 CreateFPToFPCallLocations(allocator_, invoke);
582 }
583
VisitMathLog(HInvoke * invoke)584 void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
585 GenFPToFPCall(invoke, codegen_, kQuickLog);
586 }
587
VisitMathLog10(HInvoke * invoke)588 void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
589 CreateFPToFPCallLocations(allocator_, invoke);
590 }
591
VisitMathLog10(HInvoke * invoke)592 void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
593 GenFPToFPCall(invoke, codegen_, kQuickLog10);
594 }
595
VisitMathSinh(HInvoke * invoke)596 void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
597 CreateFPToFPCallLocations(allocator_, invoke);
598 }
599
VisitMathSinh(HInvoke * invoke)600 void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
601 GenFPToFPCall(invoke, codegen_, kQuickSinh);
602 }
603
VisitMathTan(HInvoke * invoke)604 void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
605 CreateFPToFPCallLocations(allocator_, invoke);
606 }
607
VisitMathTan(HInvoke * invoke)608 void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
609 GenFPToFPCall(invoke, codegen_, kQuickTan);
610 }
611
VisitMathTanh(HInvoke * invoke)612 void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
613 CreateFPToFPCallLocations(allocator_, invoke);
614 }
615
VisitMathTanh(HInvoke * invoke)616 void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
617 GenFPToFPCall(invoke, codegen_, kQuickTanh);
618 }
619
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)620 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
621 LocationSummary* locations =
622 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
623 InvokeRuntimeCallingConvention calling_convention;
624 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
625 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
626 locations->SetOut(Location::FpuRegisterLocation(XMM0));
627
628 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
629 }
630
CreateFPFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)631 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
632 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
633 LocationSummary* locations =
634 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
635 InvokeRuntimeCallingConvention calling_convention;
636 locations->SetInAt(0, Location::RequiresFpuRegister());
637 locations->SetInAt(1, Location::RequiresFpuRegister());
638 locations->SetInAt(2, Location::RequiresFpuRegister());
639 locations->SetOut(Location::SameAsFirstInput());
640 }
641
VisitMathAtan2(HInvoke * invoke)642 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
643 CreateFPFPToFPCallLocations(allocator_, invoke);
644 }
645
VisitMathAtan2(HInvoke * invoke)646 void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
647 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
648 }
649
VisitMathPow(HInvoke * invoke)650 void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
651 CreateFPFPToFPCallLocations(allocator_, invoke);
652 }
653
VisitMathPow(HInvoke * invoke)654 void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
655 GenFPToFPCall(invoke, codegen_, kQuickPow);
656 }
657
VisitMathHypot(HInvoke * invoke)658 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
659 CreateFPFPToFPCallLocations(allocator_, invoke);
660 }
661
VisitMathHypot(HInvoke * invoke)662 void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
663 GenFPToFPCall(invoke, codegen_, kQuickHypot);
664 }
665
VisitMathNextAfter(HInvoke * invoke)666 void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
667 CreateFPFPToFPCallLocations(allocator_, invoke);
668 }
669
VisitMathNextAfter(HInvoke * invoke)670 void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
671 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
672 }
673
CreateSystemArrayCopyLocations(HInvoke * invoke)674 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
675 // Check to see if we have known failures that will cause us to have to bail out
676 // to the runtime, and just generate the runtime call directly.
677 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
678 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
679
680 // The positions must be non-negative.
681 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
682 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
683 // We will have to fail anyways.
684 return;
685 }
686
687 // The length must be > 0.
688 HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
689 if (length != nullptr) {
690 int32_t len = length->GetValue();
691 if (len < 0) {
692 // Just call as normal.
693 return;
694 }
695 }
696 LocationSummary* locations =
697 new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
698 (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
699 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
700 locations->SetInAt(0, Location::RequiresRegister());
701 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
702 locations->SetInAt(2, Location::RequiresRegister());
703 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
704 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
705
706 // And we need some temporaries. We will use REP MOVS{B,W,L}, so we need fixed registers.
707 locations->AddTemp(Location::RegisterLocation(RSI));
708 locations->AddTemp(Location::RegisterLocation(RDI));
709 locations->AddTemp(Location::RegisterLocation(RCX));
710 }
711
712 template <typename LhsType>
EmitCmplJLess(X86_64Assembler * assembler,LhsType lhs,Location rhs,Label * label)713 static void EmitCmplJLess(X86_64Assembler* assembler,
714 LhsType lhs,
715 Location rhs,
716 Label* label) {
717 static_assert(std::is_same_v<LhsType, CpuRegister> || std::is_same_v<LhsType, Address>);
718 if (rhs.IsConstant()) {
719 int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
720 __ cmpl(lhs, Immediate(rhs_constant));
721 } else {
722 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
723 }
724 __ j(kLess, label);
725 }
726
CheckSystemArrayCopyPosition(X86_64Assembler * assembler,CpuRegister array,Location pos,Location length,SlowPathCode * slow_path,CpuRegister temp,bool length_is_array_length,bool position_sign_checked)727 static void CheckSystemArrayCopyPosition(X86_64Assembler* assembler,
728 CpuRegister array,
729 Location pos,
730 Location length,
731 SlowPathCode* slow_path,
732 CpuRegister temp,
733 bool length_is_array_length,
734 bool position_sign_checked) {
735 // Where is the length in the Array?
736 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
737
738 if (pos.IsConstant()) {
739 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
740 if (pos_const == 0) {
741 if (!length_is_array_length) {
742 // Check that length(array) >= length.
743 EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
744 }
745 } else {
746 // Calculate length(array) - pos.
747 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
748 // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
749 __ movl(temp, Address(array, length_offset));
750 __ subl(temp, Immediate(pos_const));
751
752 // Check that (length(array) - pos) >= length.
753 EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
754 }
755 } else if (length_is_array_length) {
756 // The only way the copy can succeed is if pos is zero.
757 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
758 __ testl(pos_reg, pos_reg);
759 __ j(kNotEqual, slow_path->GetEntryLabel());
760 } else {
761 // Check that pos >= 0.
762 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
763 if (!position_sign_checked) {
764 __ testl(pos_reg, pos_reg);
765 __ j(kLess, slow_path->GetEntryLabel());
766 }
767
768 // Calculate length(array) - pos.
769 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
770 // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
771 __ movl(temp, Address(array, length_offset));
772 __ subl(temp, pos_reg);
773
774 // Check that (length(array) - pos) >= length.
775 EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
776 }
777 }
778
SystemArrayCopyPrimitive(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,DataType::Type type)779 static void SystemArrayCopyPrimitive(HInvoke* invoke,
780 X86_64Assembler* assembler,
781 CodeGeneratorX86_64* codegen,
782 DataType::Type type) {
783 LocationSummary* locations = invoke->GetLocations();
784 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
785 Location src_pos = locations->InAt(1);
786 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
787 Location dest_pos = locations->InAt(3);
788 Location length = locations->InAt(4);
789
790 // Temporaries that we need for MOVSB/W/L.
791 CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
792 DCHECK_EQ(src_base.AsRegister(), RSI);
793 CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
794 DCHECK_EQ(dest_base.AsRegister(), RDI);
795 CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
796 DCHECK_EQ(count.AsRegister(), RCX);
797
798 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
799 codegen->AddSlowPath(slow_path);
800
801 // Bail out if the source and destination are the same.
802 __ cmpl(src, dest);
803 __ j(kEqual, slow_path->GetEntryLabel());
804
805 // Bail out if the source is null.
806 __ testl(src, src);
807 __ j(kEqual, slow_path->GetEntryLabel());
808
809 // Bail out if the destination is null.
810 __ testl(dest, dest);
811 __ j(kEqual, slow_path->GetEntryLabel());
812
813 // If the length is negative, bail out.
814 // We have already checked in the LocationsBuilder for the constant case.
815 if (!length.IsConstant()) {
816 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
817 __ j(kLess, slow_path->GetEntryLabel());
818 }
819
820 // Validity checks: source. Use src_base as a temporary register.
821 CheckSystemArrayCopyPosition(assembler,
822 src,
823 src_pos,
824 length,
825 slow_path,
826 src_base,
827 /*length_is_array_length=*/ false,
828 /*position_sign_checked=*/ false);
829
830 // Validity checks: dest. Use src_base as a temporary register.
831 CheckSystemArrayCopyPosition(assembler,
832 dest,
833 dest_pos,
834 length,
835 slow_path,
836 src_base,
837 /*length_is_array_length=*/ false,
838 /*position_sign_checked=*/ false);
839
840 // We need the count in RCX.
841 if (length.IsConstant()) {
842 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
843 } else {
844 __ movl(count, length.AsRegister<CpuRegister>());
845 }
846
847 // Okay, everything checks out. Finally time to do the copy.
848 // Check assumption that sizeof(Char) is 2 (used in scaling below).
849 const size_t data_size = DataType::Size(type);
850 const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
851
852 GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
853 GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
854
855 // Do the move.
856 switch (type) {
857 case DataType::Type::kInt8:
858 __ rep_movsb();
859 break;
860 case DataType::Type::kUint16:
861 __ rep_movsw();
862 break;
863 case DataType::Type::kInt32:
864 __ rep_movsl();
865 break;
866 default:
867 LOG(FATAL) << "Unexpected data type for intrinsic";
868 }
869 __ Bind(slow_path->GetExitLabel());
870 }
871
VisitSystemArrayCopyChar(HInvoke * invoke)872 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
873 CreateSystemArrayCopyLocations(invoke);
874 }
VisitSystemArrayCopyChar(HInvoke * invoke)875 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
876 X86_64Assembler* assembler = GetAssembler();
877 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
878 }
879
VisitSystemArrayCopyByte(HInvoke * invoke)880 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
881 X86_64Assembler* assembler = GetAssembler();
882 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
883 }
884
VisitSystemArrayCopyByte(HInvoke * invoke)885 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
886 CreateSystemArrayCopyLocations(invoke);
887 }
888
VisitSystemArrayCopyInt(HInvoke * invoke)889 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
890 X86_64Assembler* assembler = GetAssembler();
891 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
892 }
893
VisitSystemArrayCopyInt(HInvoke * invoke)894 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
895 CreateSystemArrayCopyLocations(invoke);
896 }
897
VisitSystemArrayCopy(HInvoke * invoke)898 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
899 // The only read barrier implementation supporting the
900 // SystemArrayCopy intrinsic is the Baker-style read barriers.
901 if (codegen_->EmitNonBakerReadBarrier()) {
902 return;
903 }
904
905 constexpr int32_t kLengthThreshold = -1; // No cut-off - handle large arrays in intrinsic code.
906 constexpr size_t kInitialNumTemps = 0u; // We shall allocate temps explicitly.
907 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
908 invoke, kLengthThreshold, kInitialNumTemps);
909 if (locations != nullptr) {
910 // Add temporaries. We will use REP MOVSL, so we need fixed registers.
911 DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
912 locations->AddTemp(Location::RegisterLocation(RSI));
913 locations->AddTemp(Location::RegisterLocation(RDI));
914 locations->AddTemp(Location::RegisterLocation(RCX));
915 }
916 }
917
VisitSystemArrayCopy(HInvoke * invoke)918 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
919 // The only read barrier implementation supporting the
920 // SystemArrayCopy intrinsic is the Baker-style read barriers.
921 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
922
923 X86_64Assembler* assembler = GetAssembler();
924 LocationSummary* locations = invoke->GetLocations();
925
926 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
927 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
928 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
929 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
930 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
931
932 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
933 Location src_pos = locations->InAt(1);
934 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
935 Location dest_pos = locations->InAt(3);
936 Location length = locations->InAt(4);
937 Location temp1_loc = locations->GetTemp(0);
938 CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
939 Location temp2_loc = locations->GetTemp(1);
940 CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
941 Location temp3_loc = locations->GetTemp(2);
942 CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
943
944 SlowPathCode* intrinsic_slow_path =
945 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
946 codegen_->AddSlowPath(intrinsic_slow_path);
947
948 NearLabel conditions_on_positions_validated;
949 SystemArrayCopyOptimizations optimizations(invoke);
950
951 // If source and destination are the same, we go to slow path if we need to do forward copying.
952 // We do not need to do this check if the source and destination positions are the same.
953 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
954 if (src_pos.IsConstant()) {
955 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
956 if (dest_pos.IsConstant()) {
957 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
958 if (optimizations.GetDestinationIsSource()) {
959 // Checked when building locations.
960 DCHECK_GE(src_pos_constant, dest_pos_constant);
961 } else if (src_pos_constant < dest_pos_constant) {
962 __ cmpl(src, dest);
963 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
964 }
965 } else {
966 if (!optimizations.GetDestinationIsSource()) {
967 __ cmpl(src, dest);
968 __ j(kNotEqual, &conditions_on_positions_validated);
969 }
970 __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
971 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
972 }
973 } else {
974 if (!optimizations.GetDestinationIsSource()) {
975 __ cmpl(src, dest);
976 __ j(kNotEqual, &conditions_on_positions_validated);
977 }
978 CpuRegister src_pos_reg = src_pos.AsRegister<CpuRegister>();
979 EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
980 }
981 }
982
983 __ Bind(&conditions_on_positions_validated);
984
985 if (!optimizations.GetSourceIsNotNull()) {
986 // Bail out if the source is null.
987 __ testl(src, src);
988 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
989 }
990
991 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
992 // Bail out if the destination is null.
993 __ testl(dest, dest);
994 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
995 }
996
997 // If the length is negative, bail out.
998 // We have already checked in the LocationsBuilder for the constant case.
999 if (!length.IsConstant() &&
1000 !optimizations.GetCountIsSourceLength() &&
1001 !optimizations.GetCountIsDestinationLength()) {
1002 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1003 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1004 }
1005
1006 // Validity checks: source.
1007 CheckSystemArrayCopyPosition(assembler,
1008 src,
1009 src_pos,
1010 length,
1011 intrinsic_slow_path,
1012 temp1,
1013 optimizations.GetCountIsSourceLength(),
1014 /*position_sign_checked=*/ false);
1015
1016 // Validity checks: dest.
1017 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1018 CheckSystemArrayCopyPosition(assembler,
1019 dest,
1020 dest_pos,
1021 length,
1022 intrinsic_slow_path,
1023 temp1,
1024 optimizations.GetCountIsDestinationLength(),
1025 dest_position_sign_checked);
1026
1027 auto check_non_primitive_array_class = [&](CpuRegister klass, CpuRegister temp) {
1028 // No read barrier is needed for reading a chain of constant references for comparing
1029 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1030 // /* HeapReference<Class> */ temp = klass->component_type_
1031 __ movl(temp, Address(klass, component_offset));
1032 __ MaybeUnpoisonHeapReference(temp);
1033 // Check that the component type is not null.
1034 __ testl(temp, temp);
1035 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1036 // Check that the component type is not a primitive.
1037 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
1038 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1039 };
1040
1041 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1042 // Check whether all elements of the source array are assignable to the component
1043 // type of the destination array. We do two checks: the classes are the same,
1044 // or the destination is Object[]. If none of these checks succeed, we go to the
1045 // slow path.
1046
1047 if (codegen_->EmitBakerReadBarrier()) {
1048 // /* HeapReference<Class> */ temp1 = dest->klass_
1049 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1050 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
1051 // Register `temp1` is not trashed by the read barrier emitted
1052 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1053 // method produces a call to a ReadBarrierMarkRegX entry point,
1054 // which saves all potentially live registers, including
1055 // temporaries such a `temp1`.
1056 // /* HeapReference<Class> */ temp2 = src->klass_
1057 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1058 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
1059 // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
1060 // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
1061 } else {
1062 // /* HeapReference<Class> */ temp1 = dest->klass_
1063 __ movl(temp1, Address(dest, class_offset));
1064 __ MaybeUnpoisonHeapReference(temp1);
1065 // /* HeapReference<Class> */ temp2 = src->klass_
1066 __ movl(temp2, Address(src, class_offset));
1067 __ MaybeUnpoisonHeapReference(temp2);
1068 }
1069
1070 __ cmpl(temp1, temp2);
1071 if (optimizations.GetDestinationIsTypedObjectArray()) {
1072 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1073 NearLabel do_copy;
1074 // For class match, we can skip the source type check regardless of the optimization flag.
1075 __ j(kEqual, &do_copy);
1076 // No read barrier is needed for reading a chain of constant references
1077 // for comparing with null, see `ReadBarrierOption`.
1078 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1079 __ movl(temp1, Address(temp1, component_offset));
1080 __ MaybeUnpoisonHeapReference(temp1);
1081 // No need to unpoison the following heap reference load, as
1082 // we're comparing against null.
1083 __ cmpl(Address(temp1, super_offset), Immediate(0));
1084 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1085 // Bail out if the source is not a non primitive array.
1086 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1087 check_non_primitive_array_class(temp2, CpuRegister(TMP));
1088 }
1089 __ Bind(&do_copy);
1090 } else {
1091 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1092 // For class match, we can skip the array type check completely if at least one of source
1093 // and destination is known to be a non primitive array, otherwise one check is enough.
1094 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1095 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1096 !optimizations.GetSourceIsNonPrimitiveArray()) {
1097 check_non_primitive_array_class(temp2, CpuRegister(TMP));
1098 }
1099 }
1100 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1101 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1102 // Bail out if the source is not a non primitive array.
1103 // No read barrier is needed for reading a chain of constant references for comparing
1104 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1105 // /* HeapReference<Class> */ temp1 = src->klass_
1106 __ movl(temp1, Address(src, class_offset));
1107 __ MaybeUnpoisonHeapReference(temp1);
1108 check_non_primitive_array_class(temp1, CpuRegister(TMP));
1109 }
1110
1111 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1112 // Null constant length: not need to emit the loop code at all.
1113 } else {
1114 const DataType::Type type = DataType::Type::kReference;
1115 const int32_t element_size = DataType::Size(type);
1116 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1117
1118 // Don't enter copy loop if `length == 0`.
1119 NearLabel skip_copy_and_write_barrier;
1120 if (!length.IsConstant()) {
1121 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1122 __ j(kEqual, &skip_copy_and_write_barrier);
1123 }
1124
1125 // Compute base source address, base destination address, and end
1126 // source address in `temp1`, `temp2` and `temp3` respectively.
1127 GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
1128 GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
1129
1130 SlowPathCode* read_barrier_slow_path = nullptr;
1131 if (codegen_->EmitBakerReadBarrier()) {
1132 // SystemArrayCopy implementation for Baker read barriers (see
1133 // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1134 //
1135 // if (src_ptr != end_ptr) {
1136 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1137 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1138 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1139 // if (is_gray) {
1140 // // Slow-path copy.
1141 // do {
1142 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1143 // } while (src_ptr != end_ptr)
1144 // } else {
1145 // // Fast-path copy.
1146 // do {
1147 // *dest_ptr++ = *src_ptr++;
1148 // } while (src_ptr != end_ptr)
1149 // }
1150 // }
1151
1152 // Given the numeric representation, it's enough to check the low bit of the rb_state.
1153 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1154 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1155 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1156 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1157 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
1158
1159 // if (rb_state == ReadBarrier::GrayState())
1160 // goto slow_path;
1161 // At this point, just do the "if" and make sure that flags are preserved until the branch.
1162 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1163
1164 // Load fence to prevent load-load reordering.
1165 // Note that this is a no-op, thanks to the x86-64 memory model.
1166 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1167
1168 // Slow path used to copy array when `src` is gray.
1169 read_barrier_slow_path =
1170 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1171 codegen_->AddSlowPath(read_barrier_slow_path);
1172
1173 // We have done the "if" of the gray bit check above, now branch based on the flags.
1174 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1175 }
1176
1177 if (length.IsConstant()) {
1178 __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1179 } else {
1180 __ movl(temp3, length.AsRegister<CpuRegister>());
1181 }
1182
1183 // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
1184 DCHECK_EQ(temp1.AsRegister(), RSI);
1185 DCHECK_EQ(temp2.AsRegister(), RDI);
1186 DCHECK_EQ(temp3.AsRegister(), RCX);
1187 __ rep_movsl();
1188
1189 if (read_barrier_slow_path != nullptr) {
1190 DCHECK(codegen_->EmitBakerReadBarrier());
1191 __ Bind(read_barrier_slow_path->GetExitLabel());
1192 }
1193
1194 // We only need one card marking on the destination array.
1195 codegen_->MarkGCCard(temp1, temp2, dest);
1196
1197 __ Bind(&skip_copy_and_write_barrier);
1198 }
1199
1200 __ Bind(intrinsic_slow_path->GetExitLabel());
1201 }
1202
VisitStringCompareTo(HInvoke * invoke)1203 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1204 LocationSummary* locations = new (allocator_) LocationSummary(
1205 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1206 InvokeRuntimeCallingConvention calling_convention;
1207 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1208 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1209 locations->SetOut(Location::RegisterLocation(RAX));
1210 }
1211
VisitStringCompareTo(HInvoke * invoke)1212 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1213 X86_64Assembler* assembler = GetAssembler();
1214 LocationSummary* locations = invoke->GetLocations();
1215
1216 // Note that the null check must have been done earlier.
1217 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1218
1219 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1220 __ testl(argument, argument);
1221 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1222 codegen_->AddSlowPath(slow_path);
1223 __ j(kEqual, slow_path->GetEntryLabel());
1224
1225 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1226 __ Bind(slow_path->GetExitLabel());
1227 }
1228
VisitStringEquals(HInvoke * invoke)1229 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1230 LocationSummary* locations =
1231 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1232 locations->SetInAt(0, Location::RequiresRegister());
1233 locations->SetInAt(1, Location::RequiresRegister());
1234
1235 // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
1236 locations->AddTemp(Location::RegisterLocation(RCX));
1237 locations->AddTemp(Location::RegisterLocation(RDI));
1238
1239 // Set output, RSI needed for repe_cmpsq instruction anyways.
1240 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1241 }
1242
VisitStringEquals(HInvoke * invoke)1243 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1244 X86_64Assembler* assembler = GetAssembler();
1245 LocationSummary* locations = invoke->GetLocations();
1246
1247 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1248 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1249 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1250 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1251 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1252
1253 NearLabel end, return_true, return_false;
1254
1255 // Get offsets of count, value, and class fields within a string object.
1256 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1257 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1258 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1259
1260 // Note that the null check must have been done earlier.
1261 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1262
1263 StringEqualsOptimizations optimizations(invoke);
1264 if (!optimizations.GetArgumentNotNull()) {
1265 // Check if input is null, return false if it is.
1266 __ testl(arg, arg);
1267 __ j(kEqual, &return_false);
1268 }
1269
1270 if (!optimizations.GetArgumentIsString()) {
1271 // Instanceof check for the argument by comparing class fields.
1272 // All string objects must have the same type since String cannot be subclassed.
1273 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1274 // If the argument is a string object, its class field must be equal to receiver's class field.
1275 //
1276 // As the String class is expected to be non-movable, we can read the class
1277 // field from String.equals' arguments without read barriers.
1278 AssertNonMovableStringClass();
1279 // Also, because we use the loaded class references only to compare them, we
1280 // don't need to unpoison them.
1281 // /* HeapReference<Class> */ rcx = str->klass_
1282 __ movl(rcx, Address(str, class_offset));
1283 // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1284 __ cmpl(rcx, Address(arg, class_offset));
1285 __ j(kNotEqual, &return_false);
1286 }
1287
1288 // Reference equality check, return true if same reference.
1289 __ cmpl(str, arg);
1290 __ j(kEqual, &return_true);
1291
1292 // Load length and compression flag of receiver string.
1293 __ movl(rcx, Address(str, count_offset));
1294 // Check if the lengths and compression flags are equal; return false if they are not.
1295 // Two identical strings will always have the same compression style since
1296 // the compression style is decided at allocation time.
1297 __ cmpl(rcx, Address(arg, count_offset));
1298 __ j(kNotEqual, &return_false);
1299 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1300 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1301 "Expecting 0=compressed, 1=uncompressed");
1302 __ jrcxz(&return_true);
1303
1304 if (mirror::kUseStringCompression) {
1305 NearLabel string_uncompressed;
1306 // Extract the length and differentiate between the both-compressed and both-uncompressed cases.
1307 // Strings with differing compression styles were already rejected above.
1308 __ shrl(rcx, Immediate(1));
1309 __ j(kCarrySet, &string_uncompressed);
1310 // Divide string length by 2, rounding up, and continue as if uncompressed.
1311 // Merge clearing the compression flag with +1 for rounding.
1312 __ addl(rcx, Immediate(1));
1313 __ shrl(rcx, Immediate(1));
1314 __ Bind(&string_uncompressed);
1315 }
1316 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1317 __ leal(rsi, Address(str, value_offset));
1318 __ leal(rdi, Address(arg, value_offset));
1319
1320 // Divide string length by 4 and adjust for lengths not divisible by 4.
1321 __ addl(rcx, Immediate(3));
1322 __ shrl(rcx, Immediate(2));
1323
1324 // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1325 // or 8 characters (compressed) at a time.
1326 DCHECK_ALIGNED(value_offset, 8);
1327 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1328
1329 // Loop to compare strings four characters at a time starting at the beginning of the string.
1330 __ repe_cmpsq();
1331 // If strings are not equal, zero flag will be cleared.
1332 __ j(kNotEqual, &return_false);
1333
1334 // Return true and exit the function.
1335 // If loop does not result in returning false, we return true.
1336 __ Bind(&return_true);
1337 __ movl(rsi, Immediate(1));
1338 __ jmp(&end);
1339
1340 // Return false and exit the function.
1341 __ Bind(&return_false);
1342 __ xorl(rsi, rsi);
1343 __ Bind(&end);
1344 }
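// Hedged C++ sketch of the fast path generated above (helper names illustrative). The real code
// folds the compression flag into the length word and compares eight bytes per repe cmpsq
// iteration, relying on the zero padding guaranteed by kObjectAlignment:
//
//   bool StringEqualsSketch(mirror::String* str, mirror::String* arg) {
//     if (arg == nullptr) return false;                      // optional null check
//     if (ClassOf(str) != ClassOf(arg)) return false;        // argument must be a String
//     if (str == arg) return true;                           // reference equality
//     if (CountField(str) != CountField(arg)) return false;  // length and compression flag
//     if (CountField(str) == 0) return true;                 // both empty
//     size_t bytes = DataSizeInBytes(str);                   // chars * 2, or chars if compressed
//     return memcmp(ValueData(str), ValueData(arg), RoundUp(bytes, 8)) == 0;
//   }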
1345
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1346 static void CreateStringIndexOfLocations(HInvoke* invoke,
1347 ArenaAllocator* allocator,
1348 bool start_at_zero) {
1349 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1350 LocationSummary::kCallOnSlowPath,
1351 kIntrinsified);
1352 // The data needs to be in RDI for scasw, so request that the string be placed there.
1353 locations->SetInAt(0, Location::RegisterLocation(RDI));
1354 // Even if we search for a constant char, we still have to copy it into RAX, so simply request
1355 // the allocator to do that. The constant check can still be performed by inspecting the
1356 // parameter of the instruction explicitly.
1357 // Note: This works because we don't clobber RAX anywhere.
1358 locations->SetInAt(1, Location::RegisterLocation(RAX));
1359 if (!start_at_zero) {
1360 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1361 }
1362 // As we clobber RDI during execution anyway, also use it as the output.
1363 locations->SetOut(Location::SameAsFirstInput());
1364
1365 // repne scasw uses RCX as the counter.
1366 locations->AddTemp(Location::RegisterLocation(RCX));
1367 // Need another temporary to be able to compute the result.
1368 locations->AddTemp(Location::RequiresRegister());
1369 }
1370
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1371 static void GenerateStringIndexOf(HInvoke* invoke,
1372 X86_64Assembler* assembler,
1373 CodeGeneratorX86_64* codegen,
1374 bool start_at_zero) {
1375 LocationSummary* locations = invoke->GetLocations();
1376
1377 // Note that the null check must have been done earlier.
1378 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1379
1380 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1381 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1382 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1383 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1384 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1385
1386 // Check our assumptions for registers.
1387 DCHECK_EQ(string_obj.AsRegister(), RDI);
1388 DCHECK_EQ(search_value.AsRegister(), RAX);
1389 DCHECK_EQ(counter.AsRegister(), RCX);
1390 DCHECK_EQ(out.AsRegister(), RDI);
1391
1392 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1393 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1394 SlowPathCode* slow_path = nullptr;
1395 HInstruction* code_point = invoke->InputAt(1);
1396 if (code_point->IsIntConstant()) {
1397 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1398 std::numeric_limits<uint16_t>::max()) {
1399 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1400 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1401 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1402 codegen->AddSlowPath(slow_path);
1403 __ jmp(slow_path->GetEntryLabel());
1404 __ Bind(slow_path->GetExitLabel());
1405 return;
1406 }
1407 } else if (code_point->GetType() != DataType::Type::kUint16) {
1408 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1409 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1410 codegen->AddSlowPath(slow_path);
1411 __ j(kAbove, slow_path->GetEntryLabel());
1412 }
1413
1414 // From here down, we know that we are looking for a char that fits in
1415 // 16 bits (uncompressed) or 8 bits (compressed).
1416 // Location of reference to data array within the String object.
1417 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1418 // Location of count within the String object.
1419 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1420
1421 // Load the count field of the string containing the length and compression flag.
1422 __ movl(string_length, Address(string_obj, count_offset));
1423
1424 // Do a zero-length check. Even with string compression `count == 0` means empty.
1425 // TODO: Support jecxz.
1426 NearLabel not_found_label;
1427 __ testl(string_length, string_length);
1428 __ j(kEqual, &not_found_label);
1429
1430 if (mirror::kUseStringCompression) {
1431 // Use TMP to keep string_length_flagged.
1432 __ movl(CpuRegister(TMP), string_length);
1433 // Mask out first bit used as compression flag.
1434 __ shrl(string_length, Immediate(1));
1435 }
1436
1437 if (start_at_zero) {
1438 // Number of chars to scan is the same as the string length.
1439 __ movl(counter, string_length);
1440 // Move to the start of the string.
1441 __ addq(string_obj, Immediate(value_offset));
1442 } else {
1443 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1444
1445 // Do a start_index check.
1446 __ cmpl(start_index, string_length);
1447 __ j(kGreaterEqual, &not_found_label);
1448
1449 // Ensure we have a start index >= 0.
1450 __ xorl(counter, counter);
1451 __ cmpl(start_index, Immediate(0));
1452 __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough.
1453
1454 if (mirror::kUseStringCompression) {
1455 NearLabel modify_counter, offset_uncompressed_label;
1456 __ testl(CpuRegister(TMP), Immediate(1));
1457 __ j(kNotZero, &offset_uncompressed_label);
1458 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1459 __ jmp(&modify_counter);
1460 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1461 __ Bind(&offset_uncompressed_label);
1462 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1463 __ Bind(&modify_counter);
1464 } else {
1465 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1466 }
1467 // Now update ECX, the work counter: it will be string.length - start_index.
1468 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1469 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1470 }
1471
1472 if (mirror::kUseStringCompression) {
1473 NearLabel uncompressed_string_comparison;
1474 NearLabel comparison_done;
1475 __ testl(CpuRegister(TMP), Immediate(1));
1476 __ j(kNotZero, &uncompressed_string_comparison);
1477 // Check if RAX (search_value) is ASCII.
1478 __ cmpl(search_value, Immediate(127));
1479 __ j(kGreater, &not_found_label);
1480 // Comparing byte-per-byte.
1481 __ repne_scasb();
1482 __ jmp(&comparison_done);
1483 // Everything is set up for repne scasw:
1484 // * Comparison address in RDI.
1485 // * Counter in ECX.
1486 __ Bind(&uncompressed_string_comparison);
1487 __ repne_scasw();
1488 __ Bind(&comparison_done);
1489 } else {
1490 __ repne_scasw();
1491 }
1492 // Did we find a match?
1493 __ j(kNotEqual, &not_found_label);
1494
1495 // Yes, we matched. Compute the index of the result.
1496 __ subl(string_length, counter);
1497 __ leal(out, Address(string_length, -1));
1498
1499 NearLabel done;
1500 __ jmp(&done);
1501
1502 // Failed to match; return -1.
1503 __ Bind(&not_found_label);
1504 __ movl(out, Immediate(-1));
1505
1506 // And join up at the end.
1507 __ Bind(&done);
1508 if (slow_path != nullptr) {
1509 __ Bind(slow_path->GetExitLabel());
1510 }
1511 }
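// Hedged C++ sketch of the search implemented above with repne scasb/scasw (helper names
// illustrative). The generated code keeps the remaining count in RCX and recovers the index as
// length - remaining - 1 after a match; for compressed strings it scans bytes, and search values
// above 127 can never match:
//
//   int32_t IndexOfSketch(const uint16_t* chars, int32_t length, uint32_t ch, int32_t from) {
//     if (ch > 0xFFFF) return IndexOfSlowPath();  // supplementary code point: slow path
//     for (int32_t i = std::max(from, 0); i < length; ++i) {
//       if (chars[i] == static_cast<uint16_t>(ch)) return i;
//     }
//     return -1;
//   }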
1512
VisitStringIndexOf(HInvoke * invoke)1513 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1514 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1515 }
1516
VisitStringIndexOf(HInvoke * invoke)1517 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1518 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1519 }
1520
VisitStringIndexOfAfter(HInvoke * invoke)1521 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1522 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1523 }
1524
VisitStringIndexOfAfter(HInvoke * invoke)1525 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1526 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1527 }
1528
VisitStringNewStringFromBytes(HInvoke * invoke)1529 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1530 LocationSummary* locations = new (allocator_) LocationSummary(
1531 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1532 InvokeRuntimeCallingConvention calling_convention;
1533 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1534 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1535 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1536 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1537 locations->SetOut(Location::RegisterLocation(RAX));
1538 }
1539
VisitStringNewStringFromBytes(HInvoke * invoke)1540 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1541 X86_64Assembler* assembler = GetAssembler();
1542 LocationSummary* locations = invoke->GetLocations();
1543
1544 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1545 __ testl(byte_array, byte_array);
1546 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1547 codegen_->AddSlowPath(slow_path);
1548 __ j(kEqual, slow_path->GetEntryLabel());
1549
1550 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1551 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1552 __ Bind(slow_path->GetExitLabel());
1553 }
1554
VisitStringNewStringFromChars(HInvoke * invoke)1555 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1556 LocationSummary* locations =
1557 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1558 InvokeRuntimeCallingConvention calling_convention;
1559 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1560 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1561 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1562 locations->SetOut(Location::RegisterLocation(RAX));
1563 }
1564
VisitStringNewStringFromChars(HInvoke * invoke)1565 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1566 // No need to emit code checking whether `locations->InAt(2)` is a null
1567 // pointer, as callers of the native method
1568 //
1569 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1570 //
1571 // all include a null check on `data` before calling that method.
1572 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1573 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1574 }
1575
VisitStringNewStringFromString(HInvoke * invoke)1576 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1577 LocationSummary* locations = new (allocator_) LocationSummary(
1578 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1579 InvokeRuntimeCallingConvention calling_convention;
1580 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1581 locations->SetOut(Location::RegisterLocation(RAX));
1582 }
1583
VisitStringNewStringFromString(HInvoke * invoke)1584 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1585 X86_64Assembler* assembler = GetAssembler();
1586 LocationSummary* locations = invoke->GetLocations();
1587
1588 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1589 __ testl(string_to_copy, string_to_copy);
1590 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1591 codegen_->AddSlowPath(slow_path);
1592 __ j(kEqual, slow_path->GetEntryLabel());
1593
1594 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1595 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1596 __ Bind(slow_path->GetExitLabel());
1597 }
1598
VisitStringGetCharsNoCheck(HInvoke * invoke)1599 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1600 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1601 LocationSummary* locations =
1602 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1603 locations->SetInAt(0, Location::RequiresRegister());
1604 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1605 locations->SetInAt(2, Location::RequiresRegister());
1606 locations->SetInAt(3, Location::RequiresRegister());
1607 locations->SetInAt(4, Location::RequiresRegister());
1608
1609 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1610 locations->AddTemp(Location::RegisterLocation(RSI));
1611 locations->AddTemp(Location::RegisterLocation(RDI));
1612 locations->AddTemp(Location::RegisterLocation(RCX));
1613 }
1614
VisitStringGetCharsNoCheck(HInvoke * invoke)1615 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1616 X86_64Assembler* assembler = GetAssembler();
1617 LocationSummary* locations = invoke->GetLocations();
1618
1619 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1620 // Location of data in char array buffer.
1621 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1622 // Location of char array data in string.
1623 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1624
1625 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1626 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1627 Location srcBegin = locations->InAt(1);
1628 int srcBegin_value =
1629 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1630 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1631 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1632 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1633
1634 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1635 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1636 DCHECK_EQ(char_size, 2u);
1637
1638 NearLabel done;
1639 // Compute the number of chars (words) to move.
1640 __ movl(CpuRegister(RCX), srcEnd);
1641 if (srcBegin.IsConstant()) {
1642 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1643 } else {
1644 DCHECK(srcBegin.IsRegister());
1645 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1646 }
1647 if (mirror::kUseStringCompression) {
1648 NearLabel copy_uncompressed, copy_loop;
1649 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1650 DCHECK_EQ(c_char_size, 1u);
1651 // Location of count in string.
1652 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1653
1654 __ testl(Address(obj, count_offset), Immediate(1));
1655 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1656 "Expecting 0=compressed, 1=uncompressed");
1657 __ j(kNotZero, &copy_uncompressed);
1658 // Compute the address of the source string by adding the number of chars from
1659 // the source beginning to the value offset of a string.
1660 __ leaq(CpuRegister(RSI),
1661 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1662 // Start the loop to copy String's value to Array of Char.
1663 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1664
1665 __ Bind(&copy_loop);
1666 __ jrcxz(&done);
1667 // Use TMP as temporary (convert byte from RSI to word).
1668 // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1669 __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1670 __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1671 __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1672 __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1673 // TODO: Add support for LOOP to X86_64Assembler.
1674 __ subl(CpuRegister(RCX), Immediate(1));
1675 __ jmp(&copy_loop);
1676
1677 __ Bind(&copy_uncompressed);
1678 }
1679
1680 __ leaq(CpuRegister(RSI),
1681 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1682 // Compute the address of the destination buffer.
1683 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1684 // Do the move.
1685 __ rep_movsw();
1686
1687 __ Bind(&done);
1688 }
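// Hedged C++ sketch of the copy above (accessor names illustrative): compressed sources are
// widened one byte at a time, uncompressed sources are block-copied via rep movsw.
//
//   void GetCharsSketch(mirror::String* src, int32_t src_begin, int32_t src_end,
//                       uint16_t* dst, int32_t dst_begin) {
//     int32_t count = src_end - src_begin;
//     if (IsCompressed(src)) {
//       const uint8_t* in = CompressedValueData(src) + src_begin;
//       for (int32_t i = 0; i < count; ++i) {
//         dst[dst_begin + i] = in[i];  // zero-extend byte to char, like movzxb + movw above
//       }
//     } else {
//       memcpy(dst + dst_begin, ValueData(src) + src_begin, count * sizeof(uint16_t));
//     }
//   }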
1689
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1690 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1691 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1692 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1693 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1694 // to avoid a SIGBUS.
1695 switch (size) {
1696 case DataType::Type::kInt8:
1697 __ movsxb(out, Address(address, 0));
1698 break;
1699 case DataType::Type::kInt16:
1700 __ movsxw(out, Address(address, 0));
1701 break;
1702 case DataType::Type::kInt32:
1703 __ movl(out, Address(address, 0));
1704 break;
1705 case DataType::Type::kInt64:
1706 __ movq(out, Address(address, 0));
1707 break;
1708 default:
1709 LOG(FATAL) << "Type not recognized for peek: " << size;
1710 UNREACHABLE();
1711 }
1712 }
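// Hedged sketch of the peek family above: a plain load of the requested width from a raw address
// supplied as a long, with byte/short/int results sign-extended into the 64-bit output.
//
//   int64_t PeekSketch(intptr_t address, size_t size_in_bytes) {
//     int64_t result = 0;
//     memcpy(&result, reinterpret_cast<const void*>(address), size_in_bytes);
//     return result;  // the generated movsxb/movsxw additionally sign-extend narrow results
//   }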
1713
VisitMemoryPeekByte(HInvoke * invoke)1714 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1715 CreateIntToIntLocations(allocator_, invoke);
1716 }
1717
VisitMemoryPeekByte(HInvoke * invoke)1718 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1719 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1720 }
1721
VisitMemoryPeekIntNative(HInvoke * invoke)1722 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1723 CreateIntToIntLocations(allocator_, invoke);
1724 }
1725
VisitMemoryPeekIntNative(HInvoke * invoke)1726 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1727 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1728 }
1729
VisitMemoryPeekLongNative(HInvoke * invoke)1730 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1731 CreateIntToIntLocations(allocator_, invoke);
1732 }
1733
VisitMemoryPeekLongNative(HInvoke * invoke)1734 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1735 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1736 }
1737
VisitMemoryPeekShortNative(HInvoke * invoke)1738 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1739 CreateIntToIntLocations(allocator_, invoke);
1740 }
1741
VisitMemoryPeekShortNative(HInvoke * invoke)1742 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1743 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1744 }
1745
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1746 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1747 LocationSummary* locations =
1748 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1749 locations->SetInAt(0, Location::RequiresRegister());
1750 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1751 }
1752
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1753 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1754 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1755 Location value = locations->InAt(1);
1756 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1757 // to avoid a SIGBUS.
1758 switch (size) {
1759 case DataType::Type::kInt8:
1760 if (value.IsConstant()) {
1761 __ movb(Address(address, 0),
1762 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1763 } else {
1764 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1765 }
1766 break;
1767 case DataType::Type::kInt16:
1768 if (value.IsConstant()) {
1769 __ movw(Address(address, 0),
1770 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1771 } else {
1772 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1773 }
1774 break;
1775 case DataType::Type::kInt32:
1776 if (value.IsConstant()) {
1777 __ movl(Address(address, 0),
1778 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1779 } else {
1780 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1781 }
1782 break;
1783 case DataType::Type::kInt64:
1784 if (value.IsConstant()) {
1785 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1786 DCHECK(IsInt<32>(v));
1787 int32_t v_32 = v;
1788 __ movq(Address(address, 0), Immediate(v_32));
1789 } else {
1790 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1791 }
1792 break;
1793 default:
1794 LOG(FATAL) << "Type not recognized for poke: " << size;
1795 UNREACHABLE();
1796 }
1797 }
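// Matching hedged sketch for the poke family: store the low bytes of the value to the raw
// address. Note that the 64-bit immediate case above is limited to int32-representable constants
// because movq only accepts a sign-extended 32-bit immediate.
//
//   void PokeSketch(intptr_t address, int64_t value, size_t size_in_bytes) {
//     memcpy(reinterpret_cast<void*>(address), &value, size_in_bytes);  // little-endian target
//   }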
1798
VisitMemoryPokeByte(HInvoke * invoke)1799 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1800 CreateIntIntToVoidLocations(allocator_, invoke);
1801 }
1802
VisitMemoryPokeByte(HInvoke * invoke)1803 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1804 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1805 }
1806
VisitMemoryPokeIntNative(HInvoke * invoke)1807 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1808 CreateIntIntToVoidLocations(allocator_, invoke);
1809 }
1810
VisitMemoryPokeIntNative(HInvoke * invoke)1811 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1812 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1813 }
1814
VisitMemoryPokeLongNative(HInvoke * invoke)1815 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1816 CreateIntIntToVoidLocations(allocator_, invoke);
1817 }
1818
VisitMemoryPokeLongNative(HInvoke * invoke)1819 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1820 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1821 }
1822
VisitMemoryPokeShortNative(HInvoke * invoke)1823 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1824 CreateIntIntToVoidLocations(allocator_, invoke);
1825 }
1826
VisitMemoryPokeShortNative(HInvoke * invoke)1827 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1828 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1829 }
1830
VisitThreadCurrentThread(HInvoke * invoke)1831 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1832 LocationSummary* locations =
1833 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1834 locations->SetOut(Location::RequiresRegister());
1835 }
1836
VisitThreadCurrentThread(HInvoke * invoke)1837 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1838 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1839 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1840 /* no_rip= */ true));
1841 }
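// Hedged sketch of the load above: the managed java.lang.Thread peer is read GS-relative from the
// current Thread object, so no runtime call is required.
//
//   mirror::Object* CurrentThreadSketch() {
//     return Thread::Current()->GetPeer();  // roughly what the gs:[PeerOffset] movl materializes
//   }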
1842
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)1843 static void GenUnsafeGet(HInvoke* invoke,
1844 DataType::Type type,
1845 [[maybe_unused]] bool is_volatile,
1846 CodeGeneratorX86_64* codegen) {
1847 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1848 LocationSummary* locations = invoke->GetLocations();
1849 Location base_loc = locations->InAt(1);
1850 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1851 Location offset_loc = locations->InAt(2);
1852 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1853 Location output_loc = locations->Out();
1854 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1855
1856 switch (type) {
1857 case DataType::Type::kInt8:
1858 __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1859 break;
1860
1861 case DataType::Type::kInt32:
1862 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1863 break;
1864
1865 case DataType::Type::kReference: {
1866 if (codegen->EmitReadBarrier()) {
1867 if (kUseBakerReadBarrier) {
1868 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1869 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1870 invoke, output_loc, base, src, /* needs_null_check= */ false);
1871 } else {
1872 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1873 codegen->GenerateReadBarrierSlow(
1874 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1875 }
1876 } else {
1877 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1878 __ MaybeUnpoisonHeapReference(output);
1879 }
1880 break;
1881 }
1882
1883 case DataType::Type::kInt64:
1884 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1885 break;
1886
1887 default:
1888 LOG(FATAL) << "Unsupported op size " << type;
1889 UNREACHABLE();
1890 }
1891 }
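// Hedged C++ sketch of the non-barrier cases above: an Unsafe get is a load from base + offset at
// the width implied by the type; reference loads additionally go through the read-barrier
// machinery when that is enabled.
//
//   template <typename T>
//   T UnsafeGetSketch(mirror::Object* base, int64_t offset) {
//     T value;
//     memcpy(&value, reinterpret_cast<const uint8_t*>(base) + offset, sizeof(T));
//     return value;
//   }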
1892
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen)1893 static void GenUnsafeGetAbsolute(HInvoke* invoke,
1894 DataType::Type type,
1895 CodeGeneratorX86_64* codegen) {
1896 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1897 LocationSummary* locations = invoke->GetLocations();
1898 Location address_loc = locations->InAt(1);
1899 Address address = Address(address_loc.AsRegister<CpuRegister>(), 0);
1900 Location output_loc = locations->Out();
1901 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1902
1903 switch (type) {
1904 case DataType::Type::kInt8:
1905 __ movsxb(output, address);
1906 break;
1907
1908 case DataType::Type::kInt32:
1909 __ movl(output, address);
1910 break;
1911
1912 case DataType::Type::kInt64:
1913 __ movq(output, address);
1914 break;
1915
1916 default:
1917 LOG(FATAL) << "Unsupported op size " << type;
1918 UNREACHABLE();
1919 }
1920 }
1921
CreateIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)1922 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1923 LocationSummary* locations =
1924 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1925 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1926 locations->SetInAt(1, Location::RequiresRegister());
1927 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1928 }
1929
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)1930 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1931 HInvoke* invoke,
1932 CodeGeneratorX86_64* codegen) {
1933 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1934 LocationSummary* locations =
1935 new (allocator) LocationSummary(invoke,
1936 can_call
1937 ? LocationSummary::kCallOnSlowPath
1938 : LocationSummary::kNoCall,
1939 kIntrinsified);
1940 if (can_call && kUseBakerReadBarrier) {
1941 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1942 }
1943 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1944 locations->SetInAt(1, Location::RequiresRegister());
1945 locations->SetInAt(2, Location::RequiresRegister());
1946 locations->SetOut(Location::RequiresRegister(),
1947 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1948 }
1949
VisitUnsafeGet(HInvoke * invoke)1950 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1951 VisitJdkUnsafeGet(invoke);
1952 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1954 VisitJdkUnsafeGetAbsolute(invoke);
1955 }
VisitUnsafeGetVolatile(HInvoke * invoke)1956 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1957 VisitJdkUnsafeGetVolatile(invoke);
1958 }
VisitUnsafeGetLong(HInvoke * invoke)1959 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1960 VisitJdkUnsafeGetLong(invoke);
1961 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1962 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1963 VisitJdkUnsafeGetLongVolatile(invoke);
1964 }
VisitUnsafeGetObject(HInvoke * invoke)1965 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1966 VisitJdkUnsafeGetReference(invoke);
1967 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1968 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1969 VisitJdkUnsafeGetReferenceVolatile(invoke);
1970 }
VisitUnsafeGetByte(HInvoke * invoke)1971 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1972 VisitJdkUnsafeGetByte(invoke);
1973 }
1974
VisitJdkUnsafeGet(HInvoke * invoke)1975 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1976 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1977 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1978 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1979 CreateIntIntToIntLocations(allocator_, invoke);
1980 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1981 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1982 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1983 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1984 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1985 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1986 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1987 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1988 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1989 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1990 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1991 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1992 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1993 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1994 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1995 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1996 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1997 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1998 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2000 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2001 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2003 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2004 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2006 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2007 }
2008
VisitUnsafeGet(HInvoke * invoke)2009 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2010 VisitJdkUnsafeGet(invoke);
2011 }
VisitUnsafeGetAbsolute(HInvoke * invoke)2012 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2013 VisitJdkUnsafeGetAbsolute(invoke);
2014 }
VisitUnsafeGetVolatile(HInvoke * invoke)2015 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2016 VisitJdkUnsafeGetVolatile(invoke);
2017 }
VisitUnsafeGetLong(HInvoke * invoke)2018 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2019 VisitJdkUnsafeGetLong(invoke);
2020 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2021 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2022 VisitJdkUnsafeGetLongVolatile(invoke);
2023 }
VisitUnsafeGetObject(HInvoke * invoke)2024 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2025 VisitJdkUnsafeGetReference(invoke);
2026 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2027 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2028 VisitJdkUnsafeGetReferenceVolatile(invoke);
2029 }
VisitUnsafeGetByte(HInvoke * invoke)2030 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
2031 VisitJdkUnsafeGetByte(invoke);
2032 }
2033
VisitJdkUnsafeGet(HInvoke * invoke)2034 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
2035 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2036 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2037 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2038 GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, codegen_);
2039 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2040 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2041 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2042 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2043 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2044 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2045 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2046 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2047 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2048 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2049 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2050 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2051 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2052 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2053 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2054 }
VisitJdkUnsafeGetReference(HInvoke * invoke)2055 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2056 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2057 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2058 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2059 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2060 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2061 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2062 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2063 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2064 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2065 GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2066 }
2067
CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2068 static void CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2069 [[maybe_unused]] DataType::Type type,
2070 HInvoke* invoke) {
2071 LocationSummary* locations =
2072 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2073 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2074 locations->SetInAt(1, Location::RequiresRegister());
2075 locations->SetInAt(2, Location::RequiresRegister());
2076 }
2077
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2078 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2079 DataType::Type type,
2080 HInvoke* invoke) {
2081 LocationSummary* locations =
2082 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2083 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2084 locations->SetInAt(1, Location::RequiresRegister());
2085 locations->SetInAt(2, Location::RequiresRegister());
2086 locations->SetInAt(3, Location::RequiresRegister());
2087 if (type == DataType::Type::kReference) {
2088 // Need temp registers for card-marking.
2089 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2090 locations->AddTemp(Location::RequiresRegister());
2091 }
2092 }
2093
VisitUnsafePut(HInvoke * invoke)2094 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2095 VisitJdkUnsafePut(invoke);
2096 }
VisitUnsafePutAbsolute(HInvoke * invoke)2097 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2098 VisitJdkUnsafePutAbsolute(invoke);
2099 }
VisitUnsafePutOrdered(HInvoke * invoke)2100 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2101 VisitJdkUnsafePutOrdered(invoke);
2102 }
VisitUnsafePutVolatile(HInvoke * invoke)2103 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2104 VisitJdkUnsafePutVolatile(invoke);
2105 }
VisitUnsafePutObject(HInvoke * invoke)2106 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2107 VisitJdkUnsafePutReference(invoke);
2108 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2109 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2110 VisitJdkUnsafePutObjectOrdered(invoke);
2111 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2112 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2113 VisitJdkUnsafePutReferenceVolatile(invoke);
2114 }
VisitUnsafePutLong(HInvoke * invoke)2115 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2116 VisitJdkUnsafePutLong(invoke);
2117 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2118 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2119 VisitJdkUnsafePutLongOrdered(invoke);
2120 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2121 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2122 VisitJdkUnsafePutLongVolatile(invoke);
2123 }
VisitUnsafePutByte(HInvoke * invoke)2124 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2125 VisitJdkUnsafePut(invoke);
2126 }
2127
VisitJdkUnsafePut(HInvoke * invoke)2128 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2129 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2130 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2131 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2132 CreateIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2133 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2134 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2135 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2136 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2137 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2138 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2139 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2140 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2141 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2142 }
VisitJdkUnsafePutReference(HInvoke * invoke)2143 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2144 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2145 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2146 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2147 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2148 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2149 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2150 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2151 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2152 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2153 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2154 }
VisitJdkUnsafePutLong(HInvoke * invoke)2155 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2156 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2157 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2158 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2159 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2160 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2161 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2162 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2163 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2164 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2165 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2166 }
VisitJdkUnsafePutByte(HInvoke * invoke)2167 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2168 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt8, invoke);
2169 }
2170
2171 // No special handling is needed for ordered stores: they only require an AnyStore barrier,
2172 // which the x86 memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2173 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2174 CodeGeneratorX86_64* codegen) {
2175 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2176 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2177 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2178 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2179
2180 if (type == DataType::Type::kInt64) {
2181 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2182 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2183 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2184 __ movl(temp, value);
2185 __ PoisonHeapReference(temp);
2186 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2187 } else if (type == DataType::Type::kInt32 || type == DataType::Type::kReference) {
2188 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2189 } else {
2190 CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2191 __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2192 }
2193
2194 if (is_volatile) {
2195 codegen->MemoryFence();
2196 }
2197
2198 if (type == DataType::Type::kReference) {
2199 bool value_can_be_null = true; // TODO: Worth finding out this information?
2200 codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2201 locations->GetTemp(1).AsRegister<CpuRegister>(),
2202 base,
2203 value,
2204 value_can_be_null);
2205 }
2206 }
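// Hedged C++ sketch of the put above. On x86-64 an ordinary store already has release semantics,
// so only the volatile case needs the trailing StoreLoad fence, and only reference stores need
// heap-reference poisoning plus a conditional card mark for the GC.
//
//   template <typename T>
//   void UnsafePutSketch(mirror::Object* base, int64_t offset, T value, bool is_volatile) {
//     memcpy(reinterpret_cast<uint8_t*>(base) + offset, &value, sizeof(T));
//     if (is_volatile) {
//       std::atomic_thread_fence(std::memory_order_seq_cst);  // corresponds to MemoryFence()
//     }
//   }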
2207
2208 // No special handling is needed for ordered stores: they only require an AnyStore barrier,
2209 // which the x86 memory model already provides.
GenUnsafePutAbsolute(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2210 static void GenUnsafePutAbsolute(LocationSummary* locations,
2211 DataType::Type type,
2212 bool is_volatile,
2213 CodeGeneratorX86_64* codegen) {
2214 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2215 CpuRegister address_reg = locations->InAt(1).AsRegister<CpuRegister>();
2216 Address address = Address(address_reg, 0);
2217 CpuRegister value = locations->InAt(2).AsRegister<CpuRegister>();
2218
2219 if (type == DataType::Type::kInt64) {
2220 __ movq(address, value);
2221 } else if (type == DataType::Type::kInt32) {
2222 __ movl(address, value);
2223 } else {
2224 CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2225 __ movb(address, value);
2226 }
2227
2228 if (is_volatile) {
2229 codegen->MemoryFence();
2230 }
2231 }
2232
VisitUnsafePut(HInvoke * invoke)2233 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2234 VisitJdkUnsafePut(invoke);
2235 }
VisitUnsafePutAbsolute(HInvoke * invoke)2236 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2237 VisitJdkUnsafePutAbsolute(invoke);
2238 }
VisitUnsafePutOrdered(HInvoke * invoke)2239 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2240 VisitJdkUnsafePutOrdered(invoke);
2241 }
VisitUnsafePutVolatile(HInvoke * invoke)2242 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2243 VisitJdkUnsafePutVolatile(invoke);
2244 }
VisitUnsafePutObject(HInvoke * invoke)2245 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2246 VisitJdkUnsafePutReference(invoke);
2247 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2248 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2249 VisitJdkUnsafePutObjectOrdered(invoke);
2250 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2251 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2252 VisitJdkUnsafePutReferenceVolatile(invoke);
2253 }
VisitUnsafePutLong(HInvoke * invoke)2254 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2255 VisitJdkUnsafePutLong(invoke);
2256 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2257 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2258 VisitJdkUnsafePutLongOrdered(invoke);
2259 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2260 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2261 VisitJdkUnsafePutLongVolatile(invoke);
2262 }
VisitUnsafePutByte(HInvoke * invoke)2263 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2264 VisitJdkUnsafePutByte(invoke);
2265 }
2266
VisitJdkUnsafePut(HInvoke * invoke)2267 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2268 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2269 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2270 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2271 GenUnsafePutAbsolute(
2272 invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/false, codegen_);
2273 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2274 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2275 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2276 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2277 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2278 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2279 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2280 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2281 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2282 }
VisitJdkUnsafePutReference(HInvoke * invoke)2283 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2284 GenUnsafePut(
2285 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2286 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2287 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2288 GenUnsafePut(
2289 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2290 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2291 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2292 GenUnsafePut(
2293 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2294 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2295 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2296 GenUnsafePut(
2297 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2298 }
VisitJdkUnsafePutLong(HInvoke * invoke)2299 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2300 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2301 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2302 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2303 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2304 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2305 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2306 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2307 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2308 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2309 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2310 }
VisitJdkUnsafePutByte(HInvoke * invoke)2311 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2312 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2313 }
2314
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type type)2315 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2316 HInvoke* invoke,
2317 CodeGeneratorX86_64* codegen,
2318 DataType::Type type) {
2319 const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2320 LocationSummary* locations =
2321 new (allocator) LocationSummary(invoke,
2322 can_call
2323 ? LocationSummary::kCallOnSlowPath
2324 : LocationSummary::kNoCall,
2325 kIntrinsified);
2326 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2327 locations->SetInAt(1, Location::RequiresRegister());
2328 locations->SetInAt(2, Location::RequiresRegister());
2329 // The expected value must be in EAX/RAX.
2330 locations->SetInAt(3, Location::RegisterLocation(RAX));
2331 locations->SetInAt(4, Location::RequiresRegister());
2332
2333 // RAX is clobbered by CMPXCHG, but it is already set as the output, so there is no need to add it as a temporary.
2334 locations->SetOut(Location::RegisterLocation(RAX));
2335
2336 if (type == DataType::Type::kReference) {
2337 // Need two temporaries for MarkGCCard.
2338 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2339 locations->AddTemp(Location::RequiresRegister());
2340 if (codegen->EmitReadBarrier()) {
2341 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2342 DCHECK(kUseBakerReadBarrier);
2343 locations->AddTemp(Location::RequiresRegister());
2344 }
2345 }
2346 }
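// The fixed registers above follow LOCK CMPXCHG's implicit contract: the expected value must be
// in RAX, and the instruction writes the previous memory value back into RAX, which is why RAX
// doubles as the output. Hedged C++ sketch of the compare-and-set being generated:
//
//   template <typename T>
//   bool CompareAndSetSketch(volatile T* addr, T expected, T desired) {
//     // lock cmpxchg: if (*addr == expected) { *addr = desired; ZF = 1; } else { RAX = *addr; }
//     return __atomic_compare_exchange_n(addr, &expected, desired,
//                                        /*weak=*/ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
//   }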
2347
VisitUnsafeCASInt(HInvoke * invoke)2348 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2349 VisitJdkUnsafeCASInt(invoke);
2350 }
2351
VisitUnsafeCASLong(HInvoke * invoke)2352 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2353 VisitJdkUnsafeCASLong(invoke);
2354 }
2355
VisitUnsafeCASObject(HInvoke * invoke)2356 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2357 VisitJdkUnsafeCASObject(invoke);
2358 }
2359
VisitJdkUnsafeCASInt(HInvoke * invoke)2360 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2361 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2362 VisitJdkUnsafeCompareAndSetInt(invoke);
2363 }
2364
VisitJdkUnsafeCASLong(HInvoke * invoke)2365 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2366 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2367 VisitJdkUnsafeCompareAndSetLong(invoke);
2368 }
2369
VisitJdkUnsafeCASObject(HInvoke * invoke)2370 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2371 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2372 VisitJdkUnsafeCompareAndSetReference(invoke);
2373 }
2374
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2375 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2376 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
2377 }
2378
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2379 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2380 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
2381 }
2382
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2383 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2384 // The only supported read barrier implementation is the Baker-style read barriers.
2385 if (codegen_->EmitNonBakerReadBarrier()) {
2386 return;
2387 }
2388
2389 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
2390 }
2391
2392 // Convert ZF into the Boolean result.
GenZFlagToResult(X86_64Assembler * assembler,CpuRegister out)2393 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2394 __ setcc(kZero, out);
2395 __ movzxb(out, out);
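  // Note: setcc writes only the low byte of `out`; the movzx above clears the upper bits so the
  // register ends up holding exactly 0 or 1.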
2396 }
2397
2398 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64 * codegen,DataType::Type type,Address field_addr,Location value,bool is_cmpxchg,bool byte_swap)2399 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2400 DataType::Type type,
2401 Address field_addr,
2402 Location value,
2403 bool is_cmpxchg,
2404 bool byte_swap) {
2405 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2406 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2407
2408 if (byte_swap) {
2409 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2410 instr_codegen->Bswap(value, type);
2411 }
2412
2413 switch (type) {
2414 case DataType::Type::kBool:
2415 case DataType::Type::kInt8:
2416 __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2417 break;
2418 case DataType::Type::kInt16:
2419 case DataType::Type::kUint16:
2420 __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2421 break;
2422 case DataType::Type::kInt32:
2423 case DataType::Type::kUint32:
2424 __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2425 break;
2426 case DataType::Type::kInt64:
2427 case DataType::Type::kUint64:
2428 __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2429 break;
2430 default:
2431 LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2432 }
2433 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2434
2435 if (byte_swap) {
2436 // Restore byte order for value.
2437 instr_codegen->Bswap(value, type);
2438 }
2439
2440 CpuRegister rax(RAX);
2441 if (is_cmpxchg) {
2442 if (byte_swap) {
2443 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2444 }
2445 // Sign-extend or zero-extend the result as necessary.
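      // (CMPXCHGB/CMPXCHGW write only AL/AX, so after the exchange the upper bits of RAX may
      // still hold bits of the original expected value; the extension below normalizes that.)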
2446 switch (type) {
2447 case DataType::Type::kBool:
2448 __ movzxb(rax, rax);
2449 break;
2450 case DataType::Type::kInt8:
2451 __ movsxb(rax, rax);
2452 break;
2453 case DataType::Type::kInt16:
2454 __ movsxw(rax, rax);
2455 break;
2456 case DataType::Type::kUint16:
2457 __ movzxw(rax, rax);
2458 break;
2459 default:
2460 break; // No need to do anything.
2461 }
2462 } else {
2463 GenZFlagToResult(assembler, rax);
2464 }
2465 }
2466
GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64 * codegen,Address field_addr,CpuRegister temp,Location value,Location expected,Location out,bool is64bit,bool is_cmpxchg,bool byte_swap)2467 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2468 Address field_addr,
2469 CpuRegister temp,
2470 Location value,
2471 Location expected,
2472 Location out,
2473 bool is64bit,
2474 bool is_cmpxchg,
2475 bool byte_swap) {
2476 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2477 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2478
2479 Location rax_loc = Location::RegisterLocation(RAX);
2480 Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2481
2482 DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2483
2484 // Copy `expected` to RAX (required by the CMPXCHG instruction).
2485 codegen->Move(rax_loc, expected);
2486
2487 // Copy value to some other register (ensure it's not RAX).
2488 DCHECK_NE(temp.AsRegister(), RAX);
2489 codegen->Move(temp_loc, value);
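  // LOCK CMPXCHG only operates on general-purpose registers and memory, so the floating-point
  // bits are shuttled through RAX and `temp` here and moved back into an XMM register below
  // when the old value is needed.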
2490
2491 if (byte_swap) {
2492 instr_codegen->Bswap(rax_loc, type);
2493 instr_codegen->Bswap(temp_loc, type);
2494 }
2495
2496 if (is64bit) {
2497 __ LockCmpxchgq(field_addr, temp);
2498 } else {
2499 __ LockCmpxchgl(field_addr, temp);
2500 }
2501 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2502 // No need to restore byte order for temporary register.
2503
2504 if (is_cmpxchg) {
2505 if (byte_swap) {
2506 instr_codegen->Bswap(rax_loc, type);
2507 }
2508 __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
2509 } else {
2510 GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2511 }
2512 }
2513
2514 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64 * codegen,HInvoke * invoke,CpuRegister base,CpuRegister offset,CpuRegister value,CpuRegister temp1,CpuRegister temp2,CpuRegister temp3,bool is_cmpxchg)2515 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2516 HInvoke* invoke,
2517 CpuRegister base,
2518 CpuRegister offset,
2519 CpuRegister value,
2520 CpuRegister temp1,
2521 CpuRegister temp2,
2522 CpuRegister temp3,
2523 bool is_cmpxchg) {
2524 // The only supported read barrier implementation is the Baker-style read barriers.
2525 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2526
2527 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2528
2529 // Mark card for object assuming new value is stored.
2530 bool value_can_be_null = true; // TODO: Worth finding out this information?
2531 codegen->MaybeMarkGCCard(temp1, temp2, base, value, value_can_be_null);
2532
2533 Address field_addr(base, offset, TIMES_1, 0);
2534 if (codegen->EmitBakerReadBarrier()) {
2535 // Need to make sure the reference stored in the field is a to-space
2536 // one before attempting the CAS or the CAS could fail incorrectly.
2537 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2538 invoke,
2539 Location::RegisterLocation(temp3.AsRegister()),
2540 base,
2541 field_addr,
2542 /* needs_null_check= */ false,
2543 /* always_update_field= */ true,
2544 &temp1,
2545 &temp2);
2546 } else {
2547 // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2548 }
2549
2550 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2551 Register value_reg = value.AsRegister();
2552 if (kPoisonHeapReferences) {
2553 if (base_equals_value) {
2554 // If `base` and `value` are the same register location, move `value_reg` to a temporary
2555 // register. This way, poisoning `value_reg` won't invalidate `base`.
2556 value_reg = temp1.AsRegister();
2557 __ movl(CpuRegister(value_reg), base);
2558 }
2559
2560 // Check that the register allocator did not assign the location of expected value (RAX) to
2561 // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2562 // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2563 // not be poisoned at all, as heap poisoning uses address negation.
2564 // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2565 DCHECK_NE(RAX, value_reg);
2566 DCHECK_NE(RAX, base.AsRegister());
2567
2568 __ PoisonHeapReference(CpuRegister(RAX));
2569 __ PoisonHeapReference(CpuRegister(value_reg));
2570 }
2571
2572 __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2573 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2574
2575 if (is_cmpxchg) {
2576 // Output is in RAX, so we can rely on CMPXCHG and do nothing.
2577 __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2578 } else {
2579 GenZFlagToResult(assembler, CpuRegister(RAX));
2580 }
2581
2582 // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2583 if (kPoisonHeapReferences) {
2584 if (base_equals_value) {
2585 // `value_reg` has been moved to a temporary register, no need to unpoison it.
2586 } else {
2587 // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2588 DCHECK_NE(RAX, value_reg);
2589 __ UnpoisonHeapReference(CpuRegister(value_reg));
2590 }
2591 }
2592 }
2593
2594 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2595 // and always return true.
RegsAreAllDifferent(const std::vector<CpuRegister> & regs)2596 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2597 if (kIsDebugBuild) {
2598 for (size_t i = 0; i < regs.size(); ++i) {
2599 for (size_t j = 0; j < i; ++j) {
2600 if (regs[i].AsRegister() == regs[j].AsRegister()) {
2601 return false;
2602 }
2603 }
2604 }
2605 }
2606 return true;
2607 }
2608
2609 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2610 // temporary indices that may not correspond to real registers for code paths that do not use them.
GenCompareAndSetOrExchange(CodeGeneratorX86_64 * codegen,HInvoke * invoke,DataType::Type type,CpuRegister base,CpuRegister offset,uint32_t temp1_index,uint32_t temp2_index,uint32_t temp3_index,Location new_value,Location expected,Location out,bool is_cmpxchg,bool byte_swap)2611 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2612 HInvoke* invoke,
2613 DataType::Type type,
2614 CpuRegister base,
2615 CpuRegister offset,
2616 uint32_t temp1_index,
2617 uint32_t temp2_index,
2618 uint32_t temp3_index,
2619 Location new_value,
2620 Location expected,
2621 Location out,
2622 bool is_cmpxchg,
2623 bool byte_swap) {
2624 LocationSummary* locations = invoke->GetLocations();
2625 Address field_address(base, offset, TIMES_1, 0);
2626
2627 if (DataType::IsFloatingPointType(type)) {
2628 bool is64bit = (type == DataType::Type::kFloat64);
2629 CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2630 DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2631
2632 GenCompareAndSetOrExchangeFP(
2633 codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2634 } else {
2635 // Both the expected value for CMPXCHG and the output are in RAX.
2636 DCHECK_EQ(RAX, expected.AsRegister<Register>());
2637 DCHECK_EQ(RAX, out.AsRegister<Register>());
2638
2639 if (type == DataType::Type::kReference) {
2640 CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2641 CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2642 CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2643 CpuRegister temp3 = codegen->EmitReadBarrier()
2644 ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2645 : CpuRegister(kNoRegister);
2646 DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2647
2648 DCHECK(!byte_swap);
2649 GenCompareAndSetOrExchangeRef(
2650 codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2651 } else {
2652 GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2653 }
2654 }
2655 }
2656
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2657 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2658 LocationSummary* locations = invoke->GetLocations();
2659 GenCompareAndSetOrExchange(codegen,
2660 invoke,
2661 type,
2662 /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2663 /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2664 /*temp1_index=*/ 0,
2665 /*temp2_index=*/ 1,
2666 /*temp3_index=*/ 2,
2667 /*new_value=*/ locations->InAt(4),
2668 /*expected=*/ locations->InAt(3),
2669 locations->Out(),
2670 /*is_cmpxchg=*/ false,
2671 /*byte_swap=*/ false);
2672 }
2673
VisitUnsafeCASInt(HInvoke * invoke)2674 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2675 VisitJdkUnsafeCASInt(invoke);
2676 }
2677
VisitUnsafeCASLong(HInvoke * invoke)2678 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2679 VisitJdkUnsafeCASLong(invoke);
2680 }
2681
VisitUnsafeCASObject(HInvoke * invoke)2682 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2683 VisitJdkUnsafeCASObject(invoke);
2684 }
2685
VisitJdkUnsafeCASInt(HInvoke * invoke)2686 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2687 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2688 VisitJdkUnsafeCompareAndSetInt(invoke);
2689 }
2690
VisitJdkUnsafeCASLong(HInvoke * invoke)2691 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2692 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2693 VisitJdkUnsafeCompareAndSetLong(invoke);
2694 }
2695
VisitJdkUnsafeCASObject(HInvoke * invoke)2696 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2697 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2698 VisitJdkUnsafeCompareAndSetReference(invoke);
2699 }
2700
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2701 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2702 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2703 }
2704
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2705 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2706 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2707 }
2708
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2709 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2710 // The only supported read barrier implementation is the Baker-style read barriers.
2711 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2712
2713 GenCAS(DataType::Type::kReference, invoke, codegen_);
2714 }
2715
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2716 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2717 HInvoke* invoke,
2718 CodeGeneratorX86_64* codegen) {
2719 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2720 LocationSummary* locations =
2721 new (allocator) LocationSummary(invoke,
2722 can_call
2723 ? LocationSummary::kCallOnSlowPath
2724 : LocationSummary::kNoCall,
2725 kIntrinsified);
2726 if (can_call && kUseBakerReadBarrier) {
2727 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2728 }
2729 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2730 locations->SetInAt(1, Location::RequiresRegister());
2731 locations->SetInAt(2, Location::RequiresRegister());
2732 // Use the same register for both the output and the new value or addend
2733 // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
2734 locations->SetInAt(3, Location::RegisterLocation(RAX));
2735 // Only set the `out` register if it's needed. In the void case we still use RAX in the
2736 // same manner, but mark it as a temp register instead.
2737 if (invoke->GetType() == DataType::Type::kVoid) {
2738 locations->AddTemp(Location::RegisterLocation(RAX));
2739 } else {
2740 locations->SetOut(Location::RegisterLocation(RAX));
2741 }
2742 }
2743
VisitUnsafeGetAndAddInt(HInvoke * invoke)2744 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2745 VisitJdkUnsafeGetAndAddInt(invoke);
2746 }
2747
VisitUnsafeGetAndAddLong(HInvoke * invoke)2748 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2749 VisitJdkUnsafeGetAndAddLong(invoke);
2750 }
2751
VisitUnsafeGetAndSetInt(HInvoke * invoke)2752 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2753 VisitJdkUnsafeGetAndSetInt(invoke);
2754 }
2755
VisitUnsafeGetAndSetLong(HInvoke * invoke)2756 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2757 VisitJdkUnsafeGetAndSetLong(invoke);
2758 }
2759
VisitUnsafeGetAndSetObject(HInvoke * invoke)2760 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2761 VisitJdkUnsafeGetAndSetReference(invoke);
2762 }
2763
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2764 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2765 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2766 }
2767
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2768 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2769 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2770 }
2771
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2772 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2773 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2774 }
2775
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2776 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2777 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2778 }
2779
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2780 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2781 // The only supported read barrier implementation is the Baker-style read barriers.
2782 if (codegen_->EmitNonBakerReadBarrier()) {
2783 return;
2784 }
2785
2786 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2787 invoke->GetLocations()->AddRegisterTemps(3);
2788 }
2789
2790 enum class GetAndUpdateOp {
2791 kSet,
2792 kAdd,
2793 kBitwiseAnd,
2794 kBitwiseOr,
2795 kBitwiseXor
2796 };
2797
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen,GetAndUpdateOp get_and_update_op)2798 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2799 DataType::Type type,
2800 CodeGeneratorX86_64* codegen,
2801 GetAndUpdateOp get_and_update_op) {
2802 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2803 LocationSummary* locations = invoke->GetLocations();
2804
2805 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2806 Location rax_loc = Location::RegisterLocation(RAX);
2807 // We requested RAX to be used as a temporary for void methods, as we don't return the value.
2808 DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
2809 CpuRegister out_or_temp = rax_loc.AsRegister<CpuRegister>(); // Result.
2810 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer.
2811 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset.
2812 DCHECK_EQ(out_or_temp, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend.
2813 Address field_address(base, offset, TIMES_1, 0);
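  // Note: XCHG with a memory operand is implicitly locked, so the xchg-based paths below need
  // no explicit LOCK prefix; XADD does need one, hence LockXaddl/LockXaddq.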
2814
2815 if (type == DataType::Type::kInt32) {
2816 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2817 __ LockXaddl(field_address, out_or_temp);
2818 } else {
2819 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2820 __ xchgl(out_or_temp, field_address);
2821 }
2822 } else if (type == DataType::Type::kInt64) {
2823 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2824 __ LockXaddq(field_address, out_or_temp);
2825 } else {
2826 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2827 __ xchgq(out_or_temp, field_address);
2828 }
2829 } else {
2830 DCHECK_EQ(type, DataType::Type::kReference);
2831 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2832
2833 // In the void case, we have an extra temp register, which is used to signal the register
2834 // allocator that we are clobbering RAX.
2835 const uint32_t extra_temp = is_void ? 1u : 0u;
2836 DCHECK_EQ(locations->GetTempCount(), 3u + extra_temp);
2837 DCHECK_IMPLIES(is_void, locations->GetTemp(0u).Equals(Location::RegisterLocation(RAX)));
2838
2839 CpuRegister temp1 = locations->GetTemp(0u + extra_temp).AsRegister<CpuRegister>();
2840 CpuRegister temp2 = locations->GetTemp(1u + extra_temp).AsRegister<CpuRegister>();
2841 CpuRegister temp3 = locations->GetTemp(2u + extra_temp).AsRegister<CpuRegister>();
2842
2843 if (codegen->EmitReadBarrier()) {
2844 DCHECK(kUseBakerReadBarrier);
2845 // Ensure that the field contains a to-space reference.
2846 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2847 invoke,
2848 Location::RegisterLocation(temp3.AsRegister()),
2849 base,
2850 field_address,
2851 /*needs_null_check=*/ false,
2852 /*always_update_field=*/ true,
2853 &temp1,
2854 &temp2);
2855 }
2856
2857 // Mark card for the object, as a new value shall be stored.
2858 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
2859 codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_or_temp, new_value_can_be_null);
2860
2861 if (kPoisonHeapReferences) {
2862 // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2863 // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2864 __ movl(temp1, out_or_temp);
2865 __ PoisonHeapReference(temp1);
2866 __ xchgl(temp1, field_address);
2867 if (!is_void) {
2868 __ UnpoisonHeapReference(temp1);
2869 __ movl(out_or_temp, temp1);
2870 }
2871 } else {
2872 __ xchgl(out_or_temp, field_address);
2873 }
2874 }
2875 }
2876
VisitUnsafeGetAndAddInt(HInvoke * invoke)2877 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2878 VisitJdkUnsafeGetAndAddInt(invoke);
2879 }
2880
VisitUnsafeGetAndAddLong(HInvoke * invoke)2881 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2882 VisitJdkUnsafeGetAndAddLong(invoke);
2883 }
2884
VisitUnsafeGetAndSetInt(HInvoke * invoke)2885 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2886 VisitJdkUnsafeGetAndSetInt(invoke);
2887 }
2888
VisitUnsafeGetAndSetLong(HInvoke * invoke)2889 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2890 VisitJdkUnsafeGetAndSetLong(invoke);
2891 }
2892
VisitUnsafeGetAndSetObject(HInvoke * invoke)2893 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2894 VisitJdkUnsafeGetAndSetReference(invoke);
2895 }
2896
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2897 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2898 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2899 }
2900
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2901 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2902 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2903 }
2904
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2905 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2906 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2907 }
2908
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2909 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2910 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2911 }
2912
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2913 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2914 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2915 }
2916
VisitIntegerReverse(HInvoke * invoke)2917 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2918 LocationSummary* locations =
2919 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2920 locations->SetInAt(0, Location::RequiresRegister());
2921 locations->SetOut(Location::SameAsFirstInput());
2922 locations->AddTemp(Location::RequiresRegister());
2923 }
2924
SwapBits(CpuRegister reg,CpuRegister temp,int32_t shift,int32_t mask,X86_64Assembler * assembler)2925 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2926 X86_64Assembler* assembler) {
2927 Immediate imm_shift(shift);
2928 Immediate imm_mask(mask);
2929 __ movl(temp, reg);
2930 __ shrl(reg, imm_shift);
2931 __ andl(temp, imm_mask);
2932 __ andl(reg, imm_mask);
2933 __ shll(temp, imm_shift);
2934 __ orl(reg, temp);
2935 }
2936
VisitIntegerReverse(HInvoke * invoke)2937 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2938 X86_64Assembler* assembler = GetAssembler();
2939 LocationSummary* locations = invoke->GetLocations();
2940
2941 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2942 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2943
2944 /*
2945 * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
2946 * bit swapping to reverse the bits in a number x. Using bswap saves instructions
2947 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2948 * x = bswap x
2949 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2950 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2951 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2952 */
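  // Illustrative example: for x = 0x00000001, bswapl yields 0x01000000 and the three SwapBits
  // rounds produce 0x02000000, 0x08000000 and finally 0x80000000, i.e. Integer.reverse(1).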
2953 __ bswapl(reg);
2954 SwapBits(reg, temp, 1, 0x55555555, assembler);
2955 SwapBits(reg, temp, 2, 0x33333333, assembler);
2956 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2957 }
2958
VisitLongReverse(HInvoke * invoke)2959 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2960 LocationSummary* locations =
2961 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2962 locations->SetInAt(0, Location::RequiresRegister());
2963 locations->SetOut(Location::SameAsFirstInput());
2964 locations->AddRegisterTemps(2);
2965 }
2966
SwapBits64(CpuRegister reg,CpuRegister temp,CpuRegister temp_mask,int32_t shift,int64_t mask,X86_64Assembler * assembler)2967 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2968 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2969 Immediate imm_shift(shift);
2970 __ movq(temp_mask, Immediate(mask));
2971 __ movq(temp, reg);
2972 __ shrq(reg, imm_shift);
2973 __ andq(temp, temp_mask);
2974 __ andq(reg, temp_mask);
2975 __ shlq(temp, imm_shift);
2976 __ orq(reg, temp);
2977 }
2978
VisitLongReverse(HInvoke * invoke)2979 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2980 X86_64Assembler* assembler = GetAssembler();
2981 LocationSummary* locations = invoke->GetLocations();
2982
2983 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2984 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2985 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2986
2987 /*
2988 * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
2989 * bit swapping to reverse the bits in a long number x. Using bswap saves instructions
2990 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2991 * x = bswap x
2992 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2993 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2994 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2995 */
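  // The same walkthrough as in the 32-bit case applies, e.g. Long.reverse(1L) becomes
  // 0x0100000000000000 after bswapq and 0x8000000000000000 after the three rounds.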
2996 __ bswapq(reg);
2997 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2998 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2999 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
3000 }
3001
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86_64 * codegen,HInvoke * invoke)3002 static void CreateBitCountLocations(
3003 ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
3004 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
3005 // Do nothing if there is no popcnt support. This results in generating
3006 // a call for the intrinsic rather than direct code.
3007 return;
3008 }
3009 LocationSummary* locations =
3010 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3011 locations->SetInAt(0, Location::Any());
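  // Location::Any() lets POPCNT read its source either from a register or straight from a
  // stack slot (see the memory-operand forms in GenBitCount below).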
3012 locations->SetOut(Location::RequiresRegister());
3013 }
3014
GenBitCount(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3015 static void GenBitCount(X86_64Assembler* assembler,
3016 CodeGeneratorX86_64* codegen,
3017 HInvoke* invoke,
3018 bool is_long) {
3019 LocationSummary* locations = invoke->GetLocations();
3020 Location src = locations->InAt(0);
3021 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3022
3023 if (invoke->InputAt(0)->IsConstant()) {
3024 // Evaluate this at compile time.
3025 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3026 int32_t result = is_long
3027 ? POPCOUNT(static_cast<uint64_t>(value))
3028 : POPCOUNT(static_cast<uint32_t>(value));
3029 codegen->Load32BitValue(out, result);
3030 return;
3031 }
3032
3033 if (src.IsRegister()) {
3034 if (is_long) {
3035 __ popcntq(out, src.AsRegister<CpuRegister>());
3036 } else {
3037 __ popcntl(out, src.AsRegister<CpuRegister>());
3038 }
3039 } else if (is_long) {
3040 DCHECK(src.IsDoubleStackSlot());
3041 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3042 } else {
3043 DCHECK(src.IsStackSlot());
3044 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3045 }
3046 }
3047
VisitIntegerBitCount(HInvoke * invoke)3048 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
3049 CreateBitCountLocations(allocator_, codegen_, invoke);
3050 }
3051
VisitIntegerBitCount(HInvoke * invoke)3052 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
3053 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3054 }
3055
VisitLongBitCount(HInvoke * invoke)3056 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
3057 CreateBitCountLocations(allocator_, codegen_, invoke);
3058 }
3059
VisitLongBitCount(HInvoke * invoke)3060 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
3061 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3062 }
3063
CreateOneBitLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_high)3064 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
3065 LocationSummary* locations =
3066 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3067 locations->SetInAt(0, Location::Any());
3068 locations->SetOut(Location::RequiresRegister());
3069 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
3070 : Location::RequiresRegister()); // any will do
3071 }
3072
GenOneBit(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_high,bool is_long)3073 static void GenOneBit(X86_64Assembler* assembler,
3074 CodeGeneratorX86_64* codegen,
3075 HInvoke* invoke,
3076 bool is_high, bool is_long) {
3077 LocationSummary* locations = invoke->GetLocations();
3078 Location src = locations->InAt(0);
3079 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3080
3081 if (invoke->InputAt(0)->IsConstant()) {
3082 // Evaluate this at compile time.
3083 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3084 if (value == 0) {
3085 __ xorl(out, out); // Clears upper bits too.
3086 return;
3087 }
3088 // Nonzero value.
3089 if (is_high) {
3090 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
3091 : 31 - CLZ(static_cast<uint32_t>(value));
3092 } else {
3093 value = is_long ? CTZ(static_cast<uint64_t>(value))
3094 : CTZ(static_cast<uint32_t>(value));
3095 }
3096 if (is_long) {
3097 codegen->Load64BitValue(out, 1ULL << value);
3098 } else {
3099 codegen->Load32BitValue(out, 1 << value);
3100 }
3101 return;
3102 }
3103
3104 // Handle the non-constant cases.
3105 if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
3106 src.IsRegister()) {
3107 __ blsi(out, src.AsRegister<CpuRegister>());
3108 } else {
3109 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3110 if (is_high) {
3111 // Use architectural support: basically 1 << bsr.
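      // For example, highestOneBit(0x00300000) == 0x00200000: BSR returns 21, the index of the
      // most significant set bit, and 1 << 21 == 0x00200000.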
3112 if (src.IsRegister()) {
3113 if (is_long) {
3114 __ bsrq(tmp, src.AsRegister<CpuRegister>());
3115 } else {
3116 __ bsrl(tmp, src.AsRegister<CpuRegister>());
3117 }
3118 } else if (is_long) {
3119 DCHECK(src.IsDoubleStackSlot());
3120 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3121 } else {
3122 DCHECK(src.IsStackSlot());
3123 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3124 }
3125 // BSR sets ZF if the input was zero.
3126 NearLabel is_zero, done;
3127 __ j(kEqual, &is_zero);
3128 __ movl(out, Immediate(1)); // Clears upper bits too.
3129 if (is_long) {
3130 __ shlq(out, tmp);
3131 } else {
3132 __ shll(out, tmp);
3133 }
3134 __ jmp(&done);
3135 __ Bind(&is_zero);
3136 __ xorl(out, out); // Clears upper bits too.
3137 __ Bind(&done);
3138 } else {
3139 // Copy input into temporary.
3140 if (src.IsRegister()) {
3141 if (is_long) {
3142 __ movq(tmp, src.AsRegister<CpuRegister>());
3143 } else {
3144 __ movl(tmp, src.AsRegister<CpuRegister>());
3145 }
3146 } else if (is_long) {
3147 DCHECK(src.IsDoubleStackSlot());
3148 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3149 } else {
3150 DCHECK(src.IsStackSlot());
3151 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3152 }
3153 // Do the bit twiddling: basically tmp & -tmp;
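      // For example, 0x00300000 & -0x00300000 == 0x00100000: negating flips every bit above the
      // lowest set bit, so the AND keeps only that bit.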
3154 if (is_long) {
3155 __ movq(out, tmp);
3156 __ negq(tmp);
3157 __ andq(out, tmp);
3158 } else {
3159 __ movl(out, tmp);
3160 __ negl(tmp);
3161 __ andl(out, tmp);
3162 }
3163 }
3164 }
3165 }
3166
VisitIntegerHighestOneBit(HInvoke * invoke)3167 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3168 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3169 }
3170
VisitIntegerHighestOneBit(HInvoke * invoke)3171 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3172 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
3173 }
3174
VisitLongHighestOneBit(HInvoke * invoke)3175 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3176 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3177 }
3178
VisitLongHighestOneBit(HInvoke * invoke)3179 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3180 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
3181 }
3182
VisitIntegerLowestOneBit(HInvoke * invoke)3183 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3184 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3185 }
3186
VisitIntegerLowestOneBit(HInvoke * invoke)3187 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3188 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
3189 }
3190
VisitLongLowestOneBit(HInvoke * invoke)3191 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3192 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3193 }
3194
VisitLongLowestOneBit(HInvoke * invoke)3195 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3196 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
3197 }
3198
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)3199 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3200 LocationSummary* locations =
3201 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3202 locations->SetInAt(0, Location::Any());
3203 locations->SetOut(Location::RequiresRegister());
3204 }
3205
GenLeadingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3206 static void GenLeadingZeros(X86_64Assembler* assembler,
3207 CodeGeneratorX86_64* codegen,
3208 HInvoke* invoke, bool is_long) {
3209 LocationSummary* locations = invoke->GetLocations();
3210 Location src = locations->InAt(0);
3211 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3212
3213 int zero_value_result = is_long ? 64 : 32;
3214 if (invoke->InputAt(0)->IsConstant()) {
3215 // Evaluate this at compile time.
3216 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3217 if (value == 0) {
3218 value = zero_value_result;
3219 } else {
3220 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3221 }
3222 codegen->Load32BitValue(out, value);
3223 return;
3224 }
3225
3226 // Handle the non-constant cases.
3227 if (src.IsRegister()) {
3228 if (is_long) {
3229 __ bsrq(out, src.AsRegister<CpuRegister>());
3230 } else {
3231 __ bsrl(out, src.AsRegister<CpuRegister>());
3232 }
3233 } else if (is_long) {
3234 DCHECK(src.IsDoubleStackSlot());
3235 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3236 } else {
3237 DCHECK(src.IsStackSlot());
3238 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3239 }
3240
3241 // BSR sets ZF if the input was zero, and the output is undefined.
3242 NearLabel is_zero, done;
3243 __ j(kEqual, &is_zero);
3244
3245 // Correct the result from BSR to get the CLZ result.
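  // (BSR returns the index of the highest set bit, so CLZ == (width - 1) - index; since the
  // index fits in the low 5 or 6 bits, the subtraction reduces to the XOR against 31 or 63
  // below.)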
3246 __ xorl(out, Immediate(zero_value_result - 1));
3247 __ jmp(&done);
3248
3249 // Fix the zero case with the expected result.
3250 __ Bind(&is_zero);
3251 __ movl(out, Immediate(zero_value_result));
3252
3253 __ Bind(&done);
3254 }
3255
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3256 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3257 CreateLeadingZeroLocations(allocator_, invoke);
3258 }
3259
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3260 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3261 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3262 }
3263
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3264 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3265 CreateLeadingZeroLocations(allocator_, invoke);
3266 }
3267
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3268 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3269 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3270 }
3271
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)3272 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3273 LocationSummary* locations =
3274 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3275 locations->SetInAt(0, Location::Any());
3276 locations->SetOut(Location::RequiresRegister());
3277 }
3278
GenTrailingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3279 static void GenTrailingZeros(X86_64Assembler* assembler,
3280 CodeGeneratorX86_64* codegen,
3281 HInvoke* invoke, bool is_long) {
3282 LocationSummary* locations = invoke->GetLocations();
3283 Location src = locations->InAt(0);
3284 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3285
3286 int zero_value_result = is_long ? 64 : 32;
3287 if (invoke->InputAt(0)->IsConstant()) {
3288 // Evaluate this at compile time.
3289 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3290 if (value == 0) {
3291 value = zero_value_result;
3292 } else {
3293 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3294 }
3295 codegen->Load32BitValue(out, value);
3296 return;
3297 }
3298
3299 // Handle the non-constant cases.
3300 if (src.IsRegister()) {
3301 if (is_long) {
3302 __ bsfq(out, src.AsRegister<CpuRegister>());
3303 } else {
3304 __ bsfl(out, src.AsRegister<CpuRegister>());
3305 }
3306 } else if (is_long) {
3307 DCHECK(src.IsDoubleStackSlot());
3308 __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3309 } else {
3310 DCHECK(src.IsStackSlot());
3311 __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3312 }
3313
3314 // BSF sets ZF if the input was zero, and the output is undefined.
3315 NearLabel done;
3316 __ j(kNotEqual, &done);
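  // Unlike BSR, the BSF result needs no correction: the index of the lowest set bit equals the
  // number of trailing zeros, so only the all-zero input is special-cased.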
3317
3318 // Fix the zero case with the expected result.
3319 __ movl(out, Immediate(zero_value_result));
3320
3321 __ Bind(&done);
3322 }
3323
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3324 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3325 CreateTrailingZeroLocations(allocator_, invoke);
3326 }
3327
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3328 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3329 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3330 }
3331
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3332 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3333 CreateTrailingZeroLocations(allocator_, invoke);
3334 }
3335
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3336 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3337 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3338 }
3339
3340 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
3341 void IntrinsicLocationsBuilderX86_64::Visit##name##ValueOf(HInvoke* invoke) { \
3342 InvokeRuntimeCallingConvention calling_convention; \
3343 IntrinsicVisitor::ComputeValueOfLocations( \
3344 invoke, \
3345 codegen_, \
3346 low, \
3347 (high) - (low) + 1, \
3348 Location::RegisterLocation(RAX), \
3349 Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
3350 } \
3351 void IntrinsicCodeGeneratorX86_64::Visit##name##ValueOf(HInvoke* invoke) { \
3352 IntrinsicVisitor::ValueOfInfo info = \
3353 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
3354 codegen_->GetCompilerOptions(), \
3355 WellKnownClasses::java_lang_##name##_value, \
3356 low, \
3357 (high) - (low) + 1, \
3358 start_index); \
3359 HandleValueOf(invoke, info, type); \
3360 }
BOXED_TYPES(VISIT_INTRINSIC)3361 BOXED_TYPES(VISIT_INTRINSIC)
3362 #undef VISIT_INTRINSIC
3363
3364 template <typename T>
3365 static void Store(X86_64Assembler* assembler,
3366 DataType::Type primitive_type,
3367 const Address& address,
3368 const T& operand) {
3369 switch (primitive_type) {
3370 case DataType::Type::kInt8:
3371 case DataType::Type::kUint8: {
3372 __ movb(address, operand);
3373 break;
3374 }
3375 case DataType::Type::kInt16:
3376 case DataType::Type::kUint16: {
3377 __ movw(address, operand);
3378 break;
3379 }
3380 case DataType::Type::kInt32: {
3381 __ movl(address, operand);
3382 break;
3383 }
3384 default: {
3385 LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
3386 }
3387 }
3388 }
3389
HandleValueOf(HInvoke * invoke,const IntrinsicVisitor::ValueOfInfo & info,DataType::Type type)3390 void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
3391 const IntrinsicVisitor::ValueOfInfo& info,
3392 DataType::Type type) {
3393 LocationSummary* locations = invoke->GetLocations();
3394 X86_64Assembler* assembler = GetAssembler();
3395
3396 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3397 InvokeRuntimeCallingConvention calling_convention;
3398 CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3399 auto allocate_instance = [&]() {
3400 codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3401 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3402 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3403 };
3404 if (invoke->InputAt(0)->IsIntConstant()) {
3405 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3406 if (static_cast<uint32_t>(value - info.low) < info.length) {
3407 // Just embed the object in the code.
3408 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3409 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3410 } else {
3411 DCHECK(locations->CanCall());
3412 // Allocate and initialize a new object.
3413 // TODO: If we JIT, we could allocate the boxed value now, and store it in the
3414 // JIT object table.
3415 allocate_instance();
3416 Store(assembler, type, Address(out, info.value_offset), Immediate(value));
3417 }
3418 } else {
3419 DCHECK(locations->CanCall());
3420 CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3421 // Check bounds of our cache.
3422 __ leal(out, Address(in, -info.low));
3423 __ cmpl(out, Immediate(info.length));
3424 NearLabel allocate, done;
3425 __ j(kAboveEqual, &allocate);
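    // Subtracting `low` first lets a single unsigned comparison reject both values below the
    // cache range and values at or above `low + length`.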
3426 // If the value is within the bounds, load the boxed value directly from the array.
3427 DCHECK_NE(out.AsRegister(), argument.AsRegister());
3428 codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3429 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3430 "Check heap reference size.");
3431 __ movl(out, Address(argument, out, TIMES_4, 0));
3432 __ MaybeUnpoisonHeapReference(out);
3433 __ jmp(&done);
3434 __ Bind(&allocate);
3435 // Otherwise allocate and initialize a new object.
3436 allocate_instance();
3437 Store(assembler, type, Address(out, info.value_offset), in);
3438 __ Bind(&done);
3439 }
3440 }
3441
VisitReferenceGetReferent(HInvoke * invoke)3442 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3443 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3444 }
3445
VisitReferenceGetReferent(HInvoke * invoke)3446 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3447 X86_64Assembler* assembler = GetAssembler();
3448 LocationSummary* locations = invoke->GetLocations();
3449
3450 Location obj = locations->InAt(0);
3451 Location out = locations->Out();
3452
3453 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3454 codegen_->AddSlowPath(slow_path);
3455
3456 if (codegen_->EmitReadBarrier()) {
3457 // Check self->GetWeakRefAccessEnabled().
3458 ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3459 __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3460 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3461 __ j(kNotEqual, slow_path->GetEntryLabel());
3462 }
3463
3464 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3465 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3466
3467 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3468 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3469 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3470 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3471 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3472 __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3473 Immediate(0));
3474 __ j(kNotEqual, slow_path->GetEntryLabel());
3475
3476 // Load the value from the field.
3477 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3478 if (codegen_->EmitBakerReadBarrier()) {
3479 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3480 out,
3481 obj.AsRegister<CpuRegister>(),
3482 referent_offset,
3483 /*needs_null_check=*/ true);
3484 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3485 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3486 } else {
3487 __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3488 codegen_->MaybeRecordImplicitNullCheck(invoke);
3489 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3490 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3491 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3492 }
3493 __ Bind(slow_path->GetExitLabel());
3494 }
3495
VisitReferenceRefersTo(HInvoke * invoke)3496 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3497 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3498 }
3499
VisitReferenceRefersTo(HInvoke * invoke)3500 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3501 X86_64Assembler* assembler = GetAssembler();
3502 LocationSummary* locations = invoke->GetLocations();
3503
3504 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3505 CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3506 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3507
3508 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3509 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3510
3511 __ movl(out, Address(obj, referent_offset));
3512 codegen_->MaybeRecordImplicitNullCheck(invoke);
3513 __ MaybeUnpoisonHeapReference(out);
3514 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3515 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3516
3517 __ cmpl(out, other);
3518
3519 if (codegen_->EmitReadBarrier()) {
3520 DCHECK(kUseBakerReadBarrier);
3521
3522 NearLabel calculate_result;
3523 __ j(kEqual, &calculate_result); // ZF set if taken.
3524
3525 // Check if the loaded reference is null in a way that leaves ZF clear for null.
3526 __ cmpl(out, Immediate(1));
3527 __ j(kBelow, &calculate_result); // ZF clear if taken.
3528
3529 // For correct memory visibility, we need a barrier before loading the lock word, but the
3530 // barrier already emitted for the volatile load above is sufficient.
3531
3532 // Load the lockword and check if it is a forwarding address.
3533 static_assert(LockWord::kStateShift == 30u);
3534 static_assert(LockWord::kStateForwardingAddress == 3u);
3535 __ movl(out, Address(out, monitor_offset));
3536 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3537 __ j(kBelow, &calculate_result); // ZF clear if taken.
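    // (The lock word state occupies the two most significant bits and only the
    // forwarding-address state has both bits set, so an unsigned compare against 0xc0000000
    // identifies it.)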
3538
3539 // Extract the forwarding address and compare with `other`.
3540 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3541 __ cmpl(out, other);
3542
3543 __ Bind(&calculate_result);
3544 }
3545
3546 // Convert ZF into the Boolean result.
3547 __ setcc(kEqual, out);
3548 __ movzxb(out, out);
3549 }
3550
VisitThreadInterrupted(HInvoke * invoke)3551 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3552 LocationSummary* locations =
3553 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3554 locations->SetOut(Location::RequiresRegister());
3555 }
3556
VisitThreadInterrupted(HInvoke * invoke)3557 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3558 X86_64Assembler* assembler = GetAssembler();
3559 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3560 Address address = Address::Absolute
3561 (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3562 NearLabel done;
3563 __ gs()->movl(out, address);
3564 __ testl(out, out);
3565 __ j(kEqual, &done);
3566 __ gs()->movl(address, Immediate(0));
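  // Publish the cleared flag: the fence below keeps the store from being reordered with
  // subsequent memory accesses.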
3567 codegen_->MemoryFence();
3568 __ Bind(&done);
3569 }
3570
VisitReachabilityFence(HInvoke * invoke)3571 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3572 LocationSummary* locations =
3573 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3574 locations->SetInAt(0, Location::Any());
3575 }
3576
VisitReachabilityFence(HInvoke * invoke)3577 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3578
CreateDivideUnsignedLocations(HInvoke * invoke,ArenaAllocator * allocator)3579 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3580 LocationSummary* locations =
3581 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3582 locations->SetInAt(0, Location::RegisterLocation(RAX));
3583 locations->SetInAt(1, Location::RequiresRegister());
3584 locations->SetOut(Location::SameAsFirstInput());
3585 // The DIV instruction uses RDX:RAX (EDX:EAX for 32-bit operands) as the dividend.
3586 locations->AddTemp(Location::RegisterLocation(RDX));
3587 }
3588
GenerateDivideUnsigned(HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type data_type)3589 static void GenerateDivideUnsigned(HInvoke* invoke,
3590 CodeGeneratorX86_64* codegen,
3591 DataType::Type data_type) {
3592 LocationSummary* locations = invoke->GetLocations();
3593 Location out = locations->Out();
3594 Location first = locations->InAt(0);
3595 Location second = locations->InAt(1);
3596 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3597 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3598
3599 DCHECK_EQ(RAX, first.AsRegister<Register>());
3600 DCHECK_EQ(RAX, out.AsRegister<Register>());
3601 DCHECK_EQ(RDX, rdx.AsRegister());
3602
3603 // Check whether the divisor is zero and bail out to the slow path if so.
3604 auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3605 codegen->AddSlowPath(slow_path);
3606
3607 X86_64Assembler* assembler = codegen->GetAssembler();
3608 if (data_type == DataType::Type::kInt32) {
3609 __ testl(second_reg, second_reg);
3610 __ j(kEqual, slow_path->GetEntryLabel());
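    // Zero the high half of the dividend: DIV divides EDX:EAX by the operand, so clearing EDX
    // yields a plain unsigned 32-bit division (the 64-bit path below mirrors this with RDX:RAX).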
3611 __ xorl(rdx, rdx);
3612 __ divl(second_reg);
3613 } else {
3614 DCHECK(data_type == DataType::Type::kInt64);
3615 __ testq(second_reg, second_reg);
3616 __ j(kEqual, slow_path->GetEntryLabel());
3617 __ xorq(rdx, rdx);
3618 __ divq(second_reg);
3619 }
3620 __ Bind(slow_path->GetExitLabel());
3621 }
3622
VisitIntegerDivideUnsigned(HInvoke * invoke)3623 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3624 CreateDivideUnsignedLocations(invoke, allocator_);
3625 }
3626
VisitIntegerDivideUnsigned(HInvoke * invoke)3627 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3628 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3629 }
3630
VisitLongDivideUnsigned(HInvoke * invoke)3631 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3632 CreateDivideUnsignedLocations(invoke, allocator_);
3633 }
3634
VisitLongDivideUnsigned(HInvoke * invoke)3635 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3636 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3637 }
3638
VisitMathMultiplyHigh(HInvoke * invoke)3639 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3640 LocationSummary* locations =
3641 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3642 locations->SetInAt(0, Location::RegisterLocation(RAX));
3643 locations->SetInAt(1, Location::RequiresRegister());
3644 locations->SetOut(Location::RegisterLocation(RDX));
3645 locations->AddTemp(Location::RegisterLocation(RAX));
3646 }
3647
VisitMathMultiplyHigh(HInvoke * invoke)3648 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3649 X86_64Assembler* assembler = GetAssembler();
3650 LocationSummary* locations = invoke->GetLocations();
3651
3652 CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3653
3654 DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3655 DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3656
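  // One-operand IMUL computes RDX:RAX = RAX * y; the high 64 bits of the product land in RDX,
  // which is the output register declared for multiplyHigh.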
3657 __ imulq(y);
3658 }
3659
3660 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3661 public:
VarHandleSlowPathX86_64(HInvoke * invoke)3662 explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3663 : IntrinsicSlowPathX86_64(invoke) {
3664 }
3665
SetVolatile(bool is_volatile)3666 void SetVolatile(bool is_volatile) {
3667 is_volatile_ = is_volatile;
3668 }
3669
SetAtomic(bool is_atomic)3670 void SetAtomic(bool is_atomic) {
3671 is_atomic_ = is_atomic;
3672 }
3673
SetNeedAnyStoreBarrier(bool need_any_store_barrier)3674 void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3675 need_any_store_barrier_ = need_any_store_barrier;
3676 }
3677
SetNeedAnyAnyBarrier(bool need_any_any_barrier)3678 void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3679 need_any_any_barrier_ = need_any_any_barrier;
3680 }
3681
SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op)3682 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3683 get_and_update_op_ = get_and_update_op;
3684 }
3685
GetByteArrayViewCheckLabel()3686 Label* GetByteArrayViewCheckLabel() {
3687 return &byte_array_view_check_label_;
3688 }
3689
GetNativeByteOrderLabel()3690 Label* GetNativeByteOrderLabel() {
3691 return &native_byte_order_label_;
3692 }
3693
EmitNativeCode(CodeGenerator * codegen)3694 void EmitNativeCode(CodeGenerator* codegen) override {
3695 if (GetByteArrayViewCheckLabel()->IsLinked()) {
3696 EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3697 }
3698 IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3699 }
3700
3701 private:
GetInvoke() const3702 HInvoke* GetInvoke() const {
3703 return GetInstruction()->AsInvoke();
3704 }
3705
GetAccessModeTemplate() const3706 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3707 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3708 }
3709
3710 void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3711
3712 Label byte_array_view_check_label_;
3713 Label native_byte_order_label_;
3714
3715 // Arguments forwarded to specific methods.
3716 bool is_volatile_;
3717 bool is_atomic_;
3718 bool need_any_store_barrier_;
3719 bool need_any_any_barrier_;
3720 GetAndUpdateOp get_and_update_op_;
3721 };
3722
GenerateMathFma(HInvoke * invoke,CodeGeneratorX86_64 * codegen)3723 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3724 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3725 X86_64Assembler* assembler = codegen->GetAssembler();
3726 LocationSummary* locations = invoke->GetLocations();
3727 DCHECK(locations->InAt(0).Equals(locations->Out()));
3728 XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3729 XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3730 XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
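// VFMADD213 computes dst = dst * src2 + src3, so this emits left = left * right + accumulator,
// matching Math.fma(a, b, c) = a * b + c with the result left in the first (in/out) register.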
3731 if (invoke->GetType() == DataType::Type::kFloat32) {
3732 __ vfmadd213ss(left, right, accumulator);
3733 } else {
3734 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3735 __ vfmadd213sd(left, right, accumulator);
3736 }
3737 }
3738
VisitMathFmaDouble(HInvoke * invoke)3739 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3740 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3741 GenerateMathFma(invoke, codegen_);
3742 }
3743
VisitMathFmaDouble(HInvoke * invoke)3744 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3745 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3746 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3747 }
3748 }
3749
VisitMathFmaFloat(HInvoke * invoke)3750 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3751 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3752 GenerateMathFma(invoke, codegen_);
3753 }
3754
VisitMathFmaFloat(HInvoke * invoke)3755 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3756 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3757 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3758 }
3759 }
3760
3761 // Generate subtype check without read barriers.
GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64 * codegen,SlowPathCode * slow_path,CpuRegister object,CpuRegister temp,Address type_address,bool object_can_be_null=true)3762 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3763 SlowPathCode* slow_path,
3764 CpuRegister object,
3765 CpuRegister temp,
3766 Address type_address,
3767 bool object_can_be_null = true) {
3768 X86_64Assembler* assembler = codegen->GetAssembler();
3769
3770 const MemberOffset class_offset = mirror::Object::ClassOffset();
3771 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3772
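// The emitted code is roughly equivalent to walking the superclass chain (illustrative sketch):
//   for (k = object->klass_; k != type; k = k->super_class_) {
//     if (k->super_class_ == null) goto slow_path;  // Interfaces etc. are handled there.
//   }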
3773 NearLabel check_type_compatibility, type_matched;
3774
3775 // If the object is null, there is no need to check the type.
3776 if (object_can_be_null) {
3777 __ testl(object, object);
3778 __ j(kZero, &type_matched);
3779 }
3780
3781 // Do not unpoison for in-memory comparison.
3782 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3783 __ movl(temp, Address(object, class_offset));
3784 __ Bind(&check_type_compatibility);
3785 __ cmpl(temp, type_address);
3786 __ j(kEqual, &type_matched);
3787 // Load the super class.
3788 __ MaybeUnpoisonHeapReference(temp);
3789 __ movl(temp, Address(temp, super_class_offset));
3790 // If the super class is null, we reached the root of the hierarchy without a match.
3791 // We let the slow path handle uncovered cases (e.g. interfaces).
3792 __ testl(temp, temp);
3793 __ j(kEqual, slow_path->GetEntryLabel());
3794 __ jmp(&check_type_compatibility);
3795 __ Bind(&type_matched);
3796 }
3797
3798 // Check access mode and the primitive type from VarHandle.varType.
3799 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3800 // check without read barrier, so it can have false negatives which we handle in the slow path.
GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path,DataType::Type type)3801 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3802 CodeGeneratorX86_64* codegen,
3803 VarHandleSlowPathX86_64* slow_path,
3804 DataType::Type type) {
3805 X86_64Assembler* assembler = codegen->GetAssembler();
3806
3807 LocationSummary* locations = invoke->GetLocations();
3808 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3809 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3810
3811 mirror::VarHandle::AccessMode access_mode =
3812 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3813 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3814
3815 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3816 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3817 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3818
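// The accessModesBitMask field has one bit per AccessMode ordinal, so testing the bit for the
// requested access mode tells us whether this VarHandle supports the operation.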
3819 // Check that the operation is permitted.
3820 __ testl(Address(varhandle, access_mode_bit_mask_offset),
3821 Immediate(1u << static_cast<uint32_t>(access_mode)));
3822 __ j(kZero, slow_path->GetEntryLabel());
3823
3824 // For primitive types, no read barrier is needed when loading the varType reference, as it is
3825 // used only to load a constant field (the primitive type) through that reference. For reference
3826 // types, we deliberately avoid the read barrier, letting the slow path handle the false negatives.
3827 __ movl(temp, Address(varhandle, var_type_offset));
3828 __ MaybeUnpoisonHeapReference(temp);
3829
3830 // Check the varType.primitiveType field against the type we're trying to use.
3831 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3832 __ j(kNotEqual, slow_path->GetEntryLabel());
3833
3834 if (type == DataType::Type::kReference) {
3835 // Check reference arguments against the varType.
3836 // False negatives due to varType being an interface or array type
3837 // or due to the missing read barrier are handled by the slow path.
3838 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3839 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3840 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3841 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3842 HInstruction* arg = invoke->InputAt(arg_index);
3843 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3844 if (!arg->IsNullConstant()) {
3845 CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3846 Address type_addr(varhandle, var_type_offset);
3847 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3848 }
3849 }
3850 }
3851 }
3852
GenerateVarHandleStaticFieldCheck(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3853 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3854 CodeGeneratorX86_64* codegen,
3855 VarHandleSlowPathX86_64* slow_path) {
3856 X86_64Assembler* assembler = codegen->GetAssembler();
3857
3858 LocationSummary* locations = invoke->GetLocations();
3859 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3860
3861 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3862
3863 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3864 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3865 __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3866 __ j(kNotEqual, slow_path->GetEntryLabel());
3867 }
3868
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3869 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3870 CodeGeneratorX86_64* codegen,
3871 VarHandleSlowPathX86_64* slow_path) {
3872 VarHandleOptimizations optimizations(invoke);
3873 X86_64Assembler* assembler = codegen->GetAssembler();
3874
3875 LocationSummary* locations = invoke->GetLocations();
3876 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3877 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3878 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3879
3880 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3881 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3882
3883 // Null-check the object.
3884 if (!optimizations.GetSkipObjectNullCheck()) {
3885 __ testl(object, object);
3886 __ j(kZero, slow_path->GetEntryLabel());
3887 }
3888
3889 if (!optimizations.GetUseKnownImageVarHandle()) {
3890 // Check that the VarHandle references an instance field by checking that
3891 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3892 // type compatibility check with the source object's type, which will fail for null.
3893 __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3894 __ j(kNotEqual, slow_path->GetEntryLabel());
3895
3896 // Check that the object has the correct type.
3897 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3898 GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3899 slow_path,
3900 object,
3901 temp,
3902 Address(varhandle, coordinate_type0_offset),
3903 /*object_can_be_null=*/ false);
3904 }
3905 }
3906
GenerateVarHandleArrayChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3907 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3908 CodeGeneratorX86_64* codegen,
3909 VarHandleSlowPathX86_64* slow_path) {
3910 VarHandleOptimizations optimizations(invoke);
3911 X86_64Assembler* assembler = codegen->GetAssembler();
3912 LocationSummary* locations = invoke->GetLocations();
3913
3914 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3915 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3916 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3917 DataType::Type value_type =
3918 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3919 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3920
3921 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3922 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3923 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3924 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3925 const MemberOffset class_offset = mirror::Object::ClassOffset();
3926 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3927
3928 // Null-check the object.
3929 if (!optimizations.GetSkipObjectNullCheck()) {
3930 __ testl(object, object);
3931 __ j(kZero, slow_path->GetEntryLabel());
3932 }
3933
3934 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3935
3936 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3937 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3938 // coordinateType0 shall not be null, but we do not explicitly verify that.
3939 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3940 __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3941 __ j(kEqual, slow_path->GetEntryLabel());
3942
3943 // Check the object's class against coordinateType0.
3944 //
3945 // This is an exact check and we defer other cases to the runtime. This includes
3946 // conversion to array of superclass references, which is valid but subsequently
3947 // requires all update operations to check that the value can indeed be stored.
3948 // We do not want to perform such extra checks in the intrinsified code.
3949 //
3950 // We do this check without read barrier, so there can be false negatives which we
3951 // defer to the slow path. There shall be no false negatives for array classes in the
3952 // boot image (including Object[] and primitive arrays) because they are non-movable.
3953 __ movl(temp, Address(object, class_offset.Int32Value()));
3954 __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3955 __ j(kNotEqual, slow_path->GetEntryLabel());
3956
3957 // Check that the coordinateType0 is an array type. We do not need a read barrier
3958 // for loading constant reference fields (or chains of them) for comparison with null,
3959 // nor for finally loading a constant primitive field (primitive type) below.
3960 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3961 __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3962 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3963 __ testl(temp, temp);
3964 __ j(kZero, slow_path->GetEntryLabel());
3965
3966 // Check that the array component type matches the primitive type.
3967 Label* slow_path_label;
3968 if (primitive_type == Primitive::kPrimNot) {
3969 slow_path_label = slow_path->GetEntryLabel();
3970 } else {
3971 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3972 // we shall check for a byte array view in the slow path.
3973 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3974 // so we cannot emit that if we're JITting without a boot image.
3975 bool boot_image_available =
3976 codegen->GetCompilerOptions().IsBootImage() ||
3977 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3978 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3979 slow_path_label =
3980 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3981 }
3982 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3983 __ j(kNotEqual, slow_path_label);
3984
3985 // Check for array index out of bounds.
3986 __ cmpl(index, Address(object, array_length_offset.Int32Value()));
3987 __ j(kAboveEqual, slow_path->GetEntryLabel());
3988 }
3989
GenerateVarHandleCoordinateChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,VarHandleSlowPathX86_64 * slow_path)3990 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3991 CodeGeneratorX86_64* codegen,
3992 VarHandleSlowPathX86_64* slow_path) {
3993 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3994 if (expected_coordinates_count == 0u) {
3995 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3996 } else if (expected_coordinates_count == 1u) {
3997 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3998 } else {
3999 DCHECK_EQ(expected_coordinates_count, 2u);
4000 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4001 }
4002 }
4003
GenerateVarHandleChecks(HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type type)4004 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
4005 CodeGeneratorX86_64* codegen,
4006 DataType::Type type) {
4007 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4008 VarHandleOptimizations optimizations(invoke);
4009 if (optimizations.GetUseKnownImageVarHandle()) {
4010 DCHECK_NE(expected_coordinates_count, 2u);
4011 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4012 return nullptr;
4013 }
4014 }
4015
4016 VarHandleSlowPathX86_64* slow_path =
4017 new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
4018 codegen->AddSlowPath(slow_path);
4019
4020 if (!optimizations.GetUseKnownImageVarHandle()) {
4021 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4022 }
4023 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4024
4025 return slow_path;
4026 }
4027
4028 struct VarHandleTarget {
4029 Register object; // The object holding the value to operate on.
4030 Register offset; // The offset of the value to operate on.
4031 };
4032
GetVarHandleTarget(HInvoke * invoke)4033 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4034 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4035 LocationSummary* locations = invoke->GetLocations();
4036
4037 VarHandleTarget target;
4038 // The temporary allocated for loading the offset.
4039 target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
4040 // The reference to the object that holds the value to operate on.
4041 target.object = (expected_coordinates_count == 0u)
4042 ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
4043 : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
4044 return target;
4045 }
4046
GenerateVarHandleTarget(HInvoke * invoke,const VarHandleTarget & target,CodeGeneratorX86_64 * codegen)4047 static void GenerateVarHandleTarget(HInvoke* invoke,
4048 const VarHandleTarget& target,
4049 CodeGeneratorX86_64* codegen) {
4050 LocationSummary* locations = invoke->GetLocations();
4051 X86_64Assembler* assembler = codegen->GetAssembler();
4052 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4053
4054 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
4055
4056 if (expected_coordinates_count <= 1u) {
4057 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4058 ScopedObjectAccess soa(Thread::Current());
4059 ArtField* target_field = GetBootImageVarHandleField(invoke);
4060 if (expected_coordinates_count == 0u) {
4061 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4062 __ movl(CpuRegister(target.object),
4063 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
4064 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4065 codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
4066 } else {
4067 codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
4068 declaring_class->GetDexTypeIndex());
4069 }
4070 }
4071 __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
4072 } else {
4073 // For static fields, we need to fill `target.object` with the declaring class, so we can use
4074 // `target.object` as a temporary for the `ArtField*`. For instance fields, we do not need the
4075 // declaring class, so we can forget the `ArtField*` once we have loaded `target.offset`;
4076 // therefore use `target.offset` to hold the `ArtField*`.
4077 CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
4078
4079 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4080 const MemberOffset offset_offset = ArtField::OffsetOffset();
4081
4082 // Load the ArtField*, the offset and, if needed, declaring class.
4083 __ movq(field, Address(varhandle, art_field_offset));
4084 __ movl(CpuRegister(target.offset), Address(field, offset_offset));
4085 if (expected_coordinates_count == 0u) {
4086 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
4087 instr_codegen->GenerateGcRootFieldLoad(invoke,
4088 Location::RegisterLocation(target.object),
4089 Address(field, ArtField::DeclaringClassOffset()),
4090 /*fixup_label=*/nullptr,
4091 codegen->GetCompilerReadBarrierOption());
4092 }
4093 }
4094 } else {
4095 DCHECK_EQ(expected_coordinates_count, 2u);
4096
4097 DataType::Type value_type =
4098 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4099 ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
4100 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4101 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
4102
4103 // The effect of LEA is `target.offset = index * scale + data_offset`.
4104 __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
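// The final element address is formed later as `target.object + target.offset` (TIMES_1) by the
// access code, so only the scaled index plus the data offset is materialized here.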
4105 }
4106 }
4107
HasVarHandleIntrinsicImplementation(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4108 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4109 // The only supported read barrier implementation is the Baker-style read barriers.
4110 if (codegen->EmitNonBakerReadBarrier()) {
4111 return false;
4112 }
4113
4114 VarHandleOptimizations optimizations(invoke);
4115 if (optimizations.GetDoNotIntrinsify()) {
4116 return false;
4117 }
4118
4119 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4120 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
4121 return true;
4122 }
4123
CreateVarHandleCommonLocations(HInvoke * invoke)4124 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4125 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4126 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4127 LocationSummary* locations = new (allocator) LocationSummary(
4128 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4129
4130 locations->SetInAt(0, Location::RequiresRegister());
4131 // Require coordinates in registers. These are the object holding the value
4132 // to operate on (except for static fields) and the index (for arrays and views).
4133 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4134 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4135 }
4136
4137 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4138 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4139 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4140 HInstruction* arg = invoke->InputAt(arg_index);
4141 if (DataType::IsFloatingPointType(arg->GetType())) {
4142 locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
4143 } else {
4144 locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
4145 }
4146 }
4147
4148 // Add a temporary for offset.
4149 locations->AddTemp(Location::RequiresRegister());
4150
4151 if (expected_coordinates_count == 0u) {
4152 // Add a temporary to hold the declaring class.
4153 locations->AddTemp(Location::RequiresRegister());
4154 }
4155
4156 return locations;
4157 }
4158
CreateVarHandleGetLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4159 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4160 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4161 return;
4162 }
4163
4164 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4165 if (DataType::IsFloatingPointType(invoke->GetType())) {
4166 locations->SetOut(Location::RequiresFpuRegister());
4167 } else {
4168 locations->SetOut(Location::RequiresRegister());
4169 }
4170 }
4171
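// Generate the body of VarHandle.get(); on x86-64 the acquire/opaque/volatile variants reuse
// this code as well. At the Java level this corresponds to calls such as
// `int v = (int) vh.get(holder)` (names purely illustrative).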
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorX86_64 * codegen,bool byte_swap=false)4172 static void GenerateVarHandleGet(HInvoke* invoke,
4173 CodeGeneratorX86_64* codegen,
4174 bool byte_swap = false) {
4175 DataType::Type type = invoke->GetType();
4176 DCHECK_NE(type, DataType::Type::kVoid);
4177
4178 LocationSummary* locations = invoke->GetLocations();
4179 X86_64Assembler* assembler = codegen->GetAssembler();
4180
4181 VarHandleTarget target = GetVarHandleTarget(invoke);
4182 VarHandleSlowPathX86_64* slow_path = nullptr;
4183 if (!byte_swap) {
4184 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4185 GenerateVarHandleTarget(invoke, target, codegen);
4186 if (slow_path != nullptr) {
4187 __ Bind(slow_path->GetNativeByteOrderLabel());
4188 }
4189 }
4190
4191 // Load the value from the field.
4192 Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4193 Location out = locations->Out();
4194
4195 if (type == DataType::Type::kReference) {
4196 if (codegen->EmitReadBarrier()) {
4197 DCHECK(kUseBakerReadBarrier);
4198 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4199 invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
4200 } else {
4201 __ movl(out.AsRegister<CpuRegister>(), src);
4202 __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
4203 }
4204 DCHECK(!byte_swap);
4205 } else {
4206 codegen->LoadFromMemoryNoReference(type, out, src);
4207 if (byte_swap) {
4208 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4209 codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
4210 }
4211 }
4212
4213 if (slow_path != nullptr) {
4214 DCHECK(!byte_swap);
4215 __ Bind(slow_path->GetExitLabel());
4216 }
4217 }
4218
VisitMethodHandleInvokeExact(HInvoke * invoke)4219 void IntrinsicLocationsBuilderX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4220 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4221 LocationSummary* locations = new (allocator)
4222 LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
4223
4224 InvokeDexCallingConventionVisitorX86_64 calling_convention;
4225 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
4226
4227 locations->SetInAt(0, Location::RequiresRegister());
4228
4229 // Accommodate the LocationSummary for the underlying invoke-* call.
4230 uint32_t number_of_args = invoke->GetNumberOfArguments();
4231 for (uint32_t i = 1; i < number_of_args; ++i) {
4232 locations->SetInAt(i, calling_convention.GetNextLocation(invoke->InputAt(i)->GetType()));
4233 }
4234
4235 // The last input is the MethodType object corresponding to the call site.
4236 locations->SetInAt(number_of_args, Location::RequiresRegister());
4237
4238 locations->AddTemp(Location::RequiresRegister());
4239 // Hidden arg for invoke-interface.
4240 locations->AddTemp(Location::RegisterLocation(RAX));
4241 }
4242
VisitMethodHandleInvokeExact(HInvoke * invoke)4243 void IntrinsicCodeGeneratorX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4244 LocationSummary* locations = invoke->GetLocations();
4245
4246 CpuRegister method_handle = locations->InAt(0).AsRegister<CpuRegister>();
4247
4248 SlowPathCode* slow_path =
4249 new (codegen_->GetScopedAllocator()) InvokePolymorphicSlowPathX86_64(invoke, method_handle);
4250 codegen_->AddSlowPath(slow_path);
4251 X86_64Assembler* assembler = codegen_->GetAssembler();
4252
4253 CpuRegister call_site_type =
4254 locations->InAt(invoke->GetNumberOfArguments()).AsRegister<CpuRegister>();
4255
4256 // The call site type should match the MethodHandle's type.
4257 __ MaybePoisonHeapReference(call_site_type);
4258 __ cmpl(call_site_type, Address(method_handle, mirror::MethodHandle::MethodTypeOffset()));
4259 __ j(kNotEqual, slow_path->GetEntryLabel());
4260
4261 CpuRegister method = CpuRegister(kMethodRegisterArgument);
4262 __ movq(method, Address(method_handle, mirror::MethodHandle::ArtFieldOrMethodOffset()));
4263
4264 Label static_dispatch;
4265 Label execute_target_method;
4266
4267 Address method_handle_kind = Address(method_handle, mirror::MethodHandle::HandleKindOffset());
4268 if (invoke->AsInvokePolymorphic()->CanTargetInstanceMethod()) {
4269 CpuRegister receiver = locations->InAt(1).AsRegister<CpuRegister>();
4270
4271 // The receiver must not be null for any of the following cases.
4272 __ testl(receiver, receiver);
4273 __ j(kEqual, slow_path->GetEntryLabel());
4274
4275 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeDirect));
4276 // No dispatch is needed for invoke-direct.
4277 __ j(kEqual, &execute_target_method);
4278
4279 Label non_virtual_dispatch;
4280 // Handle invoke-virtual case.
4281 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeVirtual));
4282 __ j(kNotEqual, &non_virtual_dispatch);
4283
4284 // Skip virtual dispatch if `method` is private.
4285 __ testl(Address(method, ArtMethod::AccessFlagsOffset()), Immediate(kAccPrivate));
4286 __ j(kNotZero, &execute_target_method);
4287
4288 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4289
4290 __ movl(temp, Address(method, ArtMethod::DeclaringClassOffset()));
4291 __ cmpl(temp, Address(receiver, mirror::Object::ClassOffset()));
4292 // If the method is defined in the receiver's class, execute it as-is.
4293 __ j(kEqual, &execute_target_method);
4294
4295 // MethodIndex is uint16_t.
4296 __ movzxw(temp, Address(method, ArtMethod::MethodIndexOffset()));
4297
4298 constexpr uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4299 // Re-using method register for receiver class.
4300 __ movl(method, Address(receiver, class_offset));
4301 __ MaybeUnpoisonHeapReference(method);
4302
4303 constexpr uint32_t vtable_offset =
4304 mirror::Class::EmbeddedVTableOffset(art::PointerSize::k64).Int32Value();
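// method = receiver->GetClass()->embedded_vtable_[method_index];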
4305 __ movq(method, Address(method, temp, TIMES_8, vtable_offset));
4306 __ Jump(&execute_target_method);
4307
4308 __ Bind(&non_virtual_dispatch);
4309 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeInterface));
4310 __ j(kNotEqual, &static_dispatch);
4311
4312 __ movl(temp, Address(method, ArtMethod::AccessFlagsOffset()));
4313
4314 __ testl(temp, Immediate(kAccPrivate));
4315 __ j(kNotZero, &execute_target_method);
4316
4317 CpuRegister hidden_arg = locations->GetTemp(1).AsRegister<CpuRegister>();
4318 // Set the hidden argument.
4319 DCHECK_EQ(RAX, hidden_arg.AsRegister());
4320 __ movq(hidden_arg, method);
4321
4322 Label get_imt_index_from_method_index;
4323 Label do_imt_dispatch;
4324
4325 // Get IMT index.
4326 // No default-conflict check is needed, as the IMT index is set for all methods that have the
4327 // kAccAbstract bit.
4328 __ testl(temp, Immediate(kAccAbstract));
4329 __ j(kZero, &get_imt_index_from_method_index);
4330
4331 // imt_index_ is uint16_t
4332 __ movzxw(temp, Address(method, ArtMethod::ImtIndexOffset()));
4333 __ Jump(&do_imt_dispatch);
4334
4335 // Default method: compute method->GetMethodIndex() & (ImTable::kSizeTruncToPowerOfTwo - 1).
4336 __ Bind(&get_imt_index_from_method_index);
4337 __ movl(temp, Address(method, ArtMethod::MethodIndexOffset()));
4338 __ andl(temp, Immediate(ImTable::kSizeTruncToPowerOfTwo - 1));
4339
4340 __ Bind(&do_imt_dispatch);
4341 // Re-using `method` to store receiver class and ImTableEntry.
4342 __ movl(method, Address(receiver, mirror::Object::ClassOffset()));
4343 __ MaybeUnpoisonHeapReference(method);
4344
4345 __ movq(method, Address(method, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4346 // method = receiver->GetClass()->embedded_imtable_->Get(method_offset);
4347 __ movq(method, Address(method, temp, TIMES_8, /* disp= */ 0));
4348
4349 __ Jump(&execute_target_method);
4350 }
4351 __ Bind(&static_dispatch);
4352 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeStatic));
4353 __ j(kNotEqual, slow_path->GetEntryLabel());
4354 // MH's kind is invoke-static. The method can be called directly, hence fall-through.
4355
4356 __ Bind(&execute_target_method);
4357 __ call(Address(
4358 method,
4359 ArtMethod::EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).SizeValue()));
4360 codegen_->RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4361 __ Bind(slow_path->GetExitLabel());
4362 }
4363
VisitVarHandleGet(HInvoke * invoke)4364 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
4365 CreateVarHandleGetLocations(invoke, codegen_);
4366 }
4367
VisitVarHandleGet(HInvoke * invoke)4368 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
4369 GenerateVarHandleGet(invoke, codegen_);
4370 }
4371
VisitVarHandleGetAcquire(HInvoke * invoke)4372 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4373 CreateVarHandleGetLocations(invoke, codegen_);
4374 }
4375
VisitVarHandleGetAcquire(HInvoke * invoke)4376 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4377 // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
4378 GenerateVarHandleGet(invoke, codegen_);
4379 }
4380
VisitVarHandleGetOpaque(HInvoke * invoke)4381 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4382 CreateVarHandleGetLocations(invoke, codegen_);
4383 }
4384
VisitVarHandleGetOpaque(HInvoke * invoke)4385 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4386 // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
4387 GenerateVarHandleGet(invoke, codegen_);
4388 }
4389
VisitVarHandleGetVolatile(HInvoke * invoke)4390 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4391 CreateVarHandleGetLocations(invoke, codegen_);
4392 }
4393
VisitVarHandleGetVolatile(HInvoke * invoke)4394 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4395 // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
4396 GenerateVarHandleGet(invoke, codegen_);
4397 }
4398
CreateVarHandleSetLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4399 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4400 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4401 return;
4402 }
4403
4404 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4405
4406 // An extra temporary is used for the card in MarkGCCard and to move 64-bit constants to memory.
4407 locations->AddTemp(Location::RequiresRegister());
4408 }
4409
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorX86_64 * codegen,bool is_volatile,bool is_atomic,bool byte_swap=false)4410 static void GenerateVarHandleSet(HInvoke* invoke,
4411 CodeGeneratorX86_64* codegen,
4412 bool is_volatile,
4413 bool is_atomic,
4414 bool byte_swap = false) {
4415 X86_64Assembler* assembler = codegen->GetAssembler();
4416
4417 LocationSummary* locations = invoke->GetLocations();
4418 const uint32_t last_temp_index = locations->GetTempCount() - 1;
4419
4420 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4421 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4422
4423 VarHandleTarget target = GetVarHandleTarget(invoke);
4424 VarHandleSlowPathX86_64* slow_path = nullptr;
4425 if (!byte_swap) {
4426 slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
4427 GenerateVarHandleTarget(invoke, target, codegen);
4428 if (slow_path != nullptr) {
4429 slow_path->SetVolatile(is_volatile);
4430 slow_path->SetAtomic(is_atomic);
4431 __ Bind(slow_path->GetNativeByteOrderLabel());
4432 }
4433 }
4434
4435 switch (invoke->GetIntrinsic()) {
4436 case Intrinsics::kVarHandleSetRelease:
4437 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4438 break;
4439 case Intrinsics::kVarHandleSetVolatile:
4440 // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
4441 break;
4442 default:
4443 // Other intrinsics don't need a barrier.
4444 break;
4445 }
4446
4447 Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4448
4449 // Store the value to the field.
4450 codegen->GetInstructionCodegen()->HandleFieldSet(
4451 invoke,
4452 value_index,
4453 last_temp_index,
4454 value_type,
4455 dst,
4456 CpuRegister(target.object),
4457 is_volatile,
4458 is_atomic,
4459 /*value_can_be_null=*/true,
4460 byte_swap,
4461 // Value can be null, and this write barrier is not being relied on for other sets.
4462 value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4463 WriteBarrierKind::kDontEmit);
4464
4465 // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4466
4467 if (slow_path != nullptr) {
4468 DCHECK(!byte_swap);
4469 __ Bind(slow_path->GetExitLabel());
4470 }
4471 }
4472
VisitVarHandleSet(HInvoke * invoke)4473 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4474 CreateVarHandleSetLocations(invoke, codegen_);
4475 }
4476
VisitVarHandleSet(HInvoke * invoke)4477 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4478 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4479 }
4480
VisitVarHandleSetOpaque(HInvoke * invoke)4481 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4482 CreateVarHandleSetLocations(invoke, codegen_);
4483 }
4484
VisitVarHandleSetOpaque(HInvoke * invoke)4485 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4486 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4487 }
4488
VisitVarHandleSetRelease(HInvoke * invoke)4489 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4490 CreateVarHandleSetLocations(invoke, codegen_);
4491 }
4492
VisitVarHandleSetRelease(HInvoke * invoke)4493 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4494 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4495 }
4496
VisitVarHandleSetVolatile(HInvoke * invoke)4497 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4498 CreateVarHandleSetLocations(invoke, codegen_);
4499 }
4500
VisitVarHandleSetVolatile(HInvoke * invoke)4501 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4502 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4503 }
4504
CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4505 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4506 CodeGeneratorX86_64* codegen) {
4507 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4508 return;
4509 }
4510
4511 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4512 uint32_t expected_value_index = number_of_arguments - 2;
4513 uint32_t new_value_index = number_of_arguments - 1;
4514 DataType::Type return_type = invoke->GetType();
4515 DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4516 DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4517
4518 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4519
4520 if (DataType::IsFloatingPointType(return_type)) {
4521 locations->SetOut(Location::RequiresFpuRegister());
4522 } else {
4523 // Take advantage of the fact that CMPXCHG writes result to RAX.
4524 locations->SetOut(Location::RegisterLocation(RAX));
4525 }
4526
4527 if (DataType::IsFloatingPointType(expected_type)) {
4528 // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4529 locations->AddTemp(Location::RegisterLocation(RAX));
4530 // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4531 locations->AddTemp(Location::RequiresRegister());
4532 } else {
4533 // Ensure that expected value is in RAX, as required by CMPXCHG.
4534 locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4535 locations->SetInAt(new_value_index, Location::RequiresRegister());
4536 if (expected_type == DataType::Type::kReference) {
4537 // Need two temporaries for MarkGCCard.
4538 locations->AddRegisterTemps(2);
4539 if (codegen->EmitReadBarrier()) {
4540 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4541 DCHECK(kUseBakerReadBarrier);
4542 locations->AddTemp(Location::RequiresRegister());
4543 }
4544 }
4545 // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4546 DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4547 }
4548 }
4549
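// Common code for VarHandle.compareAndSet / weakCompareAndSet* (is_cmpxchg=false, boolean
// result) and compareAndExchange* (is_cmpxchg=true, returning the witness value).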
GenerateVarHandleCompareAndSetOrExchange(HInvoke * invoke,CodeGeneratorX86_64 * codegen,bool is_cmpxchg,bool byte_swap=false)4550 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4551 CodeGeneratorX86_64* codegen,
4552 bool is_cmpxchg,
4553 bool byte_swap = false) {
4554 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4555
4556 X86_64Assembler* assembler = codegen->GetAssembler();
4557 LocationSummary* locations = invoke->GetLocations();
4558
4559 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4560 uint32_t expected_value_index = number_of_arguments - 2;
4561 uint32_t new_value_index = number_of_arguments - 1;
4562 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4563
4564 VarHandleSlowPathX86_64* slow_path = nullptr;
4565 VarHandleTarget target = GetVarHandleTarget(invoke);
4566 if (!byte_swap) {
4567 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4568 GenerateVarHandleTarget(invoke, target, codegen);
4569 if (slow_path != nullptr) {
4570 __ Bind(slow_path->GetNativeByteOrderLabel());
4571 }
4572 }
4573
4574 uint32_t temp_count = locations->GetTempCount();
4575 GenCompareAndSetOrExchange(codegen,
4576 invoke,
4577 type,
4578 CpuRegister(target.object),
4579 CpuRegister(target.offset),
4580 /*temp1_index=*/ temp_count - 1,
4581 /*temp2_index=*/ temp_count - 2,
4582 /*temp3_index=*/ temp_count - 3,
4583 locations->InAt(new_value_index),
4584 locations->InAt(expected_value_index),
4585 locations->Out(),
4586 is_cmpxchg,
4587 byte_swap);
4588
4589 // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4590 // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
4591
4592 if (slow_path != nullptr) {
4593 DCHECK(!byte_swap);
4594 __ Bind(slow_path->GetExitLabel());
4595 }
4596 }
4597
VisitVarHandleCompareAndSet(HInvoke * invoke)4598 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4599 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4600 }
4601
VisitVarHandleCompareAndSet(HInvoke * invoke)4602 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4603 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4604 }
4605
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4606 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4607 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4608 }
4609
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4610 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4611 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4612 }
4613
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4614 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4615 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4616 }
4617
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4618 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4619 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4620 }
4621
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4622 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4623 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4624 }
4625
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4626 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4627 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4628 }
4629
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4630 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4631 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4632 }
4633
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4634 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4635 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4636 }
4637
VisitVarHandleCompareAndExchange(HInvoke * invoke)4638 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4639 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4640 }
4641
VisitVarHandleCompareAndExchange(HInvoke * invoke)4642 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4643 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4644 }
4645
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4646 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4647 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4648 }
4649
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4650 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4651 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4652 }
4653
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4654 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4655 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4656 }
4657
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4658 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4659 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4660 }
4661
CreateVarHandleGetAndSetLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4662 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4663 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4664 return;
4665 }
4666
4667 // Get the type from the shorty as the invokes may not return a value.
4668 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4669 uint32_t new_value_index = number_of_arguments - 1;
4670 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4671 DataType::Type return_type = invoke->GetType();
4672 const bool is_void = return_type == DataType::Type::kVoid;
4673 DCHECK_IMPLIES(!is_void, return_type == value_type);
4674
4675 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4676
4677 if (DataType::IsFloatingPointType(value_type)) {
4678 // Only set the `out` register if it's needed. In the void case we don't use `out`.
4679 if (!is_void) {
4680 locations->SetOut(Location::RequiresFpuRegister());
4681 }
4682 // A temporary is needed to load the new floating-point value into a register for XCHG.
4683 locations->AddTemp(Location::RequiresRegister());
4684 } else {
4685 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4686 if (value_type == DataType::Type::kReference) {
4687 // Need two temporaries for MarkGCCard.
4688 locations->AddRegisterTemps(2);
4689 if (codegen->EmitReadBarrier()) {
4690 // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4691 DCHECK(kUseBakerReadBarrier);
4692 locations->AddTemp(Location::RequiresRegister());
4693 }
4694 }
4695 // Only set the `out` register if it's needed. In the void case RAX is still used in the same
4696 // manner, since it is marked as a temp register.
4697 if (is_void) {
4698 locations->AddTemp(Location::RegisterLocation(RAX));
4699 } else {
4700 // Use the same register for both the new value and output to take advantage of XCHG.
4701 // It doesn't have to be RAX, but we need to pick one register so that both are the same.
4702 locations->SetOut(Location::RegisterLocation(RAX));
4703 }
4704 }
4705 }
4706
GenerateVarHandleGetAndSet(HInvoke * invoke,CodeGeneratorX86_64 * codegen,Location value,DataType::Type type,Address field_addr,CpuRegister ref,bool byte_swap)4707 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4708 CodeGeneratorX86_64* codegen,
4709 Location value,
4710 DataType::Type type,
4711 Address field_addr,
4712 CpuRegister ref,
4713 bool byte_swap) {
4714 X86_64Assembler* assembler = codegen->GetAssembler();
4715 LocationSummary* locations = invoke->GetLocations();
4716 Location out = locations->Out();
4717 uint32_t temp_count = locations->GetTempCount();
4718 DataType::Type return_type = invoke->GetType();
4719 const bool is_void = return_type == DataType::Type::kVoid;
4720 DCHECK_IMPLIES(!is_void, return_type == type);
4721
4722 if (DataType::IsFloatingPointType(type)) {
4723 // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4724 // exchange it with the field, and move the old value into the output FP register.
4725 Location temp = locations->GetTemp(temp_count - 1);
4726 codegen->Move(temp, value);
4727 bool is64bit = (type == DataType::Type::kFloat64);
4728 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4729 if (byte_swap) {
4730 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4731 }
4732 if (is64bit) {
4733 __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4734 } else {
4735 __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4736 }
4737 if (byte_swap) {
4738 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4739 }
4740 if (!is_void) {
4741 __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
4742 }
4743 } else if (type == DataType::Type::kReference) {
4744 // `getAndSet` for references: load reference and atomically exchange it with the field.
4745 // Output register is the same as the one holding new value, so no need to move the result.
4746 DCHECK(!byte_swap);
4747
4748 // In the void case, we have an extra temp register, which is used to signal the register
4749 // allocator that we are clobbering RAX.
4750 const uint32_t extra_temp = is_void ? 1u : 0u;
4751 DCHECK_IMPLIES(is_void,
4752 locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4753
4754 CpuRegister temp1 = locations->GetTemp(temp_count - extra_temp - 1u).AsRegister<CpuRegister>();
4755 CpuRegister temp2 = locations->GetTemp(temp_count - extra_temp - 2u).AsRegister<CpuRegister>();
4756 CpuRegister valreg = value.AsRegister<CpuRegister>();
4757
4758 if (codegen->EmitBakerReadBarrier()) {
4759 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4760 invoke,
4761 locations->GetTemp(temp_count - extra_temp - 3u),
4762 ref,
4763 field_addr,
4764 /*needs_null_check=*/false,
4765 /*always_update_field=*/true,
4766 &temp1,
4767 &temp2);
4768 }
4769 codegen->MarkGCCard(temp1, temp2, ref);
4770
4771 DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4772 if (kPoisonHeapReferences) {
4773 // Use a temp to avoid poisoning the base of the field address, which might happen if `valreg` is
4774 // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4775 __ movl(temp1, valreg);
4776 __ PoisonHeapReference(temp1);
4777 __ xchgl(temp1, field_addr);
4778 if (!is_void) {
4779 __ UnpoisonHeapReference(temp1);
4780 __ movl(valreg, temp1);
4781 }
4782 } else {
4783 __ xchgl(valreg, field_addr);
4784 }
4785 } else {
4786 // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4787 // register is the same as the one holding new value. Do sign extend / zero extend as needed.
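// Note: XCHG with a memory operand carries an implicit LOCK prefix, so each exchange below is
// atomic and has full memory-barrier semantics without extra fences.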
4788 if (byte_swap) {
4789 codegen->GetInstructionCodegen()->Bswap(value, type);
4790 }
4791 CpuRegister valreg = value.AsRegister<CpuRegister>();
4792 DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4793 switch (type) {
4794 case DataType::Type::kBool:
4795 case DataType::Type::kUint8:
4796 __ xchgb(valreg, field_addr);
4797 if (!is_void) {
4798 __ movzxb(valreg, valreg);
4799 }
4800 break;
4801 case DataType::Type::kInt8:
4802 __ xchgb(valreg, field_addr);
4803 if (!is_void) {
4804 __ movsxb(valreg, valreg);
4805 }
4806 break;
4807 case DataType::Type::kUint16:
4808 __ xchgw(valreg, field_addr);
4809 if (!is_void) {
4810 __ movzxw(valreg, valreg);
4811 }
4812 break;
4813 case DataType::Type::kInt16:
4814 __ xchgw(valreg, field_addr);
4815 if (!is_void) {
4816 __ movsxw(valreg, valreg);
4817 }
4818 break;
4819 case DataType::Type::kInt32:
4820 case DataType::Type::kUint32:
4821 __ xchgl(valreg, field_addr);
4822 break;
4823 case DataType::Type::kInt64:
4824 case DataType::Type::kUint64:
4825 __ xchgq(valreg, field_addr);
4826 break;
4827 default:
4828 LOG(FATAL) << "unexpected type in getAndSet intrinsic: " << type;
4829 UNREACHABLE();
4830 }
4831 if (byte_swap) {
4832 codegen->GetInstructionCodegen()->Bswap(value, type);
4833 }
4834 }
4835 }
4836
CreateVarHandleGetAndBitwiseOpLocations(HInvoke * invoke,CodeGeneratorX86_64 * codegen)4837 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4838 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4839 return;
4840 }
4841
4842 // Get the type from the shorty as the invokes may not return a value.
4843 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4844 uint32_t new_value_index = number_of_arguments - 1;
4845 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4846 DataType::Type return_type = invoke->GetType();
4847 const bool is_void = return_type == DataType::Type::kVoid;
4848 DCHECK_IMPLIES(!is_void, return_type == value_type);
4849
4850 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4851
4852 DCHECK_NE(DataType::Type::kReference, value_type);
4853 DCHECK(!DataType::IsFloatingPointType(value_type));
4854 // A temporary to compute the bitwise operation on the old and the new values.
4855 locations->AddTemp(Location::RequiresRegister());
4856 // We need value to be either in a register, or a 32-bit constant (as there are no arithmetic
4857 // instructions that accept a 64-bit immediate on x86_64).
4858 locations->SetInAt(new_value_index,
4859 DataType::Is64BitType(value_type) ?
4860 Location::RequiresRegister() :
4861 Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4862 if (is_void) {
4863 // RAX is used as a temporary even when we do not output it, so reserve it. This has to be
4864 // requested before the other temporary, since there is a variable number of temp registers
4865 // and the other temp register is expected to be the last one.
4866 locations->AddTemp(Location::RegisterLocation(RAX));
4867 } else {
4868 // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4869 locations->SetOut(Location::RegisterLocation(RAX));
4870 }
4871 }
4872
GenerateVarHandleGetAndOp(HInvoke * invoke,CodeGeneratorX86_64 * codegen,Location value,DataType::Type type,Address field_addr,GetAndUpdateOp get_and_update_op,bool byte_swap)4873 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4874 CodeGeneratorX86_64* codegen,
4875 Location value,
4876 DataType::Type type,
4877 Address field_addr,
4878 GetAndUpdateOp get_and_update_op,
4879 bool byte_swap) {
4880 X86_64Assembler* assembler = codegen->GetAssembler();
4881 LocationSummary* locations = invoke->GetLocations();
4882 // In the void case, we have an extra temp register, which is used to signal the register
4883 // allocator that we are clobbering RAX.
4884 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4885 const uint32_t extra_temp = is_void ? 1u : 0u;
4886 const uint32_t temp_count = locations->GetTempCount();
4887 DCHECK_IMPLIES(is_void,
4888 locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4889 Location temp_loc = locations->GetTemp(temp_count - extra_temp - 1u);
4890 Location rax_loc = Location::RegisterLocation(RAX);
4891 DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
4892 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4893 bool is64Bit = DataType::Is64BitType(type);
4894
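// Descriptive sketch of the emitted compare-and-swap retry loop (comment only, roughly):
//   retry:
//     RAX  <- [field]              (LoadFromMemoryNoReference)
//     temp <- RAX                  (byte-swapped to native order if `byte_swap`)
//     temp <- temp OP value        (ADD/AND/OR/XOR)
//     LOCK CMPXCHG [field], temp   (temp byte-swapped back first if `byte_swap`)
//     jnz retry                    (ZF clear means [field] changed concurrently)
//   RAX then holds the old value (byte-swapped to native order if `byte_swap`).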
4895 NearLabel retry;
4896 __ Bind(&retry);
4897
4898 // Load field value into RAX and copy it into a temporary register for the operation.
4899 codegen->LoadFromMemoryNoReference(type, rax_loc, field_addr);
4900 codegen->Move(temp_loc, rax_loc);
4901 if (byte_swap) {
4902 // Byte swap the temporary, since we need to perform the operation in native endianness.
4903 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4904 }
4905
4906 DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4907 int32_t const_value = value.IsConstant()
4908 ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4909 : 0;
4910
4911 // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4912 switch (get_and_update_op) {
4913 case GetAndUpdateOp::kAdd:
4914 DCHECK(byte_swap); // The non-byte-swapping path should use a faster XADD instruction.
4915 if (is64Bit) {
4916 __ addq(temp, value.AsRegister<CpuRegister>());
4917 } else if (value.IsConstant()) {
4918 __ addl(temp, Immediate(const_value));
4919 } else {
4920 __ addl(temp, value.AsRegister<CpuRegister>());
4921 }
4922 break;
4923 case GetAndUpdateOp::kBitwiseAnd:
4924 if (is64Bit) {
4925 __ andq(temp, value.AsRegister<CpuRegister>());
4926 } else if (value.IsConstant()) {
4927 __ andl(temp, Immediate(const_value));
4928 } else {
4929 __ andl(temp, value.AsRegister<CpuRegister>());
4930 }
4931 break;
4932 case GetAndUpdateOp::kBitwiseOr:
4933 if (is64Bit) {
4934 __ orq(temp, value.AsRegister<CpuRegister>());
4935 } else if (value.IsConstant()) {
4936 __ orl(temp, Immediate(const_value));
4937 } else {
4938 __ orl(temp, value.AsRegister<CpuRegister>());
4939 }
4940 break;
4941 case GetAndUpdateOp::kBitwiseXor:
4942 if (is64Bit) {
4943 __ xorq(temp, value.AsRegister<CpuRegister>());
4944 } else if (value.IsConstant()) {
4945 __ xorl(temp, Immediate(const_value));
4946 } else {
4947 __ xorl(temp, value.AsRegister<CpuRegister>());
4948 }
4949 break;
4950 default:
4951 LOG(FATAL) << "unexpected operation";
4952 UNREACHABLE();
4953 }
4954
4955 if (byte_swap) {
4956 // RAX still contains the original value, but we need to byte swap the temporary back.
4957 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4958 }
4959
4960 switch (type) {
4961 case DataType::Type::kBool:
4962 case DataType::Type::kUint8:
4963 case DataType::Type::kInt8:
4964 __ LockCmpxchgb(field_addr, temp);
4965 break;
4966 case DataType::Type::kUint16:
4967 case DataType::Type::kInt16:
4968 __ LockCmpxchgw(field_addr, temp);
4969 break;
4970 case DataType::Type::kInt32:
4971 case DataType::Type::kUint32:
4972 __ LockCmpxchgl(field_addr, temp);
4973 break;
4974 case DataType::Type::kInt64:
4975 case DataType::Type::kUint64:
4976 __ LockCmpxchgq(field_addr, temp);
4977 break;
4978 default:
4979 LOG(FATAL) << "unexpected type in getAndBitwiseOp intrinsic";
4980 UNREACHABLE();
4981 }
4982
4983 __ j(kNotZero, &retry);
4984
4985 // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
4986 // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
4987 if (byte_swap) {
4988 codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
4989 }
4990 }
4991
4992 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4993 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4994 return;
4995 }
4996
4997 // Get the type from the shorty as the invokes may not return a value.
4998 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4999 uint32_t new_value_index = number_of_arguments - 1;
5000 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5001 DataType::Type return_type = invoke->GetType();
5002 const bool is_void = return_type == DataType::Type::kVoid;
5003 DCHECK_IMPLIES(!is_void, return_type == value_type);
5004
5005 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
5006
5007 if (DataType::IsFloatingPointType(value_type)) {
5008 // Only set the `out` register if it's needed. In the void case we don't use `out`.
5009 if (!is_void) {
5010 locations->SetOut(Location::RequiresFpuRegister());
5011 }
5012 // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
5013 locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
5014 // CMPXCHG clobbers RAX.
5015 locations->AddTemp(Location::RegisterLocation(RAX));
5016 // An FP temporary to load the old value from the field and perform FP addition.
5017 locations->AddTemp(Location::RequiresFpuRegister());
5018 // A temporary to hold the new value for CMPXCHG.
5019 locations->AddTemp(Location::RequiresRegister());
5020 } else {
5021 DCHECK_NE(value_type, DataType::Type::kReference);
5022 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
5023 if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
5024 // For byte array views with non-native endianness we need extra BSWAP operations, so we
5025 // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In that
5026 // case we need two temporary registers: one to hold the value instead of RAX (which may get
5027 // clobbered by repeated CMPXCHG) and one for performing the operation. At compile time we
5028 // cannot distinguish this case from arrays or native-endian byte array views.
5029 locations->AddRegisterTemps(2);
5030 }
5031 // Only set the `out` register if it's needed. In the void case we can still use RAX in the
5032 // same manner, as it is marked as a temp register.
5033 if (is_void) {
5034 locations->AddTemp(Location::RegisterLocation(RAX));
5035 } else {
5036 // Use the same register for both the new value and output to take advantage of XADD.
5037 // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
5038 // back to GenerateVarHandleGetAndOp that expects out in RAX.
5039 locations->SetOut(Location::RegisterLocation(RAX));
5040 }
5041 }
5042 }
5043
5044 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
5045 CodeGeneratorX86_64* codegen,
5046 Location value,
5047 DataType::Type type,
5048 Address field_addr,
5049 bool byte_swap) {
5050 X86_64Assembler* assembler = codegen->GetAssembler();
5051 LocationSummary* locations = invoke->GetLocations();
5052 Location out = locations->Out();
5053 uint32_t temp_count = locations->GetTempCount();
5054
5055 DataType::Type return_type = invoke->GetType();
5056 const bool is_void = return_type == DataType::Type::kVoid;
5057 DCHECK_IMPLIES(!is_void, return_type == type);
5058
5059 if (DataType::IsFloatingPointType(type)) {
5060 if (byte_swap) {
5061 // This code should never be executed: it is the case of a byte array view (since it requires
5062 // a byte swap), and varhandles for byte array views support numeric atomic update access modes
5063 // only for int and long, but not for floating-point types (see javadoc comments for
5064 // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). However, the ART varhandle
5065 // implementation for byte array views treats floating-point types as numeric types in
5066 // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
5067 // fails the access mode check at runtime before reaching this point. The illegal instruction
5068 // UD2 ensures that if control flow gets here by mistake, we will notice.
5069 __ ud2();
5070 }
5071
5072 // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register and
5073 // into RAX for CMPXCHG, add the new FP value to the old one, move the sum to a non-FP temporary
5074 // for CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
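// Rough shape of the emitted loop (descriptive comment only):
//   retry:
//     fptemp <- [field]; RAX <- fptemp   (byte-swap RAX and refresh fptemp if `byte_swap`)
//     fptemp <- fptemp + value           (ADDSS/ADDSD)
//     temp   <- fptemp                   (byte-swap temp and RAX back to memory order if `byte_swap`)
//     LOCK CMPXCHG [field], temp; jnz retry
//   out <- RAX (byte-swapped first if needed; skipped entirely in the void case).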
5075 bool is64bit = (type == DataType::Type::kFloat64);
5076 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
5077 XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
5078 Location rax_loc = Location::RegisterLocation(RAX);
5079 Location temp_loc = locations->GetTemp(temp_count - 1);
5080 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5081
5082 NearLabel retry;
5083 __ Bind(&retry);
5084
5085 // Read the value from memory into an FP register and copy it into RAX.
5086 if (is64bit) {
5087 __ movsd(fptemp, field_addr);
5088 } else {
5089 __ movss(fptemp, field_addr);
5090 }
5091 __ movd(CpuRegister(RAX), fptemp, is64bit);
5092 // If necessary, byte swap RAX and update the FP register so that it also holds the byte-swapped value.
5093 if (byte_swap) {
5094 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5095 __ movd(fptemp, CpuRegister(RAX), is64bit);
5096 }
5097 // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
5098 if (is64bit) {
5099 __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
5100 } else {
5101 __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
5102 }
5103 __ movd(temp, fptemp, is64bit);
5104 // If necessary, byte swap RAX before CMPXCHG and the temporary before copying to FP register.
5105 if (byte_swap) {
5106 codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
5107 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5108 }
5109 if (is64bit) {
5110 __ LockCmpxchgq(field_addr, temp);
5111 } else {
5112 __ LockCmpxchgl(field_addr, temp);
5113 }
5114
5115 __ j(kNotZero, &retry);
5116
5117 // The old value is in RAX, byte swap if necessary.
5118 if (byte_swap) {
5119 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5120 }
5121 if (!is_void) {
5122 __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
5123 }
5124 } else {
5125 if (byte_swap) {
5126 // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
5127 // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
5128 // implementation that is also used for bitwise operations.
5129 // Move the value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
5130 DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
5131 // In the void case, we have an extra temp register, which is used to signal the register
5132 // allocator that we are clobbering RAX.
5133 const uint32_t extra_temp = is_void ? 1u : 0u;
5134 DCHECK_IMPLIES(is_void,
5135 locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
5136 Location temp = locations->GetTemp(temp_count - extra_temp - 2u);
5137 codegen->Move(temp, value);
5138 GenerateVarHandleGetAndOp(
5139 invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
5140 } else {
5141 // `getAndAdd` for integral types: atomically exchange the new value with the field and add
5142 // the old value to the field. The output register is the same as the one holding the new value.
5143 // Sign-extend / zero-extend as needed.
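// Note: LOCK XADD atomically performs { old <- [field]; [field] <- old + reg; reg <- old },
// i.e. after the instruction `valreg` holds the field's previous value.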
5144 CpuRegister valreg = value.AsRegister<CpuRegister>();
5145 DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
5146 switch (type) {
5147 case DataType::Type::kBool:
5148 case DataType::Type::kUint8:
5149 __ LockXaddb(field_addr, valreg);
5150 if (!is_void) {
5151 __ movzxb(valreg, valreg);
5152 }
5153 break;
5154 case DataType::Type::kInt8:
5155 __ LockXaddb(field_addr, valreg);
5156 if (!is_void) {
5157 __ movsxb(valreg, valreg);
5158 }
5159 break;
5160 case DataType::Type::kUint16:
5161 __ LockXaddw(field_addr, valreg);
5162 if (!is_void) {
5163 __ movzxw(valreg, valreg);
5164 }
5165 break;
5166 case DataType::Type::kInt16:
5167 __ LockXaddw(field_addr, valreg);
5168 if (!is_void) {
5169 __ movsxw(valreg, valreg);
5170 }
5171 break;
5172 case DataType::Type::kInt32:
5173 case DataType::Type::kUint32:
5174 __ LockXaddl(field_addr, valreg);
5175 break;
5176 case DataType::Type::kInt64:
5177 case DataType::Type::kUint64:
5178 __ LockXaddq(field_addr, valreg);
5179 break;
5180 default:
5181 LOG(FATAL) << "unexpected type in getAndAdd intrinsic";
5182 UNREACHABLE();
5183 }
5184 }
5185 }
5186 }
5187
5188 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5189 CodeGeneratorX86_64* codegen,
5190 GetAndUpdateOp get_and_update_op,
5191 bool need_any_store_barrier,
5192 bool need_any_any_barrier,
5193 bool byte_swap = false) {
5194 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
5195
5196 X86_64Assembler* assembler = codegen->GetAssembler();
5197 LocationSummary* locations = invoke->GetLocations();
5198
5199 // Get the type from the shorty as the invokes may not return a value.
5200 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5201 Location value = locations->InAt(number_of_arguments - 1);
5202 DataType::Type type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
5203
5204 VarHandleSlowPathX86_64* slow_path = nullptr;
5205 VarHandleTarget target = GetVarHandleTarget(invoke);
5206 if (!byte_swap) {
5207 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
5208 GenerateVarHandleTarget(invoke, target, codegen);
5209 if (slow_path != nullptr) {
5210 slow_path->SetGetAndUpdateOp(get_and_update_op);
5211 slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
5212 slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
5213 __ Bind(slow_path->GetNativeByteOrderLabel());
5214 }
5215 }
5216
5217 CpuRegister ref(target.object);
5218 Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
5219
5220 if (need_any_store_barrier) {
5221 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5222 }
5223
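// Note: the LOCK-prefixed read-modify-write instructions and XCHG with a memory operand used
// below already provide full ordering on x86-64; the explicit barriers here express the
// required VarHandle ordering to the code generator, which may or may not emit actual fences.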
5224 switch (get_and_update_op) {
5225 case GetAndUpdateOp::kSet:
5226 GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
5227 break;
5228 case GetAndUpdateOp::kAdd:
5229 GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
5230 break;
5231 case GetAndUpdateOp::kBitwiseAnd:
5232 case GetAndUpdateOp::kBitwiseOr:
5233 case GetAndUpdateOp::kBitwiseXor:
5234 GenerateVarHandleGetAndOp(
5235 invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
5236 break;
5237 }
5238
5239 if (need_any_any_barrier) {
5240 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5241 }
5242
5243 if (slow_path != nullptr) {
5244 DCHECK(!byte_swap);
5245 __ Bind(slow_path->GetExitLabel());
5246 }
5247 }
5248
5249 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5250 CreateVarHandleGetAndSetLocations(invoke, codegen_);
5251 }
5252
5253 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5254 // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
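// Illustrative Java-level call this intrinsic implements (variable names are hypothetical):
//   int old = (int) VH.getAndSet(obj, newValue);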
5255 GenerateVarHandleGetAndUpdate(invoke,
5256 codegen_,
5257 GetAndUpdateOp::kSet,
5258 /*need_any_store_barrier=*/ true,
5259 /*need_any_any_barrier=*/ true);
5260 }
5261
5262 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5263 CreateVarHandleGetAndSetLocations(invoke, codegen_);
5264 }
5265
5266 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5267 // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5268 GenerateVarHandleGetAndUpdate(invoke,
5269 codegen_,
5270 GetAndUpdateOp::kSet,
5271 /*need_any_store_barrier=*/ false,
5272 /*need_any_any_barrier=*/ false);
5273 }
5274
5275 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5276 CreateVarHandleGetAndSetLocations(invoke, codegen_);
5277 }
5278
5279 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5280 // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5281 GenerateVarHandleGetAndUpdate(invoke,
5282 codegen_,
5283 GetAndUpdateOp::kSet,
5284 /*need_any_store_barrier=*/ true,
5285 /*need_any_any_barrier=*/ false);
5286 }
5287
5288 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5289 CreateVarHandleGetAndAddLocations(invoke, codegen_);
5290 }
5291
5292 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5293 // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5294 GenerateVarHandleGetAndUpdate(invoke,
5295 codegen_,
5296 GetAndUpdateOp::kAdd,
5297 /*need_any_store_barrier=*/ true,
5298 /*need_any_any_barrier=*/ true);
5299 }
5300
5301 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5302 CreateVarHandleGetAndAddLocations(invoke, codegen_);
5303 }
5304
5305 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5306 // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5307 GenerateVarHandleGetAndUpdate(invoke,
5308 codegen_,
5309 GetAndUpdateOp::kAdd,
5310 /*need_any_store_barrier=*/ false,
5311 /*need_any_any_barrier=*/ false);
5312 }
5313
5314 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5315 CreateVarHandleGetAndAddLocations(invoke, codegen_);
5316 }
5317
5318 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5319 // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5320 GenerateVarHandleGetAndUpdate(invoke,
5321 codegen_,
5322 GetAndUpdateOp::kAdd,
5323 /*need_any_store_barrier=*/ true,
5324 /*need_any_any_barrier=*/ false);
5325 }
5326
5327 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5328 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5329 }
5330
5331 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5332 // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5333 GenerateVarHandleGetAndUpdate(invoke,
5334 codegen_,
5335 GetAndUpdateOp::kBitwiseAnd,
5336 /*need_any_store_barrier=*/ true,
5337 /*need_any_any_barrier=*/ true);
5338 }
5339
5340 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5341 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5342 }
5343
5344 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5345 // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5346 GenerateVarHandleGetAndUpdate(invoke,
5347 codegen_,
5348 GetAndUpdateOp::kBitwiseAnd,
5349 /*need_any_store_barrier=*/ false,
5350 /*need_any_any_barrier=*/ false);
5351 }
5352
5353 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5354 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5355 }
5356
5357 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5358 // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5359 GenerateVarHandleGetAndUpdate(invoke,
5360 codegen_,
5361 GetAndUpdateOp::kBitwiseAnd,
5362 /*need_any_store_barrier=*/ true,
5363 /*need_any_any_barrier=*/ false);
5364 }
5365
5366 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5367 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5368 }
5369
5370 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5371 // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5372 GenerateVarHandleGetAndUpdate(invoke,
5373 codegen_,
5374 GetAndUpdateOp::kBitwiseOr,
5375 /*need_any_store_barrier=*/ true,
5376 /*need_any_any_barrier=*/ true);
5377 }
5378
5379 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5380 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5381 }
5382
5383 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5384 // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5385 GenerateVarHandleGetAndUpdate(invoke,
5386 codegen_,
5387 GetAndUpdateOp::kBitwiseOr,
5388 /*need_any_store_barrier=*/ false,
5389 /*need_any_any_barrier=*/ false);
5390 }
5391
5392 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5393 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5394 }
5395
5396 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5397 // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5398 GenerateVarHandleGetAndUpdate(invoke,
5399 codegen_,
5400 GetAndUpdateOp::kBitwiseOr,
5401 /*need_any_store_barrier=*/ true,
5402 /*need_any_any_barrier=*/ false);
5403 }
5404
5405 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5406 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5407 }
5408
5409 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5410 // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5411 GenerateVarHandleGetAndUpdate(invoke,
5412 codegen_,
5413 GetAndUpdateOp::kBitwiseXor,
5414 /*need_any_store_barrier=*/ true,
5415 /*need_any_any_barrier=*/ true);
5416 }
5417
5418 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5419 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5420 }
5421
5422 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5423 // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5424 GenerateVarHandleGetAndUpdate(invoke,
5425 codegen_,
5426 GetAndUpdateOp::kBitwiseXor,
5427 /*need_any_store_barrier=*/ false,
5428 /*need_any_any_barrier=*/ false);
5429 }
5430
5431 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5432 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5433 }
5434
5435 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5436 // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5437 GenerateVarHandleGetAndUpdate(invoke,
5438 codegen_,
5439 GetAndUpdateOp::kBitwiseXor,
5440 /*need_any_store_barrier=*/ true,
5441 /*need_any_any_barrier=*/ false);
5442 }
5443
5444 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
5445 DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5446 X86_64Assembler* assembler = codegen->GetAssembler();
5447
5448 HInvoke* invoke = GetInvoke();
5449 LocationSummary* locations = invoke->GetLocations();
5450 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5451 DataType::Type value_type =
5452 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5453 DCHECK_NE(value_type, DataType::Type::kReference);
5454 size_t size = DataType::Size(value_type);
5455 DCHECK_GT(size, 1u);
5456
5457 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
5458 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
5459 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
5460 CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1u).AsRegister<CpuRegister>();
5461
5462 MemberOffset class_offset = mirror::Object::ClassOffset();
5463 MemberOffset array_length_offset = mirror::Array::LengthOffset();
5464 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5465 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5466
5467 VarHandleTarget target = GetVarHandleTarget(invoke);
5468
5469 __ Bind(GetByteArrayViewCheckLabel());
5470
5471 // The main path checked that the coordinateType0 is an array class that matches
5472 // the class of the actual coordinate argument, but its component type does not match the value type.
5473 // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5474 codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5475 assembler->MaybePoisonHeapReference(temp);
5476 __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
5477 __ j(kNotEqual, GetEntryLabel());
5478
5479 // Check for array index out of bounds.
5480 __ movl(temp, Address(object, array_length_offset.Int32Value()));
5481 // SUB sets flags in the same way as CMP.
5482 __ subl(temp, index);
5483 __ j(kBelowEqual, GetEntryLabel());
5484 // The remaining length (array length minus index) must be at least the `value_type` size.
5485 __ cmpl(temp, Immediate(size));
5486 __ j(kBelow, GetEntryLabel());
5487
5488 // Construct the target.
5489 __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
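// `target.offset` now holds `data_offset + index`, i.e. the byte offset of the accessed element
// within the array object; the main path then addresses the data as
// Address(target.object, target.offset, TIMES_1, 0).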
5490
5491 // Alignment check. For unaligned access, go to the runtime.
5492 DCHECK(IsPowerOfTwo(size));
5493 __ testl(CpuRegister(target.offset), Immediate(size - 1u));
5494 __ j(kNotZero, GetEntryLabel());
5495
5496 // Byte order check. For native byte order return to the main path.
5497 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5498 IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5499 // There is no reason to differentiate between native byte order and byte-swap
5500 // for setting a zero bit pattern. Just return to the main path.
5501 __ jmp(GetNativeByteOrderLabel());
5502 return;
5503 }
5504 __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
5505 __ j(kNotEqual, GetNativeByteOrderLabel());
5506
5507 switch (access_mode_template) {
5508 case mirror::VarHandle::AccessModeTemplate::kGet:
5509 GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
5510 break;
5511 case mirror::VarHandle::AccessModeTemplate::kSet:
5512 GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
5513 break;
5514 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5515 GenerateVarHandleCompareAndSetOrExchange(
5516 invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
5517 break;
5518 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5519 GenerateVarHandleCompareAndSetOrExchange(
5520 invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
5521 break;
5522 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5523 GenerateVarHandleGetAndUpdate(invoke,
5524 codegen,
5525 get_and_update_op_,
5526 need_any_store_barrier_,
5527 need_any_any_barrier_,
5528 /*byte_swap=*/ true);
5529 break;
5530 }
5531
5532 __ jmp(GetExitLabel());
5533 }
5534
5535 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
5536 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
5537 #undef MARK_UNIMPLEMENTED
5538
5539 UNREACHABLE_INTRINSICS(X86_64)
5540
5541 #undef __
5542
5543 } // namespace x86_64
5544 } // namespace art
5545