/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art HIDDEN {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::ZRegisterFrom;
using helpers::XRegisterFrom;

#define __ GetVIXLAssembler()->

// Returns whether the value of the constant can be directly encoded into the instruction as
// an immediate.
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // TODO: Make use of shift part of DUP instruction.
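    // DUP (immediate) encodes a signed 8-bit immediate, optionally shifted left
    // by 8; only the unshifted form is used here, hence the IsInt<8> check.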
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsInt<8>(value);
  }

  return false;
}

// Returns
//  - constant location - if 'constant' is an actual constant and its value can be
//    encoded into the instruction.
//  - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant);
  }

  return Location::RequiresRegister();
}

void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
  DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
            codegen_->GetSIMDRegisterWidth());
}

void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  const ZRegister dst = ZRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);
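  // Note: the DUP (immediate, scalar and indexed-element) forms used below are
  // unpredicated and write every lane; that is acceptable here since inactive
  // lanes are not relied upon by this instruction's predicated users.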
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnB(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnH(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnS(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnD(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnD(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const VRegister src = VRegisterFrom(locations->InAt(0));
  ValidateVectorLength(instruction);
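  // The low 128 bits of a Z register alias the corresponding NEON V register,
  // so lane 0 can be read with a plain NEON UMOV.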
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
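      // Boolean NOT (see VisitVecNot) writes its destination before reading the
      // source, so the output is not allowed to alias the input.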
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const VRegister dst = DRegisterFrom(locations->Out());
  const PRegister p_reg = GetVecGoverningPReg(instruction);
  ValidateVectorLength(instruction);
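  // SADDV accepts B, H and S source lanes and widens the sum to 64 bits; it has
  // no D-lane form, so the 64-bit reduction uses UADDV (for a wrapping sum the
  // result is the same regardless of signedness).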
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Saddv(dst.S(), p_reg, src.VnS());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Uaddv(dst.D(), p_reg, src.VnD());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  ValidateVectorLength(instruction);
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    __ Scvtf(dst.VnS(), p_reg, src.VnS());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Neg(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Neg(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Neg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Neg(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fneg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fneg(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      __ Abs(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kInt16:
      __ Abs(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Abs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Abs(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fabs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fabs(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
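      // Booleans are stored as 0/1, so logical NOT is computed as an EOR with a
      // vector of ones.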
      __ Dup(dst.VnB(), 1);
      __ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Not(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Not(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Not(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Not(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);

  // Note: VIXL guarantees StrictNaNPropagation for Fdiv.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      __ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister z_dst = ZRegisterFrom(locations->Out());

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  // Zero out all other elements first.
  __ Dup(z_dst.VnB(), 0);

  const VRegister dst = VRegisterFrom(locations->Out());
  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }
  ValidateVectorLength(instruction);

  // Set required elements.
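  // MOV (to element) below operates on the NEON view of the register; since the
  // V register aliases the low 128 bits of the Z register, this writes lane 0
  // of the freshly zeroed vector in place.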
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector MultiplyAccumulate instruction is not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();

  DCHECK(locations->InAt(0).Equals(locations->Out()));
  ValidateVectorLength(instruction);

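  // Mla/Mls compute acc +/- left * right for each active lane; all Z-register
  // operands must use the same lane size.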
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      } else {
        __ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      }
      break;
    case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  ValidateVectorLength(instruction);

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      const ZRegister tmp0 = temps.AcquireZ();
      const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));

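      // Sdot/Udot have no governing predicate, so first zero the inactive lanes
      // of both inputs via Sel; zeroed lanes then contribute nothing to the sum.
      // Per 32-bit lane i this computes acc[i] += a[4i]*b[4i] + ... + a[4i+3]*b[4i+3].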
      __ Dup(tmp1.VnB(), 0u);
      __ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
      __ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
      if (instruction->IsZeroExtending()) {
        __ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      } else {
        __ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
  ValidateVectorLength(instruction);

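  // Ld1* transfers only the active elements and zeroes the inactive elements of
  // the destination; St1* (in VisitVecStore below) leaves memory for inactive
  // elements untouched.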
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      __ Ld1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ld1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ld1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ld1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ St1b(reg.VnB(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ St1h(reg.VnH(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ St1w(reg.VnS(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ St1d(reg.VnD(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::GenerateIntegerVecComparison(
    const PRegisterWithLaneSize& pd,
    const PRegisterZ& pg,
    const ZRegister& zn,
    const ZRegister& zm,
    IfCondition cond) {
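  // kCondB/BE/A/AE are unsigned and map to CMPLO/CMPLS/CMPHI/CMPHS; the
  // remaining conditions map to the signed SVE compares.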
  switch (cond) {
    case kCondEQ:
      __ Cmpeq(pd, pg, zn, zm);
      return;
    case kCondNE:
      __ Cmpne(pd, pg, zn, zm);
      return;
    case kCondLT:
      __ Cmplt(pd, pg, zn, zm);
      return;
    case kCondLE:
      __ Cmple(pd, pg, zn, zm);
      return;
    case kCondGT:
      __ Cmpgt(pd, pg, zn, zm);
      return;
    case kCondGE:
      __ Cmpge(pd, pg, zn, zm);
      return;
    case kCondB:
      __ Cmplo(pd, pg, zn, zm);
      return;
    case kCondBE:
      __ Cmpls(pd, pg, zn, zm);
      return;
    case kCondA:
      __ Cmphi(pd, pg, zn, zm);
      return;
    case kCondAE:
      __ Cmphs(pd, pg, zn, zm);
      return;
  }
  LOG(FATAL) << "Condition '" << enum_cast<uint32_t>(cond) << "' not supported.";
  UNREACHABLE();
}
1251 
HandleVecCondition(HVecCondition * instruction)1252 void LocationsBuilderARM64Sve::HandleVecCondition(HVecCondition* instruction) {
1253   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1254   locations->SetInAt(0, Location::RequiresFpuRegister());
1255   locations->SetInAt(1, Location::RequiresFpuRegister());
1256   locations->SetOut(Location::RequiresRegister());
1257 }
1258 
HandleVecCondition(HVecCondition * instruction)1259 void InstructionCodeGeneratorARM64Sve::HandleVecCondition(HVecCondition* instruction) {
1260   DCHECK(instruction->IsPredicated());
1261   LocationSummary* locations = instruction->GetLocations();
1262   const ZRegister left = ZRegisterFrom(locations->InAt(0));
1263   const ZRegister right = ZRegisterFrom(locations->InAt(1));
1264   const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
1265   const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
1266 
1267   HVecOperation* a = instruction->InputAt(0)->AsVecOperation();
1268   HVecOperation* b = instruction->InputAt(1)->AsVecOperation();
1269   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1270             HVecOperation::ToSignedType(b->GetPackedType()));
1271   ValidateVectorLength(instruction);
1272 
1273   // TODO: Support other types, e.g: boolean, float and double.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      GenerateIntegerVecComparison(output_p_reg.VnB(),
                                   p_reg,
                                   left.VnB(),
                                   right.VnB(),
                                   instruction->GetCondition());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      GenerateIntegerVecComparison(output_p_reg.VnH(),
                                   p_reg,
                                   left.VnH(),
                                   right.VnH(),
                                   instruction->GetCondition());
      break;
    case DataType::Type::kInt32:
      GenerateIntegerVecComparison(output_p_reg.VnS(),
                                   p_reg,
                                   left.VnS(),
                                   right.VnS(),
                                   instruction->GetCondition());
      break;
    case DataType::Type::kInt64:
      GenerateIntegerVecComparison(output_p_reg.VnD(),
                                   p_reg,
                                   left.VnD(),
                                   right.VnD(),
                                   instruction->GetCondition());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

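// Stamps out the LocationsBuilder/InstructionCodeGenerator visitor pair for every vector
// condition node; each visitor simply delegates to HandleVecCondition above.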
#define FOR_EACH_VEC_CONDITION_INSTRUCTION(M) \
  M(VecEqual)                                 \
  M(VecNotEqual)                              \
  M(VecLessThan)                              \
  M(VecLessThanOrEqual)                       \
  M(VecGreaterThan)                           \
  M(VecGreaterThanOrEqual)                    \
  M(VecBelow)                                 \
  M(VecBelowOrEqual)                          \
  M(VecAbove)                                 \
  M(VecAboveOrEqual)
#define DEFINE_VEC_CONDITION_VISITORS(Name)                                                     \
void LocationsBuilderARM64Sve::Visit##Name(H##Name* comp) { HandleVecCondition(comp); }         \
void InstructionCodeGeneratorARM64Sve::Visit##Name(H##Name* comp) { HandleVecCondition(comp); }
FOR_EACH_VEC_CONDITION_INSTRUCTION(DEFINE_VEC_CONDITION_VISITORS)
#undef DEFINE_VEC_CONDITION_VISITORS
#undef FOR_EACH_VEC_CONDITION_INSTRUCTION

void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsVecPredSetOperation());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::RequiresRegister());
}

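// Negates the input predicate: the predicated SVE NOT sets each output lane that is active
// in the governing predicate and inactive in the input predicate; lanes outside the
// governing predicate are zeroed.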
void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
  DCHECK(instruction->IsPredicated());

  const PRegister input_p_reg = GetVecPredSetFixedOutPReg(
      instruction->InputAt(0)->AsVecPredSetOperation());
  const PRegister control_p_reg = GetVecGoverningPReg(instruction);
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB());
}

void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to work around the absence of support for vector predicates in register allocation.
  //
  // Semantically, the out location of this instruction and the predicate input locations of
  // its users should be a fixed predicate register (similar to
  // Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
  // SIMD regs (e.g. predicates), so fixed registers are used explicitly without exposing them
  // to the RA (through GetVecPredSetFixedOutPReg()).
  //
  // To make the RA happy, Location::NoLocation() is used for all the vector instructions'
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to the RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
  //
  // Using a core register as a hack is the easiest way to tackle this problem. The RA will
  // block one core register for the loop without actually using it; this should not be
  // a performance issue as a SIMD loop operates mainly on SIMD registers.
  //
  // TODO: Support SIMD types in the register allocator.
  locations->SetOut(Location::RequiresRegister());
}

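// Emits WHILELO, which constructs a loop-control predicate: lane i is active while
// (left + i) < right (unsigned compare). The predicate lane size is chosen so that the
// number of lanes matches the vector length of the loop.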
void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  // This instruction is not predicated; see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  // The current implementation of predicated loop execution only supports the kLO condition.
  DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
  Register left = InputRegisterAt(instruction, 0);
  Register right = InputRegisterAt(instruction, 1);
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);

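  // The ratio below is the element size in bytes (register width in bytes divided by the
  // number of elements), which selects the predicate lane size.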
  switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
    case 1u:
      __ Whilelo(output_p_reg.VnB(), left, right);
      break;
    case 2u:
      __ Whilelo(output_p_reg.VnH(), left, right);
      break;
    case 4u:
      __ Whilelo(output_p_reg.VnS(), left, right);
      break;
    case 8u:
      __ Whilelo(output_p_reg.VnD(), left, right);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::NoLocation());
  // Result of the operation - a boolean value in a core register.
  locations->SetOut(Location::RequiresRegister());
}

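// Materializes a predicate as a boolean: PTEST sets the condition flags from the input
// predicate (here governed by itself), and CSET writes 0 or 1 into the core register
// according to the requested predicate condition.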
void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  // This instruction is not predicated; see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  Register reg = OutputRegister(instruction);
  HInstruction* input = instruction->InputAt(0);
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(input->AsVecPredSetOperation());
  __ Ptest(output_p_reg, output_p_reg.VnB());
  __ Cset(reg, ARM64PCondition(instruction->GetPCondKind()));
}

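// Helpers used by the code generator for SIMD scratch allocation and moves; with SVE they
// operate on full scalable Z registers rather than fixed-width V registers.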
Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  return LocationFrom(scope->AcquireZ());
}

void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  scope->Release(ZRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
                                                            Location source) {
  __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
                                                            Location source) {
  __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}

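// Stores a SIMD value to a stack slot. A stack-to-stack copy normally goes through a scratch
// Z register; when none is available, it falls back to copying the slot in 64-bit chunks
// through a scratch core register.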
void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
                                                           Location source) {
  DCHECK(destination.IsSIMDStackSlot());

  if (source.IsFpuRegister()) {
    __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation: it happens only when there are cycles in the
      // ParallelMoveResolver graph.
      const Register temp = temps.AcquireX();
      DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
      // Emit a number of LDR/STR (XRegister, 64-bit) pairs to cover the whole SIMD register
      // size when copying a stack slot.
      for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
           offset < e;
           offset += kArm64WordSize) {
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
      }
    } else {
      const ZRegister temp = temps.AcquireZ();
      __ Ldr(temp, SveStackOperandFrom(source));
      __ Str(temp, SveStackOperandFrom(destination));
    }
  }
}

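// Saves or restores the registers live across a slow path. The spill area layout is: core
// registers at 'spill_offset', immediately followed by the floating-point/SIMD registers.
// When the graph contains SIMD code, full Z registers are spilled; otherwise only the
// 64-bit D registers need to be covered.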
template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  Register base = masm->StackPointer();

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_offset = spill_offset + core_spill_size;

  if (codegen->GetGraph()->HasSIMD()) {
    if (is_save) {
      masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    } else {
      masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    }
    codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
    return;
  }

  // Case when we only need to save/restore D-registers.
  DCHECK(!codegen->GetGraph()->HasSIMD());
  DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  }
}

void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
                                                               int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                  int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art