xref: /aosp_15_r20/art/compiler/optimizing/code_generator_arm64.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "aarch64/assembler-aarch64.h"
20 #include "aarch64/registers-aarch64.h"
21 #include "arch/arm64/asm_support_arm64.h"
22 #include "arch/arm64/instruction_set_features_arm64.h"
23 #include "arch/arm64/jni_frame_arm64.h"
24 #include "art_method-inl.h"
25 #include "base/bit_utils.h"
26 #include "base/bit_utils_iterator.h"
27 #include "class_root-inl.h"
28 #include "class_table.h"
29 #include "code_generator_utils.h"
30 #include "com_android_art_flags.h"
31 #include "dex/dex_file_types.h"
32 #include "entrypoints/quick/quick_entrypoints.h"
33 #include "entrypoints/quick/quick_entrypoints_enum.h"
34 #include "gc/accounting/card_table.h"
35 #include "gc/space/image_space.h"
36 #include "heap_poisoning.h"
37 #include "interpreter/mterp/nterp.h"
38 #include "intrinsics.h"
39 #include "intrinsics_arm64.h"
40 #include "intrinsics_list.h"
41 #include "intrinsics_utils.h"
42 #include "jit/profiling_info.h"
43 #include "linker/linker_patch.h"
44 #include "lock_word.h"
45 #include "mirror/array-inl.h"
46 #include "mirror/class-inl.h"
47 #include "mirror/var_handle.h"
48 #include "offsets.h"
49 #include "optimizing/common_arm64.h"
50 #include "optimizing/nodes.h"
51 #include "profiling_info_builder.h"
52 #include "thread.h"
53 #include "trace.h"
54 #include "utils/arm64/assembler_arm64.h"
55 #include "utils/assembler.h"
56 #include "utils/stack_checks.h"
57 
58 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
59 using vixl::ExactAssemblyScope;
60 using vixl::CodeBufferCheckScope;
61 using vixl::EmissionCheckScope;
62 
63 namespace art_flags = com::android::art::flags;
64 
65 #ifdef __
66 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
67 #endif
68 
69 namespace art HIDDEN {
70 
71 template<class MirrorType>
72 class GcRoot;
73 
74 namespace arm64 {
75 
76 using helpers::ARM64EncodableConstantOrRegister;
77 using helpers::ArtVixlRegCodeCoherentForRegSet;
78 using helpers::CPURegisterFrom;
79 using helpers::DRegisterFrom;
80 using helpers::FPRegisterFrom;
81 using helpers::HeapOperand;
82 using helpers::HeapOperandFrom;
83 using helpers::InputCPURegisterOrZeroRegAt;
84 using helpers::InputFPRegisterAt;
85 using helpers::InputOperandAt;
86 using helpers::InputRegisterAt;
87 using helpers::Int64FromLocation;
88 using helpers::LocationFrom;
89 using helpers::OperandFromMemOperand;
90 using helpers::OutputCPURegister;
91 using helpers::OutputFPRegister;
92 using helpers::OutputRegister;
93 using helpers::RegisterFrom;
94 using helpers::StackOperandFrom;
95 using helpers::VIXLRegCodeFromART;
96 using helpers::WRegisterFrom;
97 using helpers::XRegisterFrom;
98 
99 // TODO(mythria): Expand SystemRegister in vixl to include this value.
100 uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value;
101 
// The compare/jump sequence generates about (1.5 * num_entries + 3) instructions, while the jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data when num_entries is small.
105 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
106 
ARM64Condition(IfCondition cond)107 inline Condition ARM64Condition(IfCondition cond) {
108   switch (cond) {
109     case kCondEQ: return eq;
110     case kCondNE: return ne;
111     case kCondLT: return lt;
112     case kCondLE: return le;
113     case kCondGT: return gt;
114     case kCondGE: return ge;
115     case kCondB:  return lo;
116     case kCondBE: return ls;
117     case kCondA:  return hi;
118     case kCondAE: return hs;
119   }
120   LOG(FATAL) << "Unreachable";
121   UNREACHABLE();
122 }
123 
ARM64FPCondition(IfCondition cond,bool gt_bias)124 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
125   // The ARM64 condition codes can express all the necessary branches, see the
126   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
127   // There is no dex instruction or HIR that would need the missing conditions
128   // "equal or unordered" or "not equal".
129   switch (cond) {
130     case kCondEQ: return eq;
131     case kCondNE: return ne /* unordered */;
132     case kCondLT: return gt_bias ? cc : lt /* unordered */;
133     case kCondLE: return gt_bias ? ls : le /* unordered */;
134     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
135     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
136     default:
137       LOG(FATAL) << "UNREACHABLE";
138       UNREACHABLE();
139   }
140 }
141 
ARM64PCondition(HVecPredToBoolean::PCondKind cond)142 Condition ARM64PCondition(HVecPredToBoolean::PCondKind cond) {
143   switch (cond) {
144     case HVecPredToBoolean::PCondKind::kFirst: return mi;
145     case HVecPredToBoolean::PCondKind::kNFirst: return pl;
146     default:
147       LOG(FATAL) << "Unsupported condition type: " << enum_cast<uint32_t>(cond);
148       UNREACHABLE();
149   }
150 }
151 
ARM64ReturnLocation(DataType::Type return_type)152 Location ARM64ReturnLocation(DataType::Type return_type) {
153   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
154   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
155   // but we use the exact registers for clarity.
156   if (return_type == DataType::Type::kFloat32) {
157     return LocationFrom(s0);
158   } else if (return_type == DataType::Type::kFloat64) {
159     return LocationFrom(d0);
160   } else if (return_type == DataType::Type::kInt64) {
161     return LocationFrom(x0);
162   } else if (return_type == DataType::Type::kVoid) {
163     return Location::NoLocation();
164   } else {
165     return LocationFrom(w0);
166   }
167 }
168 
GetReturnLocation(DataType::Type return_type)169 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
170   return ARM64ReturnLocation(return_type);
171 }
172 
OneRegInReferenceOutSaveEverythingCallerSaves()173 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
174   InvokeRuntimeCallingConvention calling_convention;
175   RegisterSet caller_saves = RegisterSet::Empty();
176   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
177   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
178             RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
179                          DataType::Type::kReference).GetCode());
180   return caller_saves;
181 }
182 
183 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
184 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
185 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
186 
SaveLiveRegisters(CodeGenerator * codegen,LocationSummary * locations)187 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
188   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
189   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
190   for (uint32_t i : LowToHighBits(core_spills)) {
191     // If the register holds an object, update the stack mask.
192     if (locations->RegisterContainsObject(i)) {
193       locations->SetStackBit(stack_offset / kVRegSize);
194     }
195     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
196     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
197     saved_core_stack_offsets_[i] = stack_offset;
198     stack_offset += kXRegSizeInBytes;
199   }
200 
201   const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
202   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
203   for (uint32_t i : LowToHighBits(fp_spills)) {
204     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
205     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
206     saved_fpu_stack_offsets_[i] = stack_offset;
207     stack_offset += fp_reg_size;
208   }
209 
210   InstructionCodeGeneratorARM64* visitor =
211       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
212   visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
213 }
214 
RestoreLiveRegisters(CodeGenerator * codegen,LocationSummary * locations)215 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
216   InstructionCodeGeneratorARM64* visitor =
217       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
218   visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
219 }
220 
221 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
222  public:
BoundsCheckSlowPathARM64(HBoundsCheck * instruction)223   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
224 
EmitNativeCode(CodeGenerator * codegen)225   void EmitNativeCode(CodeGenerator* codegen) override {
226     LocationSummary* locations = instruction_->GetLocations();
227     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
228 
229     __ Bind(GetEntryLabel());
230     if (instruction_->CanThrowIntoCatchBlock()) {
231       // Live registers will be restored in the catch block if caught.
232       SaveLiveRegisters(codegen, instruction_->GetLocations());
233     }
234     // We're moving two locations to locations that could overlap, so we need a parallel
235     // move resolver.
236     InvokeRuntimeCallingConvention calling_convention;
237     codegen->EmitParallelMoves(locations->InAt(0),
238                                LocationFrom(calling_convention.GetRegisterAt(0)),
239                                DataType::Type::kInt32,
240                                locations->InAt(1),
241                                LocationFrom(calling_convention.GetRegisterAt(1)),
242                                DataType::Type::kInt32);
243     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
244         ? kQuickThrowStringBounds
245         : kQuickThrowArrayBounds;
246     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
247     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
248     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
249   }
250 
IsFatal() const251   bool IsFatal() const override { return true; }
252 
GetDescription() const253   const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
254 
255  private:
256   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
257 };
258 
259 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
260  public:
DivZeroCheckSlowPathARM64(HDivZeroCheck * instruction)261   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
262 
EmitNativeCode(CodeGenerator * codegen)263   void EmitNativeCode(CodeGenerator* codegen) override {
264     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
265     __ Bind(GetEntryLabel());
266     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
267     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
268   }
269 
IsFatal() const270   bool IsFatal() const override { return true; }
271 
GetDescription() const272   const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
273 
274  private:
275   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
276 };
277 
278 class LoadMethodTypeSlowPathARM64 : public SlowPathCodeARM64 {
279  public:
LoadMethodTypeSlowPathARM64(HLoadMethodType * mt)280   explicit LoadMethodTypeSlowPathARM64(HLoadMethodType* mt) : SlowPathCodeARM64(mt) {}
281 
EmitNativeCode(CodeGenerator * codegen)282   void EmitNativeCode(CodeGenerator* codegen) override {
283     LocationSummary* locations = instruction_->GetLocations();
284     Location out = locations->Out();
285     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
286 
287     __ Bind(GetEntryLabel());
288     SaveLiveRegisters(codegen, locations);
289 
290     InvokeRuntimeCallingConvention calling_convention;
291     const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
292     __ Mov(calling_convention.GetRegisterAt(0).W(), proto_index.index_);
293 
294     arm64_codegen->InvokeRuntime(kQuickResolveMethodType,
295                                  instruction_,
296                                  instruction_->GetDexPc(),
297                                  this);
298     CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
299 
300     DataType::Type type = instruction_->GetType();
301     arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
302     RestoreLiveRegisters(codegen, locations);
303 
304     __ B(GetExitLabel());
305   }
306 
GetDescription() const307   const char* GetDescription() const override { return "LoadMethodTypeSlowPathARM64"; }
308 
309  private:
310   DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathARM64);
311 };
312 
313 
314 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
315  public:
LoadClassSlowPathARM64(HLoadClass * cls,HInstruction * at)316   LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
317       : SlowPathCodeARM64(at), cls_(cls) {
318     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
319     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
320   }
321 
EmitNativeCode(CodeGenerator * codegen)322   void EmitNativeCode(CodeGenerator* codegen) override {
323     LocationSummary* locations = instruction_->GetLocations();
324     Location out = locations->Out();
325     const uint32_t dex_pc = instruction_->GetDexPc();
326     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
327     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
328 
329     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
330     __ Bind(GetEntryLabel());
331     SaveLiveRegisters(codegen, locations);
332 
333     InvokeRuntimeCallingConvention calling_convention;
334     if (must_resolve_type) {
335       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()) ||
336              arm64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
337              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
338                              &cls_->GetDexFile()));
339       dex::TypeIndex type_index = cls_->GetTypeIndex();
340       __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
341       if (cls_->NeedsAccessCheck()) {
342         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
343         arm64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
344       } else {
345         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
346         arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
347       }
348       // If we also must_do_clinit, the resolved type is now in the correct register.
349     } else {
350       DCHECK(must_do_clinit);
351       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
352       arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
353                                   source,
354                                   cls_->GetType());
355     }
356     if (must_do_clinit) {
357       arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
358       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
359     }
360 
361     // Move the class to the desired location.
362     if (out.IsValid()) {
363       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
364       DataType::Type type = instruction_->GetType();
365       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
366     }
367     RestoreLiveRegisters(codegen, locations);
368     __ B(GetExitLabel());
369   }
370 
GetDescription() const371   const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
372 
373  private:
374   // The class this slow path will load.
375   HLoadClass* const cls_;
376 
377   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
378 };
379 
380 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
381  public:
LoadStringSlowPathARM64(HLoadString * instruction)382   explicit LoadStringSlowPathARM64(HLoadString* instruction)
383       : SlowPathCodeARM64(instruction) {}
384 
EmitNativeCode(CodeGenerator * codegen)385   void EmitNativeCode(CodeGenerator* codegen) override {
386     LocationSummary* locations = instruction_->GetLocations();
387     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
388     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
389 
390     __ Bind(GetEntryLabel());
391     SaveLiveRegisters(codegen, locations);
392 
393     InvokeRuntimeCallingConvention calling_convention;
394     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
395     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
396     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
397     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
398     DataType::Type type = instruction_->GetType();
399     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
400 
401     RestoreLiveRegisters(codegen, locations);
402 
403     __ B(GetExitLabel());
404   }
405 
GetDescription() const406   const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
407 
408  private:
409   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
410 };
411 
412 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
413  public:
NullCheckSlowPathARM64(HNullCheck * instr)414   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
415 
EmitNativeCode(CodeGenerator * codegen)416   void EmitNativeCode(CodeGenerator* codegen) override {
417     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
418     __ Bind(GetEntryLabel());
419     if (instruction_->CanThrowIntoCatchBlock()) {
420       // Live registers will be restored in the catch block if caught.
421       SaveLiveRegisters(codegen, instruction_->GetLocations());
422     }
423     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
424                                  instruction_,
425                                  instruction_->GetDexPc(),
426                                  this);
427     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
428   }
429 
IsFatal() const430   bool IsFatal() const override { return true; }
431 
GetDescription() const432   const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
433 
434  private:
435   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
436 };
437 
438 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
439  public:
SuspendCheckSlowPathARM64(HSuspendCheck * instruction,HBasicBlock * successor)440   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
441       : SlowPathCodeARM64(instruction), successor_(successor) {}
442 
EmitNativeCode(CodeGenerator * codegen)443   void EmitNativeCode(CodeGenerator* codegen) override {
444     LocationSummary* locations = instruction_->GetLocations();
445     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
446     __ Bind(GetEntryLabel());
447     SaveLiveRegisters(codegen, locations);  // Only saves live vector regs for SIMD.
448     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
449     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
450     RestoreLiveRegisters(codegen, locations);  // Only restores live vector regs for SIMD.
451     if (successor_ == nullptr) {
452       __ B(GetReturnLabel());
453     } else {
454       __ B(arm64_codegen->GetLabelOf(successor_));
455     }
456   }
457 
GetReturnLabel()458   vixl::aarch64::Label* GetReturnLabel() {
459     DCHECK(successor_ == nullptr);
460     return &return_label_;
461   }
462 
GetSuccessor() const463   HBasicBlock* GetSuccessor() const {
464     return successor_;
465   }
466 
GetDescription() const467   const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
468 
469  private:
470   // If not null, the block to branch to after the suspend check.
471   HBasicBlock* const successor_;
472 
473   // If `successor_` is null, the label to branch to after the suspend check.
474   vixl::aarch64::Label return_label_;
475 
476   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
477 };
478 
479 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
480  public:
TypeCheckSlowPathARM64(HInstruction * instruction,bool is_fatal)481   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
482       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
483 
EmitNativeCode(CodeGenerator * codegen)484   void EmitNativeCode(CodeGenerator* codegen) override {
485     LocationSummary* locations = instruction_->GetLocations();
486 
487     DCHECK(instruction_->IsCheckCast()
488            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
489     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
490     uint32_t dex_pc = instruction_->GetDexPc();
491 
492     __ Bind(GetEntryLabel());
493 
494     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
495       SaveLiveRegisters(codegen, locations);
496     }
497 
498     // We're moving two locations to locations that could overlap, so we need a parallel
499     // move resolver.
500     InvokeRuntimeCallingConvention calling_convention;
501     codegen->EmitParallelMoves(locations->InAt(0),
502                                LocationFrom(calling_convention.GetRegisterAt(0)),
503                                DataType::Type::kReference,
504                                locations->InAt(1),
505                                LocationFrom(calling_convention.GetRegisterAt(1)),
506                                DataType::Type::kReference);
507     if (instruction_->IsInstanceOf()) {
508       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
509       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
510       DataType::Type ret_type = instruction_->GetType();
511       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
512       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
513     } else {
514       DCHECK(instruction_->IsCheckCast());
515       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
516       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
517     }
518 
519     if (!is_fatal_) {
520       RestoreLiveRegisters(codegen, locations);
521       __ B(GetExitLabel());
522     }
523   }
524 
GetDescription() const525   const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
IsFatal() const526   bool IsFatal() const override { return is_fatal_; }
527 
528  private:
529   const bool is_fatal_;
530 
531   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
532 };
533 
534 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
535  public:
DeoptimizationSlowPathARM64(HDeoptimize * instruction)536   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
537       : SlowPathCodeARM64(instruction) {}
538 
EmitNativeCode(CodeGenerator * codegen)539   void EmitNativeCode(CodeGenerator* codegen) override {
540     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
541     __ Bind(GetEntryLabel());
542     LocationSummary* locations = instruction_->GetLocations();
543     SaveLiveRegisters(codegen, locations);
544     InvokeRuntimeCallingConvention calling_convention;
545     __ Mov(calling_convention.GetRegisterAt(0),
546            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
547     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
548     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
549   }
550 
GetDescription() const551   const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
552 
553  private:
554   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
555 };
556 
557 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
558  public:
ArraySetSlowPathARM64(HInstruction * instruction)559   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
560 
EmitNativeCode(CodeGenerator * codegen)561   void EmitNativeCode(CodeGenerator* codegen) override {
562     LocationSummary* locations = instruction_->GetLocations();
563     __ Bind(GetEntryLabel());
564     SaveLiveRegisters(codegen, locations);
565 
566     InvokeRuntimeCallingConvention calling_convention;
567     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
568     parallel_move.AddMove(
569         locations->InAt(0),
570         LocationFrom(calling_convention.GetRegisterAt(0)),
571         DataType::Type::kReference,
572         nullptr);
573     parallel_move.AddMove(
574         locations->InAt(1),
575         LocationFrom(calling_convention.GetRegisterAt(1)),
576         DataType::Type::kInt32,
577         nullptr);
578     parallel_move.AddMove(
579         locations->InAt(2),
580         LocationFrom(calling_convention.GetRegisterAt(2)),
581         DataType::Type::kReference,
582         nullptr);
583     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
584 
585     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
586     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
587     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
588     RestoreLiveRegisters(codegen, locations);
589     __ B(GetExitLabel());
590   }
591 
GetDescription() const592   const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
593 
594  private:
595   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
596 };
597 
EmitTable(CodeGeneratorARM64 * codegen)598 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
599   uint32_t num_entries = switch_instr_->GetNumEntries();
600   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
601 
602   // We are about to use the assembler to place literals directly. Make sure we have enough
603   // underlying code buffer and we have generated the jump table with right size.
604   ExactAssemblyScope scope(codegen->GetVIXLAssembler(),
605                            num_entries * sizeof(int32_t),
606                            CodeBufferCheckScope::kExactSize);
607   codegen->GetVIXLAssembler()->bind(&table_start_);
608   for (uint32_t i = 0; i < num_entries; i++) {
609     codegen->GetVIXLAssembler()->place(jump_targets_[i].get());
610   }
611 }
612 
FixTable(CodeGeneratorARM64 * codegen)613 void JumpTableARM64::FixTable(CodeGeneratorARM64* codegen) {
614   uint32_t num_entries = switch_instr_->GetNumEntries();
615   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
616 
617   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
618   for (uint32_t i = 0; i < num_entries; i++) {
619     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
620     DCHECK(target_label->IsBound());
621     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
622     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
623     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
624     jump_targets_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler());
625   }
626 }
627 
628 // Slow path generating a read barrier for a heap reference.
629 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
630  public:
ReadBarrierForHeapReferenceSlowPathARM64(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)631   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
632                                            Location out,
633                                            Location ref,
634                                            Location obj,
635                                            uint32_t offset,
636                                            Location index)
637       : SlowPathCodeARM64(instruction),
638         out_(out),
639         ref_(ref),
640         obj_(obj),
641         offset_(offset),
642         index_(index) {
643     // If `obj` is equal to `out` or `ref`, it means the initial object
644     // has been overwritten by (or after) the heap object reference load
645     // to be instrumented, e.g.:
646     //
    //   __ Ldr(out, HeapOperand(out, class_offset));
648     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
649     //
650     // In that case, we have lost the information about the original
651     // object, and the emitted read barrier cannot work properly.
652     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
653     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
654   }
655 
  // Emits the out-of-line code for the heap-reference read barrier slow path.
  //
  // The generated sequence binds the entry label, saves the live registers,
  // optionally converts an array index into a byte offset, marshals `ref_`,
  // `obj_` and the offset/index into the first three runtime calling-convention
  // registers, calls the kQuickReadBarrierSlow entrypoint, moves the returned
  // reference into `out_`, restores the live registers and branches to the
  // exit label.
  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DataType::Type type = DataType::Type::kReference;
    DCHECK(locations->CanCall());
    // `out_` is written after the runtime call, so it must not be one of the
    // registers restored by RestoreLiveRegisters below.
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
          // We are about to change the value of `index_reg` (see the
          // calls to vixl::MacroAssembler::Lsl and
          // vixl::MacroAssembler::Mov below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg.W(), index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        // Add the data offset so that `index_reg` holds the full byte offset
        // of the element from `obj_`.
        __ Add(index_reg, index_reg, Operand(offset_));
      } else {
        // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset to an object field within an
        // object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        HInvoke* invoke = instruction_->AsInvoke();
        DCHECK(IsUnsafeGetReference(invoke) ||
               IsVarHandleGet(invoke) ||
               IsUnsafeCASReference(invoke) ||
               IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
        DCHECK_EQ(offset_, 0u);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          type,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          type,
                          nullptr);
    if (index.IsValid()) {
      // The third argument comes from a register, so it takes part in the
      // parallel move with the other two.
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      // No index register: the third argument is the constant `offset_`,
      // materialized only after the parallel move has been emitted.
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
    }
    arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    // Copy the entrypoint's result from the return location into `out_`.
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }
783 
GetDescription() const784   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
785 
786  private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)787   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
788     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
789     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
790     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
791       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
792         return Register(VIXLRegCodeFromART(i), kXRegSize);
793       }
794     }
795     // We shall never fail to find a free caller-save register, as
796     // there are more than two core caller-save registers on ARM64
797     // (meaning it is possible to find one which is different from
798     // `ref` and `obj`).
799     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
800     LOG(FATAL) << "Could not find a free register";
801     UNREACHABLE();
802   }
803 
804   const Location out_;
805   const Location ref_;
806   const Location obj_;
807   const uint32_t offset_;
808   // An additional location containing an index to an array.
809   // Only used for HArrayGet and the UnsafeGetObject &
810   // UnsafeGetObjectVolatile intrinsics.
811   const Location index_;
812 
813   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
814 };
815 
816 // Slow path generating a read barrier for a GC root.
817 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
818  public:
ReadBarrierForRootSlowPathARM64(HInstruction * instruction,Location out,Location root)819   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
820       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
821   }
822 
EmitNativeCode(CodeGenerator * codegen)823   void EmitNativeCode(CodeGenerator* codegen) override {
824     DCHECK(codegen->EmitReadBarrier());
825     LocationSummary* locations = instruction_->GetLocations();
826     DataType::Type type = DataType::Type::kReference;
827     DCHECK(locations->CanCall());
828     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
829     DCHECK(instruction_->IsLoadClass() ||
830            instruction_->IsLoadString() ||
831            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
832         << "Unexpected instruction in read barrier for GC root slow path: "
833         << instruction_->DebugName();
834 
835     __ Bind(GetEntryLabel());
836     SaveLiveRegisters(codegen, locations);
837 
838     InvokeRuntimeCallingConvention calling_convention;
839     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
840     // The argument of the ReadBarrierForRootSlow is not a managed
841     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
842     // thus we need a 64-bit move here, and we cannot use
843     //
844     //   arm64_codegen->MoveLocation(
845     //       LocationFrom(calling_convention.GetRegisterAt(0)),
846     //       root_,
847     //       type);
848     //
849     // which would emit a 32-bit move, as `type` is a (32-bit wide)
850     // reference type (`DataType::Type::kReference`).
851     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
852     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
853                                  instruction_,
854                                  instruction_->GetDexPc(),
855                                  this);
856     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
857     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
858 
859     RestoreLiveRegisters(codegen, locations);
860     __ B(GetExitLabel());
861   }
862 
GetDescription() const863   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
864 
865  private:
866   const Location out_;
867   const Location root_;
868 
869   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
870 };
871 
872 class TracingMethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
873  public:
TracingMethodEntryExitHooksSlowPathARM64(bool is_method_entry)874   explicit TracingMethodEntryExitHooksSlowPathARM64(bool is_method_entry)
875       : SlowPathCodeARM64(/* instruction= */ nullptr), is_method_entry_(is_method_entry) {}
876 
EmitNativeCode(CodeGenerator * codegen)877   void EmitNativeCode(CodeGenerator* codegen) override {
878     QuickEntrypointEnum entry_point =
879         (is_method_entry_) ? kQuickRecordEntryTraceEvent : kQuickRecordExitTraceEvent;
880     vixl::aarch64::Label call;
881     __ Bind(GetEntryLabel());
882     uint32_t entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entry_point).Int32Value();
883     __ Ldr(lr, MemOperand(tr, entrypoint_offset));
884     __ Blr(lr);
885     __ B(GetExitLabel());
886   }
887 
GetDescription() const888   const char* GetDescription() const override {
889     return "TracingMethodEntryExitHooksSlowPath";
890   }
891 
892  private:
893   const bool is_method_entry_;
894 
895   DISALLOW_COPY_AND_ASSIGN(TracingMethodEntryExitHooksSlowPathARM64);
896 };
897 
898 class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
899  public:
MethodEntryExitHooksSlowPathARM64(HInstruction * instruction)900   explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
901       : SlowPathCodeARM64(instruction) {}
902 
EmitNativeCode(CodeGenerator * codegen)903   void EmitNativeCode(CodeGenerator* codegen) override {
904     LocationSummary* locations = instruction_->GetLocations();
905     QuickEntrypointEnum entry_point =
906         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
907     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
908     __ Bind(GetEntryLabel());
909     SaveLiveRegisters(codegen, locations);
910     if (instruction_->IsMethodExitHook()) {
911       __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
912     }
913     arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
914     RestoreLiveRegisters(codegen, locations);
915     __ B(GetExitLabel());
916   }
917 
GetDescription() const918   const char* GetDescription() const override {
919     return "MethodEntryExitHooksSlowPath";
920   }
921 
922  private:
923   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64);
924 };
925 
926 class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
927  public:
CompileOptimizedSlowPathARM64(HSuspendCheck * check,Register profiling_info)928   CompileOptimizedSlowPathARM64(HSuspendCheck* check, Register profiling_info)
929       : SlowPathCodeARM64(check),
930         profiling_info_(profiling_info) {}
931 
EmitNativeCode(CodeGenerator * codegen)932   void EmitNativeCode(CodeGenerator* codegen) override {
933     uint32_t entrypoint_offset =
934         GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
935     __ Bind(GetEntryLabel());
936     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
937     UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
938     Register counter = temps.AcquireW();
939     __ Mov(counter, ProfilingInfo::GetOptimizeThreshold());
940     __ Strh(counter,
941             MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
942     if (instruction_ != nullptr) {
943       // Only saves live vector regs for SIMD.
944       SaveLiveRegisters(codegen, instruction_->GetLocations());
945     }
946     __ Ldr(lr, MemOperand(tr, entrypoint_offset));
947     // Note: we don't record the call here (and therefore don't generate a stack
948     // map), as the entrypoint should never be suspended.
949     __ Blr(lr);
950     if (instruction_ != nullptr) {
951       // Only restores live vector regs for SIMD.
952       RestoreLiveRegisters(codegen, instruction_->GetLocations());
953     }
954     __ B(GetExitLabel());
955   }
956 
GetDescription() const957   const char* GetDescription() const override {
958     return "CompileOptimizedSlowPath";
959   }
960 
961  private:
962   // The register where the profiling info is stored when entering the slow
963   // path.
964   Register profiling_info_;
965 
966   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64);
967 };
968 
969 #undef __
970 
GetNextLocation(DataType::Type type)971 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
972   Location next_location;
973   if (type == DataType::Type::kVoid) {
974     LOG(FATAL) << "Unreachable type " << type;
975   }
976 
977   if (DataType::IsFloatingPointType(type) &&
978       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
979     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
980   } else if (!DataType::IsFloatingPointType(type) &&
981              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
982     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
983   } else {
984     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
985     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
986                                                 : Location::StackSlot(stack_offset);
987   }
988 
989   // Space on the stack is reserved for all arguments.
990   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
991   return next_location;
992 }
993 
GetMethodLocation() const994 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
995   return LocationFrom(kArtMethodRegister);
996 }
997 
GetNextLocation(DataType::Type type)998 Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
999   DCHECK_NE(type, DataType::Type::kReference);
1000 
1001   Location location = Location::NoLocation();
1002   if (DataType::IsFloatingPointType(type)) {
1003     if (fpr_index_ < kParameterFPRegistersLength) {
1004       location = LocationFrom(kParameterFPRegisters[fpr_index_]);
1005       ++fpr_index_;
1006     }
1007   } else {
1008     // Native ABI uses the same registers as managed, except that the method register x0
1009     // is a normal argument.
1010     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
1011       location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
1012       ++gpr_index_;
1013     }
1014   }
1015   if (location.IsInvalid()) {
1016     if (DataType::Is64BitType(type)) {
1017       location = Location::DoubleStackSlot(stack_offset_);
1018     } else {
1019       location = Location::StackSlot(stack_offset_);
1020     }
1021     stack_offset_ += kFramePointerSize;
1022 
1023     if (for_register_allocation_) {
1024       location = Location::Any();
1025     }
1026   }
1027   return location;
1028 }
1029 
GetReturnLocation(DataType::Type type) const1030 Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
1031   // We perform conversion to the managed ABI return register after the call if needed.
1032   InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
1033   return dex_calling_convention.GetReturnLocation(type);
1034 }
1035 
GetMethodLocation() const1036 Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
1037   // Pass the method in the hidden argument x15.
1038   return Location::RegisterLocation(x15.GetCode());
1039 }
1040 
// Compile-time table of the intrinsics that have no handcrafted ARM64 code.
namespace detail {

// Mark which intrinsics we don't have handcrafted code for.
// Primary template: an intrinsic is considered implemented unless a
// specialization below says otherwise.
template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;
};

// Specializes IsUnimplemented to `true` for one intrinsic; applied to every
// entry of UNIMPLEMENTED_INTRINSIC_LIST_ARM64.
#define TRUE_OVERRIDE(Name)                     \
  template <>                                   \
  struct IsUnimplemented<Intrinsics::k##Name> { \
    bool is_unimplemented = true;               \
  };
UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

// One flag per intrinsic: the first entry is for kNone, followed by one entry
// per item of ART_INTRINSICS_LIST, in list order.
// NOTE(review): presumably indexed by the `Intrinsics` enum value -- confirm
// that the enum is declared in the same order as ART_INTRINSICS_LIST.
static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
    IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
    ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};

}  // namespace detail
1066 
// Constructs the ARM64 code generator for `graph`.
//
// Forwards the allocatable register counts, the callee-save register lists,
// the compiler options, the stats object and the unimplemented-intrinsics
// table to the base CodeGenerator, sets up the arena-backed containers used
// for labels, jump tables and linker/JIT patches, and finally selects the SVE
// or NEON location builder and instruction visitor.
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
                                       const CompilerOptions& compiler_options,
                                       OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfAllocatableRegisters,
                    kNumberOfAllocatableFPRegisters,
                    kNumberOfAllocatableRegisterPairs,
                    callee_saved_core_registers.GetList(),
                    callee_saved_fp_registers.GetList(),
                    compiler_options,
                    stats,
                    ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
      block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_neon_(graph, this),
      instruction_visitor_neon_(graph, this),
      location_builder_sve_(graph, this),
      instruction_visitor_sve_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator(),
                 compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_patches_(&assembler_, graph->GetAllocator()),
      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Save the link register (containing the return address) to mimic Quick.
  AddAllocatedRegister(LocationFrom(lr));

  // Both the NEON and the SVE builder/visitor objects are constructed above;
  // only the pair matching the target's capabilities is made active here.
  bool use_sve = ShouldUseSVE();
  if (use_sve) {
    location_builder_ = &location_builder_sve_;
    instruction_visitor_ = &instruction_visitor_sve_;
  } else {
    location_builder_ = &location_builder_neon_;
    instruction_visitor_ = &instruction_visitor_neon_;
  }
}
1118 
ShouldUseSVE() const1119 bool CodeGeneratorARM64::ShouldUseSVE() const {
1120   return GetInstructionSetFeatures().HasSVE();
1121 }
1122 
GetSIMDRegisterWidth() const1123 size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
1124   return SupportsPredicatedSIMD()
1125       ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
1126       : vixl::aarch64::kQRegSizeInBytes;
1127 }
1128 
1129 #define __ GetVIXLAssembler()->
1130 
FixJumpTables()1131 void CodeGeneratorARM64::FixJumpTables() {
1132   for (auto&& jump_table : jump_tables_) {
1133     jump_table->FixTable(this);
1134   }
1135 }
1136 
// Finishes code generation: fixes up jump tables, emits the JIT Baker read
// barrier slow paths, finalizes the assembler (literal pool) and runs the
// base-class finalization. In debug builds it then checks, for every Baker
// read barrier patch, that the instruction next to the patched location has
// the encoding expected for the patch kind.
void CodeGeneratorARM64::Finalize() {
  FixJumpTables();

  // Emit JIT baker read barrier slow paths.
  DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
    uint32_t encoded_data = entry.first;
    vixl::aarch64::Label* slow_path_entry = &entry.second.label;
    __ Bind(slow_path_entry);
    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
  }

  // Ensure we emit the literal pool.
  __ FinalizeCode();

  CodeGenerator::Finalize();

  // Verify Baker read barrier linker patches.
  if (kIsDebugBuild) {
    ArrayRef<const uint8_t> code(GetCode());
    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
      DCHECK(info.label.IsBound());
      uint32_t literal_offset = info.label.GetLocation();
      DCHECK_ALIGNED(literal_offset, 4u);

      // Reads the 32-bit instruction word at byte `offset` of the generated
      // code, assembling it from four bytes, least significant byte first.
      auto GetInsn = [&code](uint32_t offset) {
        DCHECK_ALIGNED(offset, 4u);
        return
            (static_cast<uint32_t>(code[offset + 0]) << 0) +
            (static_cast<uint32_t>(code[offset + 1]) << 8) +
            (static_cast<uint32_t>(code[offset + 2]) << 16)+
            (static_cast<uint32_t>(code[offset + 3]) << 24);
      };

      const uint32_t encoded_data = info.custom_data;
      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
      // Check that the next instruction matches the expected LDR.
      switch (kind) {
        case BakerReadBarrierKind::kField:
        case BakerReadBarrierKind::kAcquire: {
          // The instruction under test follows the patched one, so at least
          // two instructions (8 bytes) must remain from `literal_offset`.
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          if (kind == BakerReadBarrierKind::kField) {
            // LDR (immediate) with correct base_reg.
            CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
          } else {
            DCHECK(kind == BakerReadBarrierKind::kAcquire);
            // LDAR with correct base_reg.
            CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
          }
          break;
        }
        case BakerReadBarrierKind::kArray: {
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
          // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
          CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
          break;
        }
        case BakerReadBarrierKind::kGcRoot: {
          // For GC roots the instruction under test precedes the patched one.
          DCHECK_GE(literal_offset, 4u);
          uint32_t prev_insn = GetInsn(literal_offset - 4u);
          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          // Usually LDR (immediate) with correct root_reg but
          // we may have a "MOV marked, old_value" for intrinsic CAS.
          if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
            CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
          }
          break;
        }
        default:
          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
          UNREACHABLE();
      }
    }
  }
}
1220 
PrepareForEmitNativeCode()1221 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1222   // Note: There are 6 kinds of moves:
1223   // 1. constant -> GPR/FPR (non-cycle)
1224   // 2. constant -> stack (non-cycle)
1225   // 3. GPR/FPR -> GPR/FPR
1226   // 4. GPR/FPR -> stack
1227   // 5. stack -> GPR/FPR
1228   // 6. stack -> stack (non-cycle)
1229   // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1230   // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1231   // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
1232   // dependency.
1233   vixl_temps_.Open(GetVIXLAssembler());
1234 }
1235 
// Closes the VIXL scratch-register scope `vixl_temps_`.
void ParallelMoveResolverARM64::FinishEmitNativeCode() {
  vixl_temps_.Close();
}
1239 
AllocateScratchLocationFor(Location::Kind kind)1240 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1241   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1242          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1243          || kind == Location::kSIMDStackSlot);
1244   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1245       ? Location::kFpuRegister
1246       : Location::kRegister;
1247   Location scratch = GetScratchLocation(kind);
1248   if (!scratch.Equals(Location::NoLocation())) {
1249     return scratch;
1250   }
1251   // Allocate from VIXL temp registers.
1252   if (kind == Location::kRegister) {
1253     scratch = LocationFrom(vixl_temps_.AcquireX());
1254   } else {
1255     DCHECK_EQ(kind, Location::kFpuRegister);
1256     scratch = codegen_->GetGraph()->HasSIMD()
1257         ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
1258         : LocationFrom(vixl_temps_.AcquireD());
1259   }
1260   AddScratchLocation(scratch);
1261   return scratch;
1262 }
1263 
FreeScratchLocation(Location loc)1264 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1265   if (loc.IsRegister()) {
1266     vixl_temps_.Release(XRegisterFrom(loc));
1267   } else {
1268     DCHECK(loc.IsFpuRegister());
1269     if (codegen_->GetGraph()->HasSIMD()) {
1270       codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
1271     } else {
1272       vixl_temps_.Release(DRegisterFrom(loc));
1273     }
1274   }
1275   RemoveScratchLocation(loc);
1276 }
1277 
EmitMove(size_t index)1278 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1279   MoveOperands* move = moves_[index];
1280   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1281 }
1282 
VisitMethodExitHook(HMethodExitHook * method_hook)1283 void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1284   LocationSummary* locations = new (GetGraph()->GetAllocator())
1285       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1286   DataType::Type return_type = method_hook->InputAt(0)->GetType();
1287   locations->SetInAt(0, ARM64ReturnLocation(return_type));
1288 }
1289 
// Emits the inline code for a method entry or exit hook.
//
// The fast path writes a {method | action, timestamp} record into the
// per-thread trace buffer. The slow path (MethodEntryExitHooksSlowPathARM64)
// is taken when the should-deoptimize flag is non-zero (exit hooks only), when
// the listener state is above kFastTraceListeners, or when the trace buffer
// has no room left. If the listener state is below kFastTraceListeners the
// hook is skipped entirely.
void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);
  Register addr = temps.AcquireX();
  Register curr_entry = temps.AcquireX();
  // `value` is the 32-bit view of `curr_entry`; it is only used before
  // `curr_entry` is loaded below.
  Register value = curr_entry.W();

  SlowPathCodeARM64* slow_path =
      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
  codegen_->AddSlowPath(slow_path);

  if (instruction->IsMethodExitHook()) {
    // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
    // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check
    // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
    // disabled in debuggable runtime. The other bit is used when this method itself requires a
    // deoptimization due to redefinition. So it is safe to just check for non-zero value here.
    __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
    __ Cbnz(value, slow_path->GetEntryLabel());
  }

  // Load the byte describing the current method entry/exit listeners; its
  // absolute address is embedded in the generated code.
  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
  MemberOffset  offset = instruction->IsMethodExitHook() ?
      instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
      instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
  __ Mov(addr, address + offset.Int32Value());
  __ Ldrb(value, MemOperand(addr, 0));
  __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners));
  // Check if there are any method entry / exit listeners. If no, continue.
  __ B(lt, slow_path->GetExitLabel());
  // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
  // If yes, just take the slow path.
  __ B(gt, slow_path->GetEntryLabel());

  // `addr` is no longer needed; reuse its register for the buffer start.
  Register init_entry = addr;
  // Check if there is place in the buffer to store a new entry, if no, take slow path.
  uint32_t trace_buffer_curr_entry_offset =
      Thread::TraceBufferCurrPtrOffset<kArm64PointerSize>().Int32Value();
  __ Ldr(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
  // The current pointer is moved down by the size of one record before the
  // comparison with the buffer start.
  __ Sub(curr_entry, curr_entry, kNumEntriesForWallClock * sizeof(void*));
  __ Ldr(init_entry, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
  __ Cmp(curr_entry, init_entry);
  __ B(lt, slow_path->GetEntryLabel());

  // Update the index in the `Thread`.
  __ Str(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));

  // The buffer start is no longer needed either; reuse the register again.
  Register tmp = init_entry;
  // Record method pointer and trace action.
  __ Ldr(tmp, MemOperand(sp, 0));
  // Use last two bits to encode trace method action. For MethodEntry it is 0
  // so no need to set the bits since they are 0 already.
  if (instruction->IsMethodExitHook()) {
    DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
    static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
    static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
    __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
  }
  __ Str(tmp, MemOperand(curr_entry, kMethodOffsetInBytes));
  // Record the timestamp.
  __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0);
  __ Str(tmp, MemOperand(curr_entry, kTimestampOffsetInBytes));
  __ Bind(slow_path->GetExitLabel());
}
1354 
VisitMethodExitHook(HMethodExitHook * instruction)1355 void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) {
1356   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1357   DCHECK(codegen_->RequiresCurrentMethod());
1358   GenerateMethodEntryExitHook(instruction);
1359 }
1360 
VisitMethodEntryHook(HMethodEntryHook * method_hook)1361 void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1362   new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1363 }
1364 
VisitMethodEntryHook(HMethodEntryHook * instruction)1365 void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1366   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1367   DCHECK(codegen_->RequiresCurrentMethod());
1368   GenerateMethodEntryExitHook(instruction);
1369 }
1370 
MaybeRecordTraceEvent(bool is_method_entry)1371 void CodeGeneratorARM64::MaybeRecordTraceEvent(bool is_method_entry) {
1372   if (!art_flags::always_enable_profile_code()) {
1373     return;
1374   }
1375 
1376   MacroAssembler* masm = GetVIXLAssembler();
1377   UseScratchRegisterScope temps(masm);
1378   Register addr = temps.AcquireX();
1379   CHECK(addr.Is(vixl::aarch64::x16));
1380 
1381   SlowPathCodeARM64* slow_path =
1382       new (GetScopedAllocator()) TracingMethodEntryExitHooksSlowPathARM64(is_method_entry);
1383   AddSlowPath(slow_path);
1384 
1385   __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
1386   __ Cbnz(addr, slow_path->GetEntryLabel());
1387   __ Bind(slow_path->GetExitLabel());
1388 }
1389 
MaybeIncrementHotness(HSuspendCheck * suspend_check,bool is_frame_entry)1390 void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1391   MacroAssembler* masm = GetVIXLAssembler();
1392   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1393     UseScratchRegisterScope temps(masm);
1394     Register counter = temps.AcquireX();
1395     Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1396     if (!is_frame_entry) {
1397       __ Ldr(method, MemOperand(sp, 0));
1398     }
1399     __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1400     vixl::aarch64::Label done;
1401     DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
1402     __ Cbz(counter, &done);
1403     __ Add(counter, counter, -1);
1404     __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1405     __ Bind(&done);
1406   }
1407 
1408   if (GetGraph()->IsCompilingBaseline() &&
1409       GetGraph()->IsUsefulOptimizing() &&
1410       !Runtime::Current()->IsAotCompiler()) {
1411     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1412     DCHECK(info != nullptr);
1413     DCHECK(!HasEmptyFrame());
1414     uint64_t address = reinterpret_cast64<uint64_t>(info);
1415     UseScratchRegisterScope temps(masm);
1416     Register counter = temps.AcquireW();
1417     SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(
1418         suspend_check, /* profiling_info= */ lr);
1419     AddSlowPath(slow_path);
1420     __ Ldr(lr, jit_patches_.DeduplicateUint64Literal(address));
1421     __ Ldrh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1422     __ Cbz(counter, slow_path->GetEntryLabel());
1423     __ Add(counter, counter, -1);
1424     __ Strh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1425     __ Bind(slow_path->GetExitLabel());
1426   }
1427 }
1428 
GenerateFrameEntry()1429 void CodeGeneratorARM64::GenerateFrameEntry() {
1430   MacroAssembler* masm = GetVIXLAssembler();
1431 
1432   // Check if we need to generate the clinit check. We will jump to the
1433   // resolution stub if the class is not initialized and the executing thread is
1434   // not the thread initializing it.
1435   // We do this before constructing the frame to get the correct stack trace if
1436   // an exception is thrown.
1437   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1438     UseScratchRegisterScope temps(masm);
1439     vixl::aarch64::Label resolution;
1440     vixl::aarch64::Label memory_barrier;
1441 
1442     Register temp1 = temps.AcquireW();
1443     Register temp2 = temps.AcquireW();
1444 
1445     // Check if we're visibly initialized.
1446 
1447     // We don't emit a read barrier here to save on code size. We rely on the
1448     // resolution trampoline to do a suspend check before re-entering this code.
1449     __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
1450     __ Ldrb(temp2, HeapOperand(temp1, kClassStatusByteOffset));
1451     __ Cmp(temp2, kShiftedVisiblyInitializedValue);
1452     __ B(hs, &frame_entry_label_);
1453 
1454     // Check if we're initialized and jump to code that does a memory barrier if
1455     // so.
1456     __ Cmp(temp2, kShiftedInitializedValue);
1457     __ B(hs, &memory_barrier);
1458 
1459     // Check if we're initializing and the thread initializing is the one
1460     // executing the code.
1461     __ Cmp(temp2, kShiftedInitializingValue);
1462     __ B(lo, &resolution);
1463 
1464     __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1465     __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
1466     __ Cmp(temp1, temp2);
1467     __ B(eq, &frame_entry_label_);
1468     __ Bind(&resolution);
1469 
1470     // Jump to the resolution stub.
1471     ThreadOffset64 entrypoint_offset =
1472         GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
1473     __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
1474     __ Br(temp1.X());
1475 
1476     __ Bind(&memory_barrier);
1477     GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
1478   }
1479   __ Bind(&frame_entry_label_);
1480 
1481   bool do_overflow_check =
1482       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1483   if (do_overflow_check) {
1484     UseScratchRegisterScope temps(masm);
1485     Register temp = temps.AcquireX();
1486     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1487     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
1488     {
1489       // Ensure that between load and RecordPcInfo there are no pools emitted.
1490       ExactAssemblyScope eas(GetVIXLAssembler(),
1491                              kInstructionSize,
1492                              CodeBufferCheckScope::kExactSize);
1493       __ ldr(wzr, MemOperand(temp, 0));
1494       RecordPcInfo(nullptr, 0);
1495     }
1496   }
1497 
1498   if (!HasEmptyFrame()) {
1499     // Make sure the frame size isn't unreasonably large.
1500     DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1501 
1502     // Stack layout:
1503     //      sp[frame_size - 8]        : lr.
1504     //      ...                       : other preserved core registers.
1505     //      ...                       : other preserved fp registers.
1506     //      ...                       : reserved frame space.
1507     //      sp[0]                     : current method.
1508     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1509     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1510     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1511     DCHECK(!preserved_core_registers.IsEmpty());
1512     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1513     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1514 
1515     // Save the current method if we need it, or if using STP reduces code
1516     // size. Note that we do not do this in HCurrentMethod, as the
1517     // instruction might have been removed in the SSA graph.
1518     CPURegister lowest_spill;
1519     if (core_spills_offset == kXRegSizeInBytes) {
1520       // If there is no gap between the method and the lowest core spill, use
1521       // aligned STP pre-index to store both. Max difference is 512. We do
1522       // that to reduce code size even if we do not have to save the method.
1523       DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
1524       lowest_spill = preserved_core_registers.PopLowestIndex();
1525       __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
1526     } else if (RequiresCurrentMethod()) {
1527       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1528     } else {
1529       __ Claim(frame_size);
1530     }
1531     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1532     if (lowest_spill.IsValid()) {
1533       GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
1534       core_spills_offset += kXRegSizeInBytes;
1535     }
1536     GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
1537     GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);
1538 
1539     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1540       // Initialize should_deoptimize flag to 0.
1541       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1542       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1543     }
1544 
1545     MaybeRecordTraceEvent(/* is_method_entry= */ true);
1546   }
1547   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1548   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
1549 }
1550 
GenerateFrameExit()1551 void CodeGeneratorARM64::GenerateFrameExit() {
1552   GetAssembler()->cfi().RememberState();
1553   if (!HasEmptyFrame()) {
1554     MaybeRecordTraceEvent(/* is_method_entry= */ false);
1555 
1556     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1557     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1558     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1559     DCHECK(!preserved_core_registers.IsEmpty());
1560     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1561     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1562 
1563     CPURegister lowest_spill;
1564     if (core_spills_offset == kXRegSizeInBytes) {
1565       // If there is no gap between the method and the lowest core spill, use
1566       // aligned LDP pre-index to pop both. Max difference is 504. We do
1567       // that to reduce code size even though the loaded method is unused.
1568       DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
1569       lowest_spill = preserved_core_registers.PopLowestIndex();
1570       core_spills_offset += kXRegSizeInBytes;
1571     }
1572     GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1573     GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1574     if (lowest_spill.IsValid()) {
1575       __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1576       GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1577     } else {
1578       __ Drop(frame_size);
1579     }
1580     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1581   }
1582   __ Ret();
1583   GetAssembler()->cfi().RestoreState();
1584   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1585 }
1586 
GetFramePreservedCoreRegisters() const1587 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1588   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1589   return CPURegList(CPURegister::kRegister, kXRegSize,
1590                     core_spill_mask_);
1591 }
1592 
GetFramePreservedFPRegisters() const1593 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1594   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1595                                          GetNumberOfFloatingPointRegisters()));
1596   return CPURegList(CPURegister::kVRegister, kDRegSize,
1597                     fpu_spill_mask_);
1598 }
1599 
Bind(HBasicBlock * block)1600 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1601   __ Bind(GetLabelOf(block));
1602 }
1603 
MoveConstant(Location location,int32_t value)1604 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1605   DCHECK(location.IsRegister());
1606   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1607 }
1608 
AddLocationAsTemp(Location location,LocationSummary * locations)1609 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1610   if (location.IsRegister()) {
1611     locations->AddTemp(location);
1612   } else {
1613     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1614   }
1615 }
1616 
MaybeMarkGCCard(Register object,Register value,bool emit_null_check)1617 void CodeGeneratorARM64::MaybeMarkGCCard(Register object, Register value, bool emit_null_check) {
1618   vixl::aarch64::Label done;
1619   if (emit_null_check) {
1620     __ Cbz(value, &done);
1621   }
1622   MarkGCCard(object);
1623   if (emit_null_check) {
1624     __ Bind(&done);
1625   }
1626 }
1627 
MarkGCCard(Register object)1628 void CodeGeneratorARM64::MarkGCCard(Register object) {
1629   UseScratchRegisterScope temps(GetVIXLAssembler());
1630   Register card = temps.AcquireX();
1631   Register temp = temps.AcquireW();  // Index within the CardTable - 32bit.
1632   // Load the address of the card table into `card`.
1633   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1634   // Calculate the offset (in the card table) of the card corresponding to `object`.
1635   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1636   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1637   // `object`'s card.
1638   //
1639   // Register `card` contains the address of the card table. Note that the card
1640   // table's base is biased during its creation so that it always starts at an
1641   // address whose least-significant byte is equal to `kCardDirty` (see
1642   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1643   // below writes the `kCardDirty` (byte) value into the `object`'s card
1644   // (located at `card + object >> kCardShift`).
1645   //
1646   // This dual use of the value in register `card` (1. to calculate the location
1647   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1648   // (no need to explicitly load `kCardDirty` as an immediate value).
1649   __ Strb(card, MemOperand(card, temp.X()));
1650 }
1651 
CheckGCCardIsValid(Register object)1652 void CodeGeneratorARM64::CheckGCCardIsValid(Register object) {
1653   UseScratchRegisterScope temps(GetVIXLAssembler());
1654   Register card = temps.AcquireX();
1655   Register temp = temps.AcquireW();  // Index within the CardTable - 32bit.
1656   vixl::aarch64::Label done;
1657   // Load the address of the card table into `card`.
1658   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1659   // Calculate the offset (in the card table) of the card corresponding to `object`.
1660   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1661   // assert (!clean || !self->is_gc_marking)
1662   __ Ldrb(temp, MemOperand(card, temp.X()));
1663   static_assert(gc::accounting::CardTable::kCardClean == 0);
1664   __ Cbnz(temp, &done);
1665   __ Cbz(mr, &done);
1666   __ Unreachable();
1667   __ Bind(&done);
1668 }
1669 
SetupBlockedRegisters() const1670 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1671   // Blocked core registers:
1672   //      lr        : Runtime reserved.
1673   //      tr        : Runtime reserved.
1674   //      mr        : Runtime reserved.
1675   //      ip1       : VIXL core temp.
1676   //      ip0       : VIXL core temp.
1677   //      x18       : Platform register.
1678   //
1679   // Blocked fp registers:
1680   //      d31       : VIXL fp temp.
1681   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1682   reserved_core_registers.Combine(runtime_reserved_core_registers);
1683   while (!reserved_core_registers.IsEmpty()) {
1684     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1685   }
1686   blocked_core_registers_[X18] = true;
1687 
1688   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1689   while (!reserved_fp_registers.IsEmpty()) {
1690     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1691   }
1692 
1693   if (GetGraph()->IsDebuggable()) {
1694     // Stubs do not save callee-save floating point registers. If the graph
1695     // is debuggable, we need to deal with these registers differently. For
1696     // now, just block them.
1697     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1698     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1699       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1700     }
1701   }
1702 }
1703 
SaveCoreRegister(size_t stack_index,uint32_t reg_id)1704 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1705   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1706   __ Str(reg, MemOperand(sp, stack_index));
1707   return kArm64WordSize;
1708 }
1709 
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)1710 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1711   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1712   __ Ldr(reg, MemOperand(sp, stack_index));
1713   return kArm64WordSize;
1714 }
1715 
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)1716 size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
1717                                                      [[maybe_unused]] uint32_t reg_id) {
1718   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1719              << "use SaveRestoreLiveRegistersHelper";
1720   UNREACHABLE();
1721 }
1722 
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)1723 size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
1724                                                         [[maybe_unused]] uint32_t reg_id) {
1725   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1726              << "use SaveRestoreLiveRegistersHelper";
1727   UNREACHABLE();
1728 }
1729 
DumpCoreRegister(std::ostream & stream,int reg) const1730 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1731   stream << XRegister(reg);
1732 }
1733 
DumpFloatingPointRegister(std::ostream & stream,int reg) const1734 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1735   stream << DRegister(reg);
1736 }
1737 
GetInstructionSetFeatures() const1738 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1739   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1740 }
1741 
MoveConstant(CPURegister destination,HConstant * constant)1742 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1743   if (constant->IsIntConstant()) {
1744     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1745   } else if (constant->IsLongConstant()) {
1746     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1747   } else if (constant->IsNullConstant()) {
1748     __ Mov(Register(destination), 0);
1749   } else if (constant->IsFloatConstant()) {
1750     __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1751   } else {
1752     DCHECK(constant->IsDoubleConstant());
1753     __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1754   }
1755 }
1756 
1757 
CoherentConstantAndType(Location constant,DataType::Type type)1758 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1759   DCHECK(constant.IsConstant());
1760   HConstant* cst = constant.GetConstant();
1761   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1762          // Null is mapped to a core W register, which we associate with kPrimInt.
1763          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1764          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1765          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1766          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1767 }
1768 
1769 // Allocate a scratch register from the VIXL pool, querying first
1770 // the floating-point register pool, and then the core register
1771 // pool. This is essentially a reimplementation of
1772 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1773 // using a different allocation strategy.
AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler * masm,vixl::aarch64::UseScratchRegisterScope * temps,int size_in_bits)1774 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1775                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1776                                                     int size_in_bits) {
1777   return masm->GetScratchVRegisterList()->IsEmpty()
1778       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1779       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1780 }
1781 
MoveLocation(Location destination,Location source,DataType::Type dst_type)1782 void CodeGeneratorARM64::MoveLocation(Location destination,
1783                                       Location source,
1784                                       DataType::Type dst_type) {
1785   if (source.Equals(destination)) {
1786     return;
1787   }
1788 
1789   // A valid move can always be inferred from the destination and source
1790   // locations. When moving from and to a register, the argument type can be
1791   // used to generate 32bit instead of 64bit moves. In debug mode we also
1792   // checks the coherency of the locations and the type.
1793   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1794 
1795   if (destination.IsRegister() || destination.IsFpuRegister()) {
1796     if (unspecified_type) {
1797       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1798       if (source.IsStackSlot() ||
1799           (src_cst != nullptr && (src_cst->IsIntConstant()
1800                                   || src_cst->IsFloatConstant()
1801                                   || src_cst->IsNullConstant()))) {
1802         // For stack slots and 32bit constants, a 64bit type is appropriate.
1803         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1804       } else {
1805         // If the source is a double stack slot or a 64bit constant, a 64bit
1806         // type is appropriate. Else the source is a register, and since the
1807         // type has not been specified, we chose a 64bit type to force a 64bit
1808         // move.
1809         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1810       }
1811     }
1812     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1813            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1814     CPURegister dst = CPURegisterFrom(destination, dst_type);
1815     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1816       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1817       __ Ldr(dst, StackOperandFrom(source));
1818     } else if (source.IsSIMDStackSlot()) {
1819       GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
1820     } else if (source.IsConstant()) {
1821       DCHECK(CoherentConstantAndType(source, dst_type));
1822       MoveConstant(dst, source.GetConstant());
1823     } else if (source.IsRegister()) {
1824       if (destination.IsRegister()) {
1825         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1826       } else {
1827         DCHECK(destination.IsFpuRegister());
1828         DataType::Type source_type = DataType::Is64BitType(dst_type)
1829             ? DataType::Type::kInt64
1830             : DataType::Type::kInt32;
1831         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1832       }
1833     } else {
1834       DCHECK(source.IsFpuRegister());
1835       if (destination.IsRegister()) {
1836         DataType::Type source_type = DataType::Is64BitType(dst_type)
1837             ? DataType::Type::kFloat64
1838             : DataType::Type::kFloat32;
1839         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1840       } else {
1841         DCHECK(destination.IsFpuRegister());
1842         if (GetGraph()->HasSIMD()) {
1843           GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
1844         } else {
1845           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1846         }
1847       }
1848     }
1849   } else if (destination.IsSIMDStackSlot()) {
1850     GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
1851   } else {  // The destination is not a register. It must be a stack slot.
1852     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1853     if (source.IsRegister() || source.IsFpuRegister()) {
1854       if (unspecified_type) {
1855         if (source.IsRegister()) {
1856           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1857         } else {
1858           dst_type =
1859               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1860         }
1861       }
1862       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1863              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1864       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1865     } else if (source.IsConstant()) {
1866       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1867           << source << " " << dst_type;
1868       UseScratchRegisterScope temps(GetVIXLAssembler());
1869       HConstant* src_cst = source.GetConstant();
1870       CPURegister temp;
1871       if (src_cst->IsZeroBitPattern()) {
1872         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1873             ? Register(xzr)
1874             : Register(wzr);
1875       } else {
1876         if (src_cst->IsIntConstant()) {
1877           temp = temps.AcquireW();
1878         } else if (src_cst->IsLongConstant()) {
1879           temp = temps.AcquireX();
1880         } else if (src_cst->IsFloatConstant()) {
1881           temp = temps.AcquireS();
1882         } else {
1883           DCHECK(src_cst->IsDoubleConstant());
1884           temp = temps.AcquireD();
1885         }
1886         MoveConstant(temp, src_cst);
1887       }
1888       __ Str(temp, StackOperandFrom(destination));
1889     } else {
1890       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1891       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1892       UseScratchRegisterScope temps(GetVIXLAssembler());
1893       // Use any scratch register (a core or a floating-point one)
1894       // from VIXL scratch register pools as a temporary.
1895       //
1896       // We used to only use the FP scratch register pool, but in some
1897       // rare cases the only register from this pool (D31) would
1898       // already be used (e.g. within a ParallelMove instruction, when
1899       // a move is blocked by a another move requiring a scratch FP
1900       // register, which would reserve D31). To prevent this issue, we
1901       // ask for a scratch register of any type (core or FP).
1902       //
1903       // Also, we start by asking for a FP scratch register first, as the
1904       // demand of scratch core registers is higher. This is why we
1905       // use AcquireFPOrCoreCPURegisterOfSize instead of
1906       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1907       // allocates core scratch registers first.
1908       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1909           GetVIXLAssembler(),
1910           &temps,
1911           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1912       __ Ldr(temp, StackOperandFrom(source));
1913       __ Str(temp, StackOperandFrom(destination));
1914     }
1915   }
1916 }
1917 
Load(DataType::Type type,CPURegister dst,const MemOperand & src)1918 void CodeGeneratorARM64::Load(DataType::Type type,
1919                               CPURegister dst,
1920                               const MemOperand& src) {
1921   switch (type) {
1922     case DataType::Type::kBool:
1923     case DataType::Type::kUint8:
1924       __ Ldrb(Register(dst), src);
1925       break;
1926     case DataType::Type::kInt8:
1927       __ Ldrsb(Register(dst), src);
1928       break;
1929     case DataType::Type::kUint16:
1930       __ Ldrh(Register(dst), src);
1931       break;
1932     case DataType::Type::kInt16:
1933       __ Ldrsh(Register(dst), src);
1934       break;
1935     case DataType::Type::kInt32:
1936     case DataType::Type::kReference:
1937     case DataType::Type::kInt64:
1938     case DataType::Type::kFloat32:
1939     case DataType::Type::kFloat64:
1940       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1941       __ Ldr(dst, src);
1942       break;
1943     case DataType::Type::kUint32:
1944     case DataType::Type::kUint64:
1945     case DataType::Type::kVoid:
1946       LOG(FATAL) << "Unreachable type " << type;
1947   }
1948 }
1949 
LoadAcquire(HInstruction * instruction,DataType::Type type,CPURegister dst,const MemOperand & src,bool needs_null_check)1950 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1951                                      DataType::Type type,
1952                                      CPURegister dst,
1953                                      const MemOperand& src,
1954                                      bool needs_null_check) {
1955   MacroAssembler* masm = GetVIXLAssembler();
1956   UseScratchRegisterScope temps(masm);
1957   Register temp_base = temps.AcquireX();
1958 
1959   DCHECK(!src.IsPreIndex());
1960   DCHECK(!src.IsPostIndex());
1961 
1962   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1963   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1964   {
1965     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1966     MemOperand base = MemOperand(temp_base);
1967     switch (type) {
1968       case DataType::Type::kBool:
1969       case DataType::Type::kUint8:
1970       case DataType::Type::kInt8:
1971         {
1972           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1973           __ ldarb(Register(dst), base);
1974           if (needs_null_check) {
1975             MaybeRecordImplicitNullCheck(instruction);
1976           }
1977         }
1978         if (type == DataType::Type::kInt8) {
1979           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1980         }
1981         break;
1982       case DataType::Type::kUint16:
1983       case DataType::Type::kInt16:
1984         {
1985           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1986           __ ldarh(Register(dst), base);
1987           if (needs_null_check) {
1988             MaybeRecordImplicitNullCheck(instruction);
1989           }
1990         }
1991         if (type == DataType::Type::kInt16) {
1992           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1993         }
1994         break;
1995       case DataType::Type::kInt32:
1996       case DataType::Type::kReference:
1997       case DataType::Type::kInt64:
1998         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1999         {
2000           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2001           __ ldar(Register(dst), base);
2002           if (needs_null_check) {
2003             MaybeRecordImplicitNullCheck(instruction);
2004           }
2005         }
2006         break;
2007       case DataType::Type::kFloat32:
2008       case DataType::Type::kFloat64: {
2009         DCHECK(dst.IsFPRegister());
2010         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
2011 
2012         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2013         {
2014           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2015           __ ldar(temp, base);
2016           if (needs_null_check) {
2017             MaybeRecordImplicitNullCheck(instruction);
2018           }
2019         }
2020         __ Fmov(VRegister(dst), temp);
2021         break;
2022       }
2023       case DataType::Type::kUint32:
2024       case DataType::Type::kUint64:
2025       case DataType::Type::kVoid:
2026         LOG(FATAL) << "Unreachable type " << type;
2027     }
2028   }
2029 }
2030 
Store(DataType::Type type,CPURegister src,const MemOperand & dst)2031 void CodeGeneratorARM64::Store(DataType::Type type,
2032                                CPURegister src,
2033                                const MemOperand& dst) {
2034   switch (type) {
2035     case DataType::Type::kBool:
2036     case DataType::Type::kUint8:
2037     case DataType::Type::kInt8:
2038       __ Strb(Register(src), dst);
2039       break;
2040     case DataType::Type::kUint16:
2041     case DataType::Type::kInt16:
2042       __ Strh(Register(src), dst);
2043       break;
2044     case DataType::Type::kInt32:
2045     case DataType::Type::kReference:
2046     case DataType::Type::kInt64:
2047     case DataType::Type::kFloat32:
2048     case DataType::Type::kFloat64:
2049       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2050       __ Str(src, dst);
2051       break;
2052     case DataType::Type::kUint32:
2053     case DataType::Type::kUint64:
2054     case DataType::Type::kVoid:
2055       LOG(FATAL) << "Unreachable type " << type;
2056   }
2057 }
2058 
StoreRelease(HInstruction * instruction,DataType::Type type,CPURegister src,const MemOperand & dst,bool needs_null_check)2059 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
2060                                       DataType::Type type,
2061                                       CPURegister src,
2062                                       const MemOperand& dst,
2063                                       bool needs_null_check) {
2064   MacroAssembler* masm = GetVIXLAssembler();
2065   UseScratchRegisterScope temps(GetVIXLAssembler());
2066   Register temp_base = temps.AcquireX();
2067 
2068   DCHECK(!dst.IsPreIndex());
2069   DCHECK(!dst.IsPostIndex());
2070 
2071   // TODO(vixl): Let the MacroAssembler handle this.
2072   Operand op = OperandFromMemOperand(dst);
2073   __ Add(temp_base, dst.GetBaseRegister(), op);
2074   MemOperand base = MemOperand(temp_base);
2075   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2076   switch (type) {
2077     case DataType::Type::kBool:
2078     case DataType::Type::kUint8:
2079     case DataType::Type::kInt8:
2080       {
2081         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2082         __ stlrb(Register(src), base);
2083         if (needs_null_check) {
2084           MaybeRecordImplicitNullCheck(instruction);
2085         }
2086       }
2087       break;
2088     case DataType::Type::kUint16:
2089     case DataType::Type::kInt16:
2090       {
2091         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2092         __ stlrh(Register(src), base);
2093         if (needs_null_check) {
2094           MaybeRecordImplicitNullCheck(instruction);
2095         }
2096       }
2097       break;
2098     case DataType::Type::kInt32:
2099     case DataType::Type::kReference:
2100     case DataType::Type::kInt64:
2101       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2102       {
2103         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2104         __ stlr(Register(src), base);
2105         if (needs_null_check) {
2106           MaybeRecordImplicitNullCheck(instruction);
2107         }
2108       }
2109       break;
2110     case DataType::Type::kFloat32:
2111     case DataType::Type::kFloat64: {
2112       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2113       Register temp_src;
2114       if (src.IsZero()) {
2115         // The zero register is used to avoid synthesizing zero constants.
2116         temp_src = Register(src);
2117       } else {
2118         DCHECK(src.IsFPRegister());
2119         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2120         __ Fmov(temp_src, VRegister(src));
2121       }
2122       {
2123         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2124         __ stlr(temp_src, base);
2125         if (needs_null_check) {
2126           MaybeRecordImplicitNullCheck(instruction);
2127         }
2128       }
2129       break;
2130     }
2131     case DataType::Type::kUint32:
2132     case DataType::Type::kUint64:
2133     case DataType::Type::kVoid:
2134       LOG(FATAL) << "Unreachable type " << type;
2135   }
2136 }
2137 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)2138 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2139                                        HInstruction* instruction,
2140                                        uint32_t dex_pc,
2141                                        SlowPathCode* slow_path) {
2142   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2143 
2144   ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
2145   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2146   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2147   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2148   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2149     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2150     // Ensure the pc position is recorded immediately after the `blr` instruction.
2151     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2152     __ blr(lr);
2153     if (EntrypointRequiresStackMap(entrypoint)) {
2154       RecordPcInfo(instruction, dex_pc, slow_path);
2155     }
2156   } else {
2157     // Ensure the pc position is recorded immediately after the `bl` instruction.
2158     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2159     EmitEntrypointThunkCall(entrypoint_offset);
2160     if (EntrypointRequiresStackMap(entrypoint)) {
2161       RecordPcInfo(instruction, dex_pc, slow_path);
2162     }
2163   }
2164 }
2165 
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)2166 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2167                                                              HInstruction* instruction,
2168                                                              SlowPathCode* slow_path) {
2169   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2170   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2171   __ Blr(lr);
2172 }
2173 
GenerateClassInitializationCheck(SlowPathCodeARM64 * slow_path,Register class_reg)2174 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2175                                                                      Register class_reg) {
2176   UseScratchRegisterScope temps(GetVIXLAssembler());
2177   Register temp = temps.AcquireW();
2178 
2179   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
2180   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
2181   // size, load only the high byte of the field and compare with 0xf0.
2182   // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks
2183   // show that this pattern is slower (tested on little cores).
2184   __ Ldrb(temp, HeapOperand(class_reg, kClassStatusByteOffset));
2185   __ Cmp(temp, kShiftedVisiblyInitializedValue);
2186   __ B(lo, slow_path->GetEntryLabel());
2187   __ Bind(slow_path->GetExitLabel());
2188 }
2189 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,vixl::aarch64::Register temp)2190 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
2191     HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
2192   uint32_t path_to_root = check->GetBitstringPathToRoot();
2193   uint32_t mask = check->GetBitstringMask();
2194   DCHECK(IsPowerOfTwo(mask + 1));
2195   size_t mask_bits = WhichPowerOf2(mask + 1);
2196 
2197   if (mask_bits == 16u) {
2198     // Load only the bitstring part of the status word.
2199     __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2200   } else {
2201     // /* uint32_t */ temp = temp->status_
2202     __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2203     // Extract the bitstring bits.
2204     __ Ubfx(temp, temp, 0, mask_bits);
2205   }
2206   // Compare the bitstring bits to `path_to_root`.
2207   __ Cmp(temp, path_to_root);
2208 }
2209 
GenerateMemoryBarrier(MemBarrierKind kind)2210 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2211   BarrierType type = BarrierAll;
2212 
2213   switch (kind) {
2214     case MemBarrierKind::kAnyAny:
2215     case MemBarrierKind::kAnyStore: {
2216       type = BarrierAll;
2217       break;
2218     }
2219     case MemBarrierKind::kLoadAny: {
2220       type = BarrierReads;
2221       break;
2222     }
2223     case MemBarrierKind::kStoreStore: {
2224       type = BarrierWrites;
2225       break;
2226     }
2227     default:
2228       LOG(FATAL) << "Unexpected memory barrier " << kind;
2229   }
2230   __ Dmb(InnerShareable, type);
2231 }
2232 
CanUseImplicitSuspendCheck() const2233 bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const {
2234   // Use implicit suspend checks if requested in compiler options unless there are SIMD
2235   // instructions in the graph. The implicit suspend check saves all FP registers as
2236   // 64-bit (in line with the calling convention) but SIMD instructions can use 128-bit
2237   // registers, so they need to be saved in an explicit slow path.
2238   return GetCompilerOptions().GetImplicitSuspendChecks() && !GetGraph()->HasSIMD();
2239 }
2240 
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)2241 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2242                                                          HBasicBlock* successor) {
2243   if (instruction->IsNoOp()) {
2244     if (successor != nullptr) {
2245       __ B(codegen_->GetLabelOf(successor));
2246     }
2247     return;
2248   }
2249 
2250   if (codegen_->CanUseImplicitSuspendCheck()) {
2251     __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
2252     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
2253     if (successor != nullptr) {
2254       __ B(codegen_->GetLabelOf(successor));
2255     }
2256     return;
2257   }
2258 
2259   SuspendCheckSlowPathARM64* slow_path =
2260       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2261   if (slow_path == nullptr) {
2262     slow_path =
2263         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
2264     instruction->SetSlowPath(slow_path);
2265     codegen_->AddSlowPath(slow_path);
2266     if (successor != nullptr) {
2267       DCHECK(successor->IsLoopHeader());
2268     }
2269   } else {
2270     DCHECK_EQ(slow_path->GetSuccessor(), successor);
2271   }
2272 
2273   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2274   Register temp = temps.AcquireW();
2275 
2276   __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2277   __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
2278   if (successor == nullptr) {
2279     __ B(ne, slow_path->GetEntryLabel());
2280     __ Bind(slow_path->GetReturnLabel());
2281   } else {
2282     __ B(eq, codegen_->GetLabelOf(successor));
2283     __ B(slow_path->GetEntryLabel());
2284     // slow_path will return to GetLabelOf(successor).
2285   }
2286 }
2287 
InstructionCodeGeneratorARM64(HGraph * graph,CodeGeneratorARM64 * codegen)2288 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2289                                                              CodeGeneratorARM64* codegen)
2290       : InstructionCodeGenerator(graph, codegen),
2291         assembler_(codegen->GetAssembler()),
2292         codegen_(codegen) {}
2293 
HandleBinaryOp(HBinaryOperation * instr)2294 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2295   DCHECK_EQ(instr->InputCount(), 2U);
2296   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2297   DataType::Type type = instr->GetResultType();
2298   switch (type) {
2299     case DataType::Type::kInt32:
2300     case DataType::Type::kInt64:
2301       locations->SetInAt(0, Location::RequiresRegister());
2302       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2303       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2304       break;
2305 
2306     case DataType::Type::kFloat32:
2307     case DataType::Type::kFloat64:
2308       locations->SetInAt(0, Location::RequiresFpuRegister());
2309       locations->SetInAt(1, Location::RequiresFpuRegister());
2310       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2311       break;
2312 
2313     default:
2314       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2315   }
2316 }
2317 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)2318 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2319                                            const FieldInfo& field_info) {
2320   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2321 
2322   bool object_field_get_with_read_barrier =
2323       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
2324   LocationSummary* locations =
2325       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2326                                                        object_field_get_with_read_barrier
2327                                                            ? LocationSummary::kCallOnSlowPath
2328                                                            : LocationSummary::kNoCall);
2329   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2330     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2331     // We need a temporary register for the read barrier load in
2332     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2333     // only if the field is volatile or the offset is too big.
2334     if (field_info.IsVolatile() ||
2335         field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2336       locations->AddTemp(FixedTempLocation());
2337     }
2338   }
2339   // Input for object receiver.
2340   locations->SetInAt(0, Location::RequiresRegister());
2341   if (DataType::IsFloatingPointType(instruction->GetType())) {
2342     locations->SetOut(Location::RequiresFpuRegister());
2343   } else {
2344     // The output overlaps for an object field get for non-Baker read barriers: we do not want
2345     // the load to overwrite the object's location, as we need it to emit the read barrier.
2346     // Baker read barrier implementation with introspection does not have this restriction.
2347     bool overlap = object_field_get_with_read_barrier && !kUseBakerReadBarrier;
2348     locations->SetOut(Location::RequiresRegister(),
2349                       overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2350   }
2351 }
2352 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)2353 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2354                                                    const FieldInfo& field_info) {
2355   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2356   LocationSummary* locations = instruction->GetLocations();
2357   uint32_t receiver_input = 0;
2358   Location base_loc = locations->InAt(receiver_input);
2359   Location out = locations->Out();
2360   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2361   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2362   DataType::Type load_type = instruction->GetType();
2363   MemOperand field =
2364       HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
2365 
2366   if (load_type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
2367     // Object FieldGet with Baker's read barrier case.
2368     // /* HeapReference<Object> */ out = *(base + offset)
2369     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2370     Location maybe_temp =
2371         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2372     // Note that potential implicit null checks are handled in this
2373     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2374     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2375         instruction,
2376         out,
2377         base,
2378         offset,
2379         maybe_temp,
2380         /* needs_null_check= */ true,
2381         field_info.IsVolatile());
2382   } else {
2383     // General case.
2384     if (field_info.IsVolatile()) {
2385       // Note that a potential implicit null check is handled in this
2386       // CodeGeneratorARM64::LoadAcquire call.
2387       // NB: LoadAcquire will record the pc info if needed.
2388       codegen_->LoadAcquire(instruction,
2389                             load_type,
2390                             OutputCPURegister(instruction),
2391                             field,
2392                             /* needs_null_check= */ true);
2393     } else {
2394       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2395       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2396       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2397       codegen_->MaybeRecordImplicitNullCheck(instruction);
2398     }
2399     if (load_type == DataType::Type::kReference) {
2400       // If read barriers are enabled, emit read barriers other than
2401       // Baker's using a slow path (and also unpoison the loaded
2402       // reference, if heap poisoning is enabled).
2403       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2404     }
2405   }
2406 }
2407 
HandleFieldSet(HInstruction * instruction)2408 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2409   LocationSummary* locations =
2410       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2411   locations->SetInAt(0, Location::RequiresRegister());
2412   HInstruction* value = instruction->InputAt(1);
2413   if (IsZeroBitPattern(value)) {
2414     locations->SetInAt(1, Location::ConstantLocation(value));
2415   } else if (DataType::IsFloatingPointType(value->GetType())) {
2416     locations->SetInAt(1, Location::RequiresFpuRegister());
2417   } else {
2418     locations->SetInAt(1, Location::RequiresRegister());
2419   }
2420 }
2421 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null,WriteBarrierKind write_barrier_kind)2422 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2423                                                    const FieldInfo& field_info,
2424                                                    bool value_can_be_null,
2425                                                    WriteBarrierKind write_barrier_kind) {
2426   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2427 
2428   Register obj = InputRegisterAt(instruction, 0);
2429   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2430   CPURegister source = value;
2431   Offset offset = field_info.GetFieldOffset();
2432   DataType::Type field_type = field_info.GetFieldType();
2433   {
2434     // We use a block to end the scratch scope before the write barrier, thus
2435     // freeing the temporary registers so they can be used in `MarkGCCard`.
2436     UseScratchRegisterScope temps(GetVIXLAssembler());
2437 
2438     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2439       DCHECK(value.IsW());
2440       Register temp = temps.AcquireW();
2441       __ Mov(temp, value.W());
2442       GetAssembler()->PoisonHeapReference(temp.W());
2443       source = temp;
2444     }
2445 
2446     if (field_info.IsVolatile()) {
2447       codegen_->StoreRelease(
2448           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2449     } else {
2450       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2451       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2452       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2453       codegen_->MaybeRecordImplicitNullCheck(instruction);
2454     }
2455   }
2456 
2457   const bool needs_write_barrier =
2458       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
2459 
2460   if (needs_write_barrier) {
2461     DCHECK_IMPLIES(Register(value).IsZero(),
2462                    write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn);
2463     codegen_->MaybeMarkGCCard(
2464         obj,
2465         Register(value),
2466         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
2467   } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
2468     codegen_->CheckGCCardIsValid(obj);
2469   }
2470 }
2471 
HandleBinaryOp(HBinaryOperation * instr)2472 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2473   DataType::Type type = instr->GetType();
2474 
2475   switch (type) {
2476     case DataType::Type::kInt32:
2477     case DataType::Type::kInt64: {
2478       Register dst = OutputRegister(instr);
2479       Register lhs = InputRegisterAt(instr, 0);
2480       Operand rhs = InputOperandAt(instr, 1);
2481       if (instr->IsAdd()) {
2482         __ Add(dst, lhs, rhs);
2483       } else if (instr->IsAnd()) {
2484         __ And(dst, lhs, rhs);
2485       } else if (instr->IsOr()) {
2486         __ Orr(dst, lhs, rhs);
2487       } else if (instr->IsSub()) {
2488         __ Sub(dst, lhs, rhs);
2489       } else if (instr->IsRol()) {
2490         if (rhs.IsImmediate()) {
2491           uint32_t shift = (-rhs.GetImmediate()) & (lhs.GetSizeInBits() - 1);
2492           __ Ror(dst, lhs, shift);
2493         } else {
2494           UseScratchRegisterScope temps(GetVIXLAssembler());
2495 
2496           // Ensure shift distance is in the same size register as the result. If
2497           // we are rotating a long and the shift comes in a w register originally,
2498           // we don't need to sxtw for use as an x since the shift distances are
2499           // all & reg_bits - 1.
2500           Register right = RegisterFrom(instr->GetLocations()->InAt(1), type);
2501           Register negated = (type == DataType::Type::kInt32) ? temps.AcquireW() : temps.AcquireX();
2502           __ Neg(negated, right);
2503           __ Ror(dst, lhs, negated);
2504         }
2505       } else if (instr->IsRor()) {
2506         if (rhs.IsImmediate()) {
2507           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2508           __ Ror(dst, lhs, shift);
2509         } else {
2510           // Ensure shift distance is in the same size register as the result. If
2511           // we are rotating a long and the shift comes in a w register originally,
2512           // we don't need to sxtw for use as an x since the shift distances are
2513           // all & reg_bits - 1.
2514           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2515         }
2516       } else if (instr->IsMin() || instr->IsMax()) {
2517           __ Cmp(lhs, rhs);
2518           __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2519       } else {
2520         DCHECK(instr->IsXor());
2521         __ Eor(dst, lhs, rhs);
2522       }
2523       break;
2524     }
2525     case DataType::Type::kFloat32:
2526     case DataType::Type::kFloat64: {
2527       VRegister dst = OutputFPRegister(instr);
2528       VRegister lhs = InputFPRegisterAt(instr, 0);
2529       VRegister rhs = InputFPRegisterAt(instr, 1);
2530       if (instr->IsAdd()) {
2531         __ Fadd(dst, lhs, rhs);
2532       } else if (instr->IsSub()) {
2533         __ Fsub(dst, lhs, rhs);
2534       } else if (instr->IsMin()) {
2535         __ Fmin(dst, lhs, rhs);
2536       } else if (instr->IsMax()) {
2537         __ Fmax(dst, lhs, rhs);
2538       } else {
2539         LOG(FATAL) << "Unexpected floating-point binary operation";
2540       }
2541       break;
2542     }
2543     default:
2544       LOG(FATAL) << "Unexpected binary operation type " << type;
2545   }
2546 }
2547 
HandleShift(HBinaryOperation * instr)2548 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2549   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2550 
2551   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2552   DataType::Type type = instr->GetResultType();
2553   switch (type) {
2554     case DataType::Type::kInt32:
2555     case DataType::Type::kInt64: {
2556       locations->SetInAt(0, Location::RequiresRegister());
2557       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2558       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2559       break;
2560     }
2561     default:
2562       LOG(FATAL) << "Unexpected shift type " << type;
2563   }
2564 }
2565 
HandleShift(HBinaryOperation * instr)2566 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2567   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2568 
2569   DataType::Type type = instr->GetType();
2570   switch (type) {
2571     case DataType::Type::kInt32:
2572     case DataType::Type::kInt64: {
2573       Register dst = OutputRegister(instr);
2574       Register lhs = InputRegisterAt(instr, 0);
2575       Operand rhs = InputOperandAt(instr, 1);
2576       if (rhs.IsImmediate()) {
2577         uint32_t shift_value = rhs.GetImmediate() &
2578             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2579         if (instr->IsShl()) {
2580           __ Lsl(dst, lhs, shift_value);
2581         } else if (instr->IsShr()) {
2582           __ Asr(dst, lhs, shift_value);
2583         } else {
2584           __ Lsr(dst, lhs, shift_value);
2585         }
2586       } else {
2587         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2588 
2589         if (instr->IsShl()) {
2590           __ Lsl(dst, lhs, rhs_reg);
2591         } else if (instr->IsShr()) {
2592           __ Asr(dst, lhs, rhs_reg);
2593         } else {
2594           __ Lsr(dst, lhs, rhs_reg);
2595         }
2596       }
2597       break;
2598     }
2599     default:
2600       LOG(FATAL) << "Unexpected shift operation type " << type;
2601   }
2602 }
2603 
VisitAdd(HAdd * instruction)2604 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2605   HandleBinaryOp(instruction);
2606 }
2607 
VisitAdd(HAdd * instruction)2608 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2609   HandleBinaryOp(instruction);
2610 }
2611 
VisitAnd(HAnd * instruction)2612 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2613   HandleBinaryOp(instruction);
2614 }
2615 
VisitAnd(HAnd * instruction)2616 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2617   HandleBinaryOp(instruction);
2618 }
2619 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2620 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2621   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2622   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2623   locations->SetInAt(0, Location::RequiresRegister());
2624   // There is no immediate variant of negated bitwise instructions in AArch64.
2625   locations->SetInAt(1, Location::RequiresRegister());
2626   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2627 }
2628 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2629 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2630   Register dst = OutputRegister(instr);
2631   Register lhs = InputRegisterAt(instr, 0);
2632   Register rhs = InputRegisterAt(instr, 1);
2633 
2634   switch (instr->GetOpKind()) {
2635     case HInstruction::kAnd:
2636       __ Bic(dst, lhs, rhs);
2637       break;
2638     case HInstruction::kOr:
2639       __ Orn(dst, lhs, rhs);
2640       break;
2641     case HInstruction::kXor:
2642       __ Eon(dst, lhs, rhs);
2643       break;
2644     default:
2645       LOG(FATAL) << "Unreachable";
2646   }
2647 }
2648 
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2649 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2650     HDataProcWithShifterOp* instruction) {
2651   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2652          instruction->GetType() == DataType::Type::kInt64);
2653   LocationSummary* locations =
2654       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2655   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2656     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)));
2657   } else {
2658     locations->SetInAt(0, Location::RequiresRegister());
2659   }
2660   locations->SetInAt(1, Location::RequiresRegister());
2661   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2662 }
2663 
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2664 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2665     HDataProcWithShifterOp* instruction) {
2666   DataType::Type type = instruction->GetType();
2667   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2668   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2669   Register out = OutputRegister(instruction);
2670   Register left;
2671   if (kind != HInstruction::kNeg) {
2672     left = InputRegisterAt(instruction, 0);
2673   }
2674   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2675   // shifter operand operation, the IR generating `right_reg` (input to the type
2676   // conversion) can have a different type from the current instruction's type,
2677   // so we manually indicate the type.
2678   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2679   Operand right_operand(0);
2680 
2681   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2682   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2683     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2684   } else {
2685     right_operand = Operand(right_reg,
2686                             helpers::ShiftFromOpKind(op_kind),
2687                             instruction->GetShiftAmount());
2688   }
2689 
2690   // Logical binary operations do not support extension operations in the
2691   // operand. Note that VIXL would still manage if it was passed by generating
2692   // the extension as a separate instruction.
2693   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2694   DCHECK_IMPLIES(right_operand.IsExtendedRegister(),
2695                  kind != HInstruction::kAnd && kind != HInstruction::kOr &&
2696                      kind != HInstruction::kXor && kind != HInstruction::kNeg);
2697   switch (kind) {
2698     case HInstruction::kAdd:
2699       __ Add(out, left, right_operand);
2700       break;
2701     case HInstruction::kAnd:
2702       __ And(out, left, right_operand);
2703       break;
2704     case HInstruction::kNeg:
2705       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2706       __ Neg(out, right_operand);
2707       break;
2708     case HInstruction::kOr:
2709       __ Orr(out, left, right_operand);
2710       break;
2711     case HInstruction::kSub:
2712       __ Sub(out, left, right_operand);
2713       break;
2714     case HInstruction::kXor:
2715       __ Eor(out, left, right_operand);
2716       break;
2717     default:
2718       LOG(FATAL) << "Unexpected operation kind: " << kind;
2719       UNREACHABLE();
2720   }
2721 }
2722 
VisitIntermediateAddress(HIntermediateAddress * instruction)2723 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2724   LocationSummary* locations =
2725       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2726   locations->SetInAt(0, Location::RequiresRegister());
2727   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2728   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2729 }
2730 
VisitIntermediateAddress(HIntermediateAddress * instruction)2731 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2732   __ Add(OutputRegister(instruction),
2733          InputRegisterAt(instruction, 0),
2734          Operand(InputOperandAt(instruction, 1)));
2735 }
2736 
VisitIntermediateAddressIndex(HIntermediateAddressIndex * instruction)2737 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2738   LocationSummary* locations =
2739       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2740 
2741   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2742 
2743   locations->SetInAt(0, Location::RequiresRegister());
2744   // For byte case we don't need to shift the index variable so we can encode the data offset into
2745   // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist
2746   // data offset constant generation out of the loop and reduce the critical path length in the
2747   // loop.
2748   locations->SetInAt(1, shift->GetValue() == 0
2749                         ? Location::ConstantLocation(instruction->GetOffset())
2750                         : Location::RequiresRegister());
2751   locations->SetInAt(2, Location::ConstantLocation(shift));
2752   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2753 }
2754 
VisitIntermediateAddressIndex(HIntermediateAddressIndex * instruction)2755 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2756     HIntermediateAddressIndex* instruction) {
2757   Register index_reg = InputRegisterAt(instruction, 0);
2758   uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2759   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2760 
2761   if (shift == 0) {
2762     __ Add(OutputRegister(instruction), index_reg, offset);
2763   } else {
2764     Register offset_reg = InputRegisterAt(instruction, 1);
2765     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2766   }
2767 }
2768 
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)2769 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2770   LocationSummary* locations =
2771       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2772   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2773   if (instr->GetOpKind() == HInstruction::kSub &&
2774       accumulator->IsConstant() &&
2775       accumulator->AsConstant()->IsArithmeticZero()) {
2776     // Don't allocate register for Mneg instruction.
2777   } else {
2778     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2779                        Location::RequiresRegister());
2780   }
2781   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2782   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2783   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2784 }
2785 
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)2786 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2787   Register res = OutputRegister(instr);
2788   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2789   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2790 
2791   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2792   // This fixup should be carried out for all multiply-accumulate instructions:
2793   // madd, msub, smaddl, smsubl, umaddl and umsubl.
2794   if (instr->GetType() == DataType::Type::kInt64 &&
2795       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2796     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2797     ptrdiff_t off = masm->GetCursorOffset();
2798     if (off >= static_cast<ptrdiff_t>(kInstructionSize) &&
2799         masm->GetInstructionAt(off - static_cast<ptrdiff_t>(kInstructionSize))->IsLoadOrStore()) {
2800       // Make sure we emit only exactly one nop.
2801       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2802       __ nop();
2803     }
2804   }
2805 
2806   if (instr->GetOpKind() == HInstruction::kAdd) {
2807     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2808     __ Madd(res, mul_left, mul_right, accumulator);
2809   } else {
2810     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2811     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2812     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2813       __ Mneg(res, mul_left, mul_right);
2814     } else {
2815       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2816       __ Msub(res, mul_left, mul_right, accumulator);
2817     }
2818   }
2819 }
2820 
VisitArrayGet(HArrayGet * instruction)2821 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2822   bool object_array_get_with_read_barrier =
2823       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
2824   LocationSummary* locations =
2825       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2826                                                        object_array_get_with_read_barrier
2827                                                            ? LocationSummary::kCallOnSlowPath
2828                                                            : LocationSummary::kNoCall);
2829   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2830     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2831     if (instruction->GetIndex()->IsConstant()) {
2832       // Array loads with constant index are treated as field loads.
2833       // We need a temporary register for the read barrier load in
2834       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2835       // only if the offset is too big.
2836       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2837       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2838       offset += index << DataType::SizeShift(DataType::Type::kReference);
2839       if (offset >= kReferenceLoadMinFarOffset) {
2840         locations->AddTemp(FixedTempLocation());
2841       }
2842     } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2843       // We need a non-scratch temporary for the array data pointer in
2844       // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2845       // intermediate address.
2846       locations->AddTemp(Location::RequiresRegister());
2847     }
2848   }
2849   locations->SetInAt(0, Location::RequiresRegister());
2850   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2851   if (DataType::IsFloatingPointType(instruction->GetType())) {
2852     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2853   } else {
2854     // The output overlaps for an object array get for non-Baker read barriers: we do not want
2855     // the load to overwrite the object's location, as we need it to emit the read barrier.
2856     // Baker read barrier implementation with introspection does not have this restriction.
2857     bool overlap = object_array_get_with_read_barrier && !kUseBakerReadBarrier;
2858     locations->SetOut(Location::RequiresRegister(),
2859                       overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2860   }
2861 }
2862 
VisitArrayGet(HArrayGet * instruction)2863 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2864   DataType::Type type = instruction->GetType();
2865   Register obj = InputRegisterAt(instruction, 0);
2866   LocationSummary* locations = instruction->GetLocations();
2867   Location index = locations->InAt(1);
2868   Location out = locations->Out();
2869   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2870   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2871                                         instruction->IsStringCharAt();
2872   MacroAssembler* masm = GetVIXLAssembler();
2873   UseScratchRegisterScope temps(masm);
2874 
2875   // The non-Baker read barrier instrumentation of object ArrayGet instructions
2876   // does not support the HIntermediateAddress instruction.
2877   DCHECK(!((type == DataType::Type::kReference) &&
2878            instruction->GetArray()->IsIntermediateAddress() &&
2879            codegen_->EmitNonBakerReadBarrier()));
2880 
2881   if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
2882     // Object ArrayGet with Baker's read barrier case.
2883     // Note that a potential implicit null check is handled in the
2884     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2885     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2886     if (index.IsConstant()) {
2887       DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2888       // Array load with a constant index can be treated as a field load.
2889       offset += Int64FromLocation(index) << DataType::SizeShift(type);
2890       Location maybe_temp =
2891           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2892       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2893                                                       out,
2894                                                       obj.W(),
2895                                                       offset,
2896                                                       maybe_temp,
2897                                                       /* needs_null_check= */ false,
2898                                                       /* use_load_acquire= */ false);
2899     } else {
2900       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2901           instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2902     }
2903   } else {
2904     // General case.
2905     MemOperand source = HeapOperand(obj);
2906     Register length;
2907     if (maybe_compressed_char_at) {
2908       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2909       length = temps.AcquireW();
2910       {
2911         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2912         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2913 
2914         if (instruction->GetArray()->IsIntermediateAddress()) {
2915           DCHECK_LT(count_offset, offset);
2916           int64_t adjusted_offset =
2917               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2918           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2919           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2920         } else {
2921           __ Ldr(length, HeapOperand(obj, count_offset));
2922         }
2923         codegen_->MaybeRecordImplicitNullCheck(instruction);
2924       }
2925     }
2926     if (index.IsConstant()) {
2927       if (maybe_compressed_char_at) {
2928         vixl::aarch64::Label uncompressed_load, done;
2929         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2930                       "Expecting 0=compressed, 1=uncompressed");
2931         __ Tbnz(length.W(), 0, &uncompressed_load);
2932         __ Ldrb(Register(OutputCPURegister(instruction)),
2933                 HeapOperand(obj, offset + Int64FromLocation(index)));
2934         __ B(&done);
2935         __ Bind(&uncompressed_load);
2936         __ Ldrh(Register(OutputCPURegister(instruction)),
2937                 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2938         __ Bind(&done);
2939       } else {
2940         offset += Int64FromLocation(index) << DataType::SizeShift(type);
2941         source = HeapOperand(obj, offset);
2942       }
2943     } else {
2944       Register temp = temps.AcquireSameSizeAs(obj);
2945       if (instruction->GetArray()->IsIntermediateAddress()) {
2946         // We do not need to compute the intermediate address from the array: the
2947         // input instruction has done it already. See the comment in
2948         // `TryExtractArrayAccessAddress()`.
2949         if (kIsDebugBuild) {
2950           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2951           DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2952         }
2953         temp = obj;
2954       } else {
2955         __ Add(temp, obj, offset);
2956       }
2957       if (maybe_compressed_char_at) {
2958         vixl::aarch64::Label uncompressed_load, done;
2959         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2960                       "Expecting 0=compressed, 1=uncompressed");
2961         __ Tbnz(length.W(), 0, &uncompressed_load);
2962         __ Ldrb(Register(OutputCPURegister(instruction)),
2963                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2964         __ B(&done);
2965         __ Bind(&uncompressed_load);
2966         __ Ldrh(Register(OutputCPURegister(instruction)),
2967                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2968         __ Bind(&done);
2969       } else {
2970         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2971       }
2972     }
2973     if (!maybe_compressed_char_at) {
2974       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2975       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2976       codegen_->Load(type, OutputCPURegister(instruction), source);
2977       codegen_->MaybeRecordImplicitNullCheck(instruction);
2978     }
2979 
2980     if (type == DataType::Type::kReference) {
2981       static_assert(
2982           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2983           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2984       Location obj_loc = locations->InAt(0);
2985       if (index.IsConstant()) {
2986         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2987       } else {
2988         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2989       }
2990     }
2991   }
2992 }
2993 
VisitArrayLength(HArrayLength * instruction)2994 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2995   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2996   locations->SetInAt(0, Location::RequiresRegister());
2997   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2998 }
2999 
VisitArrayLength(HArrayLength * instruction)3000 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
3001   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
3002   vixl::aarch64::Register out = OutputRegister(instruction);
3003   {
3004     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
3005     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3006     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
3007     codegen_->MaybeRecordImplicitNullCheck(instruction);
3008   }
3009   // Mask out compression flag from String's array length.
3010   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
3011     __ Lsr(out.W(), out.W(), 1u);
3012   }
3013 }
3014 
VisitArraySet(HArraySet * instruction)3015 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
3016   DataType::Type value_type = instruction->GetComponentType();
3017 
3018   bool needs_type_check = instruction->NeedsTypeCheck();
3019   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
3020       instruction,
3021       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
3022   locations->SetInAt(0, Location::RequiresRegister());
3023   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex()));
3024   HInstruction* value = instruction->GetValue();
3025   if (IsZeroBitPattern(value)) {
3026     locations->SetInAt(2, Location::ConstantLocation(value));
3027   } else if (DataType::IsFloatingPointType(value_type)) {
3028     locations->SetInAt(2, Location::RequiresFpuRegister());
3029   } else {
3030     locations->SetInAt(2, Location::RequiresRegister());
3031   }
3032 }
3033 
VisitArraySet(HArraySet * instruction)3034 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
3035   DataType::Type value_type = instruction->GetComponentType();
3036   LocationSummary* locations = instruction->GetLocations();
3037   bool needs_type_check = instruction->NeedsTypeCheck();
3038   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
3039   bool needs_write_barrier =
3040       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
3041 
3042   Register array = InputRegisterAt(instruction, 0);
3043   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
3044   CPURegister source = value;
3045   Location index = locations->InAt(1);
3046   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
3047   MemOperand destination = HeapOperand(array);
3048   MacroAssembler* masm = GetVIXLAssembler();
3049 
3050   if (!needs_write_barrier) {
3051     if (codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind)) {
3052       codegen_->CheckGCCardIsValid(array);
3053     }
3054 
3055     DCHECK(!needs_type_check);
3056     UseScratchRegisterScope temps(masm);
3057     if (index.IsConstant()) {
3058       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
3059       destination = HeapOperand(array, offset);
3060     } else {
3061       Register temp_dest = temps.AcquireSameSizeAs(array);
3062       if (instruction->GetArray()->IsIntermediateAddress()) {
3063         // We do not need to compute the intermediate address from the array: the
3064         // input instruction has done it already. See the comment in
3065         // `TryExtractArrayAccessAddress()`.
3066         if (kIsDebugBuild) {
3067           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
3068           DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
3069         }
3070         temp_dest = array;
3071       } else {
3072         __ Add(temp_dest, array, offset);
3073       }
3074       destination = HeapOperand(temp_dest,
3075                                 XRegisterFrom(index),
3076                                 LSL,
3077                                 DataType::SizeShift(value_type));
3078     }
3079 
3080     if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
3081       DCHECK(value.IsW());
3082       Register temp_src = temps.AcquireW();
3083       __ Mov(temp_src, value.W());
3084       GetAssembler()->PoisonHeapReference(temp_src.W());
3085       source = temp_src;
3086     }
3087 
3088     {
3089       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3090       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3091       codegen_->Store(value_type, source, destination);
3092       codegen_->MaybeRecordImplicitNullCheck(instruction);
3093     }
3094   } else {
3095     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
3096     bool can_value_be_null = true;
3097     // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
3098     // value is null (without an extra CompareAndBranchIfZero since we already checked if the
3099     // value is null for the type check).
3100     bool skip_marking_gc_card = false;
3101     SlowPathCodeARM64* slow_path = nullptr;
3102     vixl::aarch64::Label skip_writing_card;
3103     if (!Register(value).IsZero()) {
3104       can_value_be_null = instruction->GetValueCanBeNull();
3105       skip_marking_gc_card =
3106           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
3107       vixl::aarch64::Label do_store;
3108       if (can_value_be_null) {
3109         if (skip_marking_gc_card) {
3110           __ Cbz(Register(value), &skip_writing_card);
3111         } else {
3112           __ Cbz(Register(value), &do_store);
3113         }
3114       }
3115 
3116       if (needs_type_check) {
3117         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
3118         codegen_->AddSlowPath(slow_path);
3119 
3120         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3121         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3122         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3123 
3124         UseScratchRegisterScope temps(masm);
3125         Register temp = temps.AcquireSameSizeAs(array);
3126         Register temp2 = temps.AcquireSameSizeAs(array);
3127 
3128         // Note that when Baker read barriers are enabled, the type
3129         // checks are performed without read barriers.  This is fine,
3130         // even in the case where a class object is in the from-space
3131         // after the flip, as a comparison involving such a type would
3132         // not produce a false positive; it may of course produce a
3133         // false negative, in which case we would take the ArraySet
3134         // slow path.
3135 
3136         // /* HeapReference<Class> */ temp = array->klass_
3137         {
3138           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
3139           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3140           __ Ldr(temp, HeapOperand(array, class_offset));
3141           codegen_->MaybeRecordImplicitNullCheck(instruction);
3142         }
3143         GetAssembler()->MaybeUnpoisonHeapReference(temp);
3144 
3145         // /* HeapReference<Class> */ temp = temp->component_type_
3146         __ Ldr(temp, HeapOperand(temp, component_offset));
3147         // /* HeapReference<Class> */ temp2 = value->klass_
3148         __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3149         // If heap poisoning is enabled, no need to unpoison `temp`
3150         // nor `temp2`, as we are comparing two poisoned references.
3151         __ Cmp(temp, temp2);
3152 
3153         if (instruction->StaticTypeOfArrayIsObjectArray()) {
3154           vixl::aarch64::Label do_put;
3155           __ B(eq, &do_put);
3156           // If heap poisoning is enabled, the `temp` reference has
3157           // not been unpoisoned yet; unpoison it now.
3158           GetAssembler()->MaybeUnpoisonHeapReference(temp);
3159 
3160           // /* HeapReference<Class> */ temp = temp->super_class_
3161           __ Ldr(temp, HeapOperand(temp, super_offset));
3162           // If heap poisoning is enabled, no need to unpoison
3163           // `temp`, as we are comparing against null below.
3164           __ Cbnz(temp, slow_path->GetEntryLabel());
3165           __ Bind(&do_put);
3166         } else {
3167           __ B(ne, slow_path->GetEntryLabel());
3168         }
3169       }
3170 
3171       if (can_value_be_null && !skip_marking_gc_card) {
3172         DCHECK(do_store.IsLinked());
3173         __ Bind(&do_store);
3174       }
3175     }
3176 
3177     DCHECK_NE(write_barrier_kind, WriteBarrierKind::kDontEmit);
3178     DCHECK_IMPLIES(Register(value).IsZero(),
3179                    write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn);
3180     codegen_->MarkGCCard(array);
3181 
3182     if (skip_marking_gc_card) {
3183       // Note that we don't check that the GC card is valid as it can be correctly clean.
3184       DCHECK(skip_writing_card.IsLinked());
3185       __ Bind(&skip_writing_card);
3186     }
3187 
3188     UseScratchRegisterScope temps(masm);
3189     if (kPoisonHeapReferences) {
3190       DCHECK(value.IsW());
3191       Register temp_source = temps.AcquireW();
3192       __ Mov(temp_source, value.W());
3193       GetAssembler()->PoisonHeapReference(temp_source);
3194       source = temp_source;
3195     }
3196 
3197     if (index.IsConstant()) {
3198       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
3199       destination = HeapOperand(array, offset);
3200     } else {
3201       Register temp_base = temps.AcquireSameSizeAs(array);
3202       __ Add(temp_base, array, offset);
3203       destination = HeapOperand(temp_base,
3204                                 XRegisterFrom(index),
3205                                 LSL,
3206                                 DataType::SizeShift(value_type));
3207     }
3208 
3209     {
3210       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3211       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3212       __ Str(source, destination);
3213 
3214       if (can_value_be_null || !needs_type_check) {
3215         codegen_->MaybeRecordImplicitNullCheck(instruction);
3216       }
3217     }
3218 
3219     if (slow_path != nullptr) {
3220       __ Bind(slow_path->GetExitLabel());
3221     }
3222   }
3223 }
3224 
VisitBoundsCheck(HBoundsCheck * instruction)3225 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3226   RegisterSet caller_saves = RegisterSet::Empty();
3227   InvokeRuntimeCallingConvention calling_convention;
3228   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3229   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3230   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3231 
3232   // If both index and length are constant, we can check the bounds statically and
3233   // generate code accordingly. We want to make sure we generate constant locations
3234   // in that case, regardless of whether they are encodable in the comparison or not.
3235   HInstruction* index = instruction->InputAt(0);
3236   HInstruction* length = instruction->InputAt(1);
3237   bool both_const = index->IsConstant() && length->IsConstant();
3238   locations->SetInAt(0, both_const
3239       ? Location::ConstantLocation(index)
3240       : ARM64EncodableConstantOrRegister(index, instruction));
3241   locations->SetInAt(1, both_const
3242       ? Location::ConstantLocation(length)
3243       : ARM64EncodableConstantOrRegister(length, instruction));
3244 }
3245 
VisitBoundsCheck(HBoundsCheck * instruction)3246 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3247   LocationSummary* locations = instruction->GetLocations();
3248   Location index_loc = locations->InAt(0);
3249   Location length_loc = locations->InAt(1);
3250 
3251   int cmp_first_input = 0;
3252   int cmp_second_input = 1;
3253   Condition cond = hs;
3254 
3255   if (index_loc.IsConstant()) {
3256     int64_t index = Int64FromLocation(index_loc);
3257     if (length_loc.IsConstant()) {
3258       int64_t length = Int64FromLocation(length_loc);
3259       if (index < 0 || index >= length) {
3260         BoundsCheckSlowPathARM64* slow_path =
3261             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3262         codegen_->AddSlowPath(slow_path);
3263         __ B(slow_path->GetEntryLabel());
3264       } else {
3265         // BCE will remove the bounds check if we are guaranteed to pass.
3266         // However, some optimization after BCE may have generated this, and we should not
3267         // generate a bounds check if it is a valid range.
3268       }
3269       return;
3270     }
3271     // Only the index is constant: change the order of the operands and commute the condition
3272     // so we can use an immediate constant for the index (only the second input to a cmp
3273     // instruction can be an immediate).
3274     cmp_first_input = 1;
3275     cmp_second_input = 0;
3276     cond = ls;
3277   }
3278   BoundsCheckSlowPathARM64* slow_path =
3279       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3280   __ Cmp(InputRegisterAt(instruction, cmp_first_input),
3281          InputOperandAt(instruction, cmp_second_input));
3282   codegen_->AddSlowPath(slow_path);
3283   __ B(slow_path->GetEntryLabel(), cond);
3284 }
3285 
VisitClinitCheck(HClinitCheck * check)3286 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3287   LocationSummary* locations =
3288       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3289   locations->SetInAt(0, Location::RequiresRegister());
3290   if (check->HasUses()) {
3291     locations->SetOut(Location::SameAsFirstInput());
3292   }
3293   // Rely on the type initialization to save everything we need.
3294   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
3295 }
3296 
VisitClinitCheck(HClinitCheck * check)3297 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3298   // We assume the class is not null.
3299   SlowPathCodeARM64* slow_path =
3300       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
3301   codegen_->AddSlowPath(slow_path);
3302   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3303 }
3304 
IsFloatingPointZeroConstant(HInstruction * inst)3305 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3306   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3307       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3308 }
3309 
GenerateFcmp(HInstruction * instruction)3310 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3311   VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3312   Location rhs_loc = instruction->GetLocations()->InAt(1);
3313   if (rhs_loc.IsConstant()) {
3314     // 0.0 is the only immediate that can be encoded directly in
3315     // an FCMP instruction.
3316     //
3317     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3318     // specify that in a floating-point comparison, positive zero
3319     // and negative zero are considered equal, so we can use the
3320     // literal 0.0 for both cases here.
3321     //
3322     // Note however that some methods (Float.equal, Float.compare,
3323     // Float.compareTo, Double.equal, Double.compare,
3324     // Double.compareTo, Math.max, Math.min, StrictMath.max,
3325     // StrictMath.min) consider 0.0 to be (strictly) greater than
3326     // -0.0. So if we ever translate calls to these methods into a
3327     // HCompare instruction, we must handle the -0.0 case with
3328     // care here.
3329     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3330     __ Fcmp(lhs_reg, 0.0);
3331   } else {
3332     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3333   }
3334 }
3335 
VisitCompare(HCompare * compare)3336 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3337   LocationSummary* locations =
3338       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3339   DataType::Type compare_type = compare->GetComparisonType();
3340   HInstruction* rhs = compare->InputAt(1);
3341   switch (compare_type) {
3342     case DataType::Type::kBool:
3343     case DataType::Type::kUint8:
3344     case DataType::Type::kInt8:
3345     case DataType::Type::kUint16:
3346     case DataType::Type::kInt16:
3347     case DataType::Type::kInt32:
3348     case DataType::Type::kUint32:
3349     case DataType::Type::kInt64:
3350     case DataType::Type::kUint64: {
3351       locations->SetInAt(0, Location::RequiresRegister());
3352       locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare));
3353       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3354       break;
3355     }
3356     case DataType::Type::kFloat32:
3357     case DataType::Type::kFloat64: {
3358       locations->SetInAt(0, Location::RequiresFpuRegister());
3359       locations->SetInAt(1,
3360                          IsFloatingPointZeroConstant(rhs)
3361                              ? Location::ConstantLocation(rhs)
3362                              : Location::RequiresFpuRegister());
3363       locations->SetOut(Location::RequiresRegister());
3364       break;
3365     }
3366     default:
3367       LOG(FATAL) << "Unexpected type for compare operation " << compare_type;
3368   }
3369 }
3370 
VisitCompare(HCompare * compare)3371 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3372   DataType::Type compare_type = compare->GetComparisonType();
3373 
3374   //  0 if: left == right
3375   //  1 if: left  > right
3376   // -1 if: left  < right
3377   Condition less_cond = lt;
3378   switch (compare_type) {
3379     case DataType::Type::kUint32:
3380     case DataType::Type::kUint64:
3381       less_cond = lo;
3382       FALLTHROUGH_INTENDED;
3383     case DataType::Type::kBool:
3384     case DataType::Type::kUint8:
3385     case DataType::Type::kInt8:
3386     case DataType::Type::kUint16:
3387     case DataType::Type::kInt16:
3388     case DataType::Type::kInt32:
3389     case DataType::Type::kInt64: {
3390       Register result = OutputRegister(compare);
3391       Register left = InputRegisterAt(compare, 0);
3392       Operand right = InputOperandAt(compare, 1);
3393       __ Cmp(left, right);
3394       __ Cset(result, ne);                 // result == +1 if NE or 0 otherwise
3395       __ Cneg(result, result, less_cond);  // result == -1 if LT or unchanged otherwise
3396       break;
3397     }
3398     case DataType::Type::kFloat32:
3399     case DataType::Type::kFloat64: {
3400       Register result = OutputRegister(compare);
3401       GenerateFcmp(compare);
3402       __ Cset(result, ne);
3403       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3404       break;
3405     }
3406     default:
3407       LOG(FATAL) << "Unimplemented compare type " << compare_type;
3408   }
3409 }
3410 
HandleCondition(HCondition * instruction)3411 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3412   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3413 
3414   HInstruction* rhs = instruction->InputAt(1);
3415   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3416     locations->SetInAt(0, Location::RequiresFpuRegister());
3417     locations->SetInAt(1,
3418                        IsFloatingPointZeroConstant(rhs)
3419                            ? Location::ConstantLocation(rhs)
3420                            : Location::RequiresFpuRegister());
3421   } else {
3422     // Integer cases.
3423     locations->SetInAt(0, Location::RequiresRegister());
3424     locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction));
3425   }
3426 
3427   if (!instruction->IsEmittedAtUseSite()) {
3428     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3429   }
3430 }
3431 
HandleCondition(HCondition * instruction)3432 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3433   if (instruction->IsEmittedAtUseSite()) {
3434     return;
3435   }
3436 
3437   LocationSummary* locations = instruction->GetLocations();
3438   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3439   IfCondition if_cond = instruction->GetCondition();
3440 
3441   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3442     GenerateFcmp(instruction);
3443     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3444   } else {
3445     // Integer cases.
3446     Register lhs = InputRegisterAt(instruction, 0);
3447     Operand rhs = InputOperandAt(instruction, 1);
3448     __ Cmp(lhs, rhs);
3449     __ Cset(res, ARM64Condition(if_cond));
3450   }
3451 }
3452 
3453 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3454   M(Equal)                                                                               \
3455   M(NotEqual)                                                                            \
3456   M(LessThan)                                                                            \
3457   M(LessThanOrEqual)                                                                     \
3458   M(GreaterThan)                                                                         \
3459   M(GreaterThanOrEqual)                                                                  \
3460   M(Below)                                                                               \
3461   M(BelowOrEqual)                                                                        \
3462   M(Above)                                                                               \
3463   M(AboveOrEqual)
3464 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3465 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3466 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)3467 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3468 #undef DEFINE_CONDITION_VISITORS
3469 #undef FOR_EACH_CONDITION_INSTRUCTION
3470 
3471 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
3472   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3473   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3474   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
3475 
3476   Register out = OutputRegister(instruction);
3477   Register dividend = InputRegisterAt(instruction, 0);
3478 
3479   Register final_dividend;
3480   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
3481     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
3482     // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
3483     //   imm == 2
3484     //     add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
3485     //     asr out, out(0x80000001), #1 => out = 0xc0000000
3486     //     This is the same as 'asr out, 0x80000000, #1'
3487     //
3488     //   imm > 2
3489     //     add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
3490     //         of the rightmost 1s is ctz_imm.
3491     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3492     //     csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
3493     //     asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
3494     //         leftmost 1s is ctz_imm + 1.
3495     //     This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
3496     //
3497     //   imm == INT32_MIN
3498     //     add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
3499     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3500     //     csel out, temp(-1), dividend(0x80000000), lt => out = -1
3501     //     neg out, out(-1), asr #31 => out = 1
3502     //     This is the same as 'neg out, dividend(0x80000000), asr #31'.
3503     final_dividend = dividend;
3504   } else {
3505     if (abs_imm == 2) {
3506       int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
3507       __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
3508     } else {
3509       UseScratchRegisterScope temps(GetVIXLAssembler());
3510       Register temp = temps.AcquireSameSizeAs(out);
3511       __ Add(temp, dividend, abs_imm - 1);
3512       __ Cmp(dividend, 0);
3513       __ Csel(out, temp, dividend, lt);
3514     }
3515     final_dividend = out;
3516   }
3517 
3518   int ctz_imm = CTZ(abs_imm);
3519   if (imm > 0) {
3520     __ Asr(out, final_dividend, ctz_imm);
3521   } else {
3522     __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
3523   }
3524 }
3525 
// Returns true when the magic number was modified by subtracting 2^32 (Int32 div) or
// 2^64 (Int64 div), i.e. the divisor is positive but the magic number is negative.
// In that case the dividend has to be added to the multiplication result.
static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
  if (divisor <= 0) {
    return false;
  }
  return magic_number < 0;
}
3531 
// Returns true when the magic number was modified by adding 2^32 (Int32 div) or
// 2^64 (Int64 div), i.e. the divisor is negative but the magic number is positive.
// In that case the dividend has to be subtracted from the multiplication result.
static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
  if (divisor >= 0) {
    return false;
  }
  return magic_number > 0;
}
3537 
3538 // Generate code which increments the value in register 'in' by 1 if the value is negative.
3539 // It is done with 'add out, in, in, lsr #31 or #63'.
3540 // If the value is a result of an operation setting the N flag, CINC MI can be used
3541 // instead of ADD. 'use_cond_inc' controls this.
GenerateIncrementNegativeByOne(Register out,Register in,bool use_cond_inc)3542 void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
3543     Register out,
3544     Register in,
3545     bool use_cond_inc) {
3546   if (use_cond_inc) {
3547     __ Cinc(out, in, mi);
3548   } else {
3549     __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
3550   }
3551 }
3552 
3553 // Helper to generate code producing the result of HRem with a constant divisor.
GenerateResultRemWithAnyConstant(Register out,Register dividend,Register quotient,int64_t divisor,UseScratchRegisterScope * temps_scope)3554 void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
3555     Register out,
3556     Register dividend,
3557     Register quotient,
3558     int64_t divisor,
3559     UseScratchRegisterScope* temps_scope) {
3560   Register temp_imm = temps_scope->AcquireSameSizeAs(out);
3561   __ Mov(temp_imm, divisor);
3562   __ Msub(out, quotient, temp_imm, dividend);
3563 }
3564 
3565 // Helper to generate code for HDiv/HRem instructions when a dividend is non-negative and
3566 // a divisor is a positive constant, not power of 2.
GenerateInt64UnsignedDivRemWithAnyPositiveConstant(HBinaryOperation * instruction)3567 void InstructionCodeGeneratorARM64::GenerateInt64UnsignedDivRemWithAnyPositiveConstant(
3568     HBinaryOperation* instruction) {
3569   DCHECK(instruction->IsDiv() || instruction->IsRem());
3570   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3571 
3572   LocationSummary* locations = instruction->GetLocations();
3573   Location second = locations->InAt(1);
3574   DCHECK(second.IsConstant());
3575 
3576   Register out = OutputRegister(instruction);
3577   Register dividend = InputRegisterAt(instruction, 0);
3578   int64_t imm = Int64FromConstant(second.GetConstant());
3579   DCHECK_GT(imm, 0);
3580 
3581   int64_t magic;
3582   int shift;
3583   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3584 
3585   UseScratchRegisterScope temps(GetVIXLAssembler());
3586   Register temp = temps.AcquireSameSizeAs(out);
3587 
3588   auto generate_unsigned_div_code = [this, magic, shift](Register out,
3589                                                          Register dividend,
3590                                                          Register temp) {
3591     // temp = get_high(dividend * magic)
3592     __ Mov(temp, magic);
3593     if (magic > 0 && shift == 0) {
3594       __ Smulh(out, dividend, temp);
3595     } else {
3596       __ Smulh(temp, dividend, temp);
3597       if (magic < 0) {
3598         // The negative magic means that the multiplier m is greater than INT64_MAX.
3599         // In such a case shift is never 0. See the proof in
3600         // InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant.
3601         __ Add(temp, temp, dividend);
3602       }
3603       DCHECK_NE(shift, 0);
3604       __ Lsr(out, temp, shift);
3605     }
3606   };
3607 
3608   if (instruction->IsDiv()) {
3609     generate_unsigned_div_code(out, dividend, temp);
3610   } else {
3611     generate_unsigned_div_code(temp, dividend, temp);
3612     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3613   }
3614 }
3615 
3616 // Helper to generate code for HDiv/HRem instructions for any dividend and a constant divisor
3617 // (not power of 2).
GenerateInt64DivRemWithAnyConstant(HBinaryOperation * instruction)3618 void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
3619     HBinaryOperation* instruction) {
3620   DCHECK(instruction->IsDiv() || instruction->IsRem());
3621   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3622 
3623   LocationSummary* locations = instruction->GetLocations();
3624   Location second = locations->InAt(1);
3625   DCHECK(second.IsConstant());
3626 
3627   Register out = OutputRegister(instruction);
3628   Register dividend = InputRegisterAt(instruction, 0);
3629   int64_t imm = Int64FromConstant(second.GetConstant());
3630 
3631   int64_t magic;
3632   int shift;
3633   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3634 
3635   UseScratchRegisterScope temps(GetVIXLAssembler());
3636   Register temp = temps.AcquireSameSizeAs(out);
3637 
3638   // temp = get_high(dividend * magic)
3639   __ Mov(temp, magic);
3640   __ Smulh(temp, dividend, temp);
3641 
3642   // The multiplication result might need some corrections to be finalized.
3643   // The last correction is to increment by 1, if the result is negative.
3644   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3645   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3646   // However if one of the corrections is ADD or SUB, the sign can be detected
3647   // with ADDS/SUBS. They set the N flag if the result is negative.
3648   // This allows to use CINC MI which has latency 1.
3649   bool use_cond_inc = false;
3650 
3651   // Some combinations of magic_number and the divisor require to correct the result.
3652   // Check whether the correction is needed.
3653   if (NeedToAddDividend(magic, imm)) {
3654     __ Adds(temp, temp, dividend);
3655     use_cond_inc = true;
3656   } else if (NeedToSubDividend(magic, imm)) {
3657     __ Subs(temp, temp, dividend);
3658     use_cond_inc = true;
3659   }
3660 
3661   if (shift != 0) {
3662     __ Asr(temp, temp, shift);
3663   }
3664 
3665   if (instruction->IsRem()) {
3666     GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3667     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3668   } else {
3669     GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3670   }
3671 }
3672 
GenerateInt32DivRemWithAnyConstant(HBinaryOperation * instruction)3673 void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
3674     HBinaryOperation* instruction) {
3675   DCHECK(instruction->IsDiv() || instruction->IsRem());
3676   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
3677 
3678   LocationSummary* locations = instruction->GetLocations();
3679   Location second = locations->InAt(1);
3680   DCHECK(second.IsConstant());
3681 
3682   Register out = OutputRegister(instruction);
3683   Register dividend = InputRegisterAt(instruction, 0);
3684   int64_t imm = Int64FromConstant(second.GetConstant());
3685 
3686   int64_t magic;
3687   int shift;
3688   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3689   UseScratchRegisterScope temps(GetVIXLAssembler());
3690   Register temp = temps.AcquireSameSizeAs(out);
3691 
3692   // temp = get_high(dividend * magic)
3693   __ Mov(temp, magic);
3694   __ Smull(temp.X(), dividend, temp);
3695 
3696   // The multiplication result might need some corrections to be finalized.
3697   // The last correction is to increment by 1, if the result is negative.
3698   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3699   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3700   // However if one of the corrections is ADD or SUB, the sign can be detected
3701   // with ADDS/SUBS. They set the N flag if the result is negative.
3702   // This allows to use CINC MI which has latency 1.
3703   bool use_cond_inc = false;
3704 
3705   // ADD/SUB correction is performed in the high 32 bits
3706   // as high 32 bits are ignored because type are kInt32.
3707   if (NeedToAddDividend(magic, imm)) {
3708     __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3709     use_cond_inc = true;
3710   } else if (NeedToSubDividend(magic, imm)) {
3711     __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3712     use_cond_inc = true;
3713   }
3714 
3715   // Extract the result from the high 32 bits and apply the final right shift.
3716   DCHECK_LT(shift, 32);
3717   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
3718     // No need to adjust the result for a non-negative dividend and a positive divisor.
3719     if (instruction->IsDiv()) {
3720       __ Lsr(out.X(), temp.X(), 32 + shift);
3721     } else {
3722       __ Lsr(temp.X(), temp.X(), 32 + shift);
3723       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3724     }
3725   } else {
3726     __ Asr(temp.X(), temp.X(), 32 + shift);
3727 
3728     if (instruction->IsRem()) {
3729       GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3730       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3731     } else {
3732       GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3733     }
3734   }
3735 }
3736 
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction,int64_t divisor)3737 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction,
3738                                                                   int64_t divisor) {
3739   DCHECK(instruction->IsDiv() || instruction->IsRem());
3740   if (instruction->GetResultType() == DataType::Type::kInt64) {
3741     if (divisor > 0 && HasNonNegativeInputAt(instruction, 0)) {
3742       GenerateInt64UnsignedDivRemWithAnyPositiveConstant(instruction);
3743     } else {
3744       GenerateInt64DivRemWithAnyConstant(instruction);
3745     }
3746   } else {
3747     GenerateInt32DivRemWithAnyConstant(instruction);
3748   }
3749 }
3750 
GenerateIntDivForConstDenom(HDiv * instruction)3751 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
3752   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3753 
3754   if (imm == 0) {
3755     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3756     return;
3757   }
3758 
3759   if (IsPowerOfTwo(AbsOrMin(imm))) {
3760     GenerateIntDivForPower2Denom(instruction);
3761   } else {
3762     // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3763     DCHECK(imm < -2 || imm > 2) << imm;
3764     GenerateDivRemWithAnyConstant(instruction, imm);
3765   }
3766 }
3767 
GenerateIntDiv(HDiv * instruction)3768 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
3769   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3770        << instruction->GetResultType();
3771 
3772   if (instruction->GetLocations()->InAt(1).IsConstant()) {
3773     GenerateIntDivForConstDenom(instruction);
3774   } else {
3775     Register out = OutputRegister(instruction);
3776     Register dividend = InputRegisterAt(instruction, 0);
3777     Register divisor = InputRegisterAt(instruction, 1);
3778     __ Sdiv(out, dividend, divisor);
3779   }
3780 }
3781 
VisitDiv(HDiv * div)3782 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3783   LocationSummary* locations =
3784       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3785   switch (div->GetResultType()) {
3786     case DataType::Type::kInt32:
3787     case DataType::Type::kInt64:
3788       locations->SetInAt(0, Location::RequiresRegister());
3789       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3790       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3791       break;
3792 
3793     case DataType::Type::kFloat32:
3794     case DataType::Type::kFloat64:
3795       locations->SetInAt(0, Location::RequiresFpuRegister());
3796       locations->SetInAt(1, Location::RequiresFpuRegister());
3797       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3798       break;
3799 
3800     default:
3801       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3802   }
3803 }
3804 
VisitDiv(HDiv * div)3805 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3806   DataType::Type type = div->GetResultType();
3807   switch (type) {
3808     case DataType::Type::kInt32:
3809     case DataType::Type::kInt64:
3810       GenerateIntDiv(div);
3811       break;
3812 
3813     case DataType::Type::kFloat32:
3814     case DataType::Type::kFloat64:
3815       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3816       break;
3817 
3818     default:
3819       LOG(FATAL) << "Unexpected div type " << type;
3820   }
3821 }
3822 
VisitDivZeroCheck(HDivZeroCheck * instruction)3823 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3824   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3825   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3826 }
3827 
VisitDivZeroCheck(HDivZeroCheck * instruction)3828 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3829   SlowPathCodeARM64* slow_path =
3830       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3831   codegen_->AddSlowPath(slow_path);
3832   Location value = instruction->GetLocations()->InAt(0);
3833 
3834   DataType::Type type = instruction->GetType();
3835 
3836   if (!DataType::IsIntegralType(type)) {
3837     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3838     UNREACHABLE();
3839   }
3840 
3841   if (value.IsConstant()) {
3842     int64_t divisor = Int64FromLocation(value);
3843     if (divisor == 0) {
3844       __ B(slow_path->GetEntryLabel());
3845     } else {
3846       // A division by a non-null constant is valid. We don't need to perform
3847       // any check, so simply fall through.
3848     }
3849   } else {
3850     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3851   }
3852 }
3853 
VisitDoubleConstant(HDoubleConstant * constant)3854 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3855   LocationSummary* locations =
3856       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3857   locations->SetOut(Location::ConstantLocation(constant));
3858 }
3859 
VisitDoubleConstant(HDoubleConstant * constant)3860 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3861     [[maybe_unused]] HDoubleConstant* constant) {
3862   // Will be generated at use site.
3863 }
3864 
VisitExit(HExit * exit)3865 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3866   exit->SetLocations(nullptr);
3867 }
3868 
VisitExit(HExit * exit)3869 void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {}
3870 
VisitFloatConstant(HFloatConstant * constant)3871 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3872   LocationSummary* locations =
3873       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3874   locations->SetOut(Location::ConstantLocation(constant));
3875 }
3876 
VisitFloatConstant(HFloatConstant * constant)3877 void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
3878   // Will be generated at use site.
3879 }
3880 
HandleGoto(HInstruction * got,HBasicBlock * successor)3881 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3882   if (successor->IsExitBlock()) {
3883     DCHECK(got->GetPrevious()->AlwaysThrows());
3884     return;  // no code needed
3885   }
3886 
3887   HBasicBlock* block = got->GetBlock();
3888   HInstruction* previous = got->GetPrevious();
3889   HLoopInformation* info = block->GetLoopInformation();
3890 
3891   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3892     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
3893     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3894     return;  // `GenerateSuspendCheck()` emitted the jump.
3895   }
3896   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3897     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3898     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3899   }
3900   if (!codegen_->GoesToNextBlock(block, successor)) {
3901     __ B(codegen_->GetLabelOf(successor));
3902   }
3903 }
3904 
VisitGoto(HGoto * got)3905 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3906   got->SetLocations(nullptr);
3907 }
3908 
VisitGoto(HGoto * got)3909 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3910   HandleGoto(got, got->GetSuccessor());
3911 }
3912 
VisitTryBoundary(HTryBoundary * try_boundary)3913 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3914   try_boundary->SetLocations(nullptr);
3915 }
3916 
VisitTryBoundary(HTryBoundary * try_boundary)3917 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3918   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3919   if (!successor->IsExitBlock()) {
3920     HandleGoto(try_boundary, successor);
3921   }
3922 }
3923 
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl::aarch64::Label * true_target,vixl::aarch64::Label * false_target)3924 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3925                                                           size_t condition_input_index,
3926                                                           vixl::aarch64::Label* true_target,
3927                                                           vixl::aarch64::Label* false_target) {
3928   HInstruction* cond = instruction->InputAt(condition_input_index);
3929 
3930   if (true_target == nullptr && false_target == nullptr) {
3931     // Nothing to do. The code always falls through.
3932     return;
3933   } else if (cond->IsIntConstant()) {
3934     // Constant condition, statically compared against "true" (integer value 1).
3935     if (cond->AsIntConstant()->IsTrue()) {
3936       if (true_target != nullptr) {
3937         __ B(true_target);
3938       }
3939     } else {
3940       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3941       if (false_target != nullptr) {
3942         __ B(false_target);
3943       }
3944     }
3945     return;
3946   }
3947 
3948   // The following code generates these patterns:
3949   //  (1) true_target == nullptr && false_target != nullptr
3950   //        - opposite condition true => branch to false_target
3951   //  (2) true_target != nullptr && false_target == nullptr
3952   //        - condition true => branch to true_target
3953   //  (3) true_target != nullptr && false_target != nullptr
3954   //        - condition true => branch to true_target
3955   //        - branch to false_target
3956   if (IsBooleanValueOrMaterializedCondition(cond)) {
3957     // The condition instruction has been materialized, compare the output to 0.
3958     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3959     DCHECK(cond_val.IsRegister());
3960     if (true_target == nullptr) {
3961       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3962     } else {
3963       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3964     }
3965   } else {
3966     // The condition instruction has not been materialized, use its inputs as
3967     // the comparison and its condition as the branch condition.
3968     HCondition* condition = cond->AsCondition();
3969 
3970     DataType::Type type = condition->InputAt(0)->GetType();
3971     if (DataType::IsFloatingPointType(type)) {
3972       GenerateFcmp(condition);
3973       if (true_target == nullptr) {
3974         IfCondition opposite_condition = condition->GetOppositeCondition();
3975         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3976       } else {
3977         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3978       }
3979     } else {
3980       // Integer cases.
3981       Register lhs = InputRegisterAt(condition, 0);
3982       Operand rhs = InputOperandAt(condition, 1);
3983 
3984       Condition arm64_cond;
3985       vixl::aarch64::Label* non_fallthrough_target;
3986       if (true_target == nullptr) {
3987         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3988         non_fallthrough_target = false_target;
3989       } else {
3990         arm64_cond = ARM64Condition(condition->GetCondition());
3991         non_fallthrough_target = true_target;
3992       }
3993 
3994       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3995           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3996         switch (arm64_cond) {
3997           case eq:
3998             __ Cbz(lhs, non_fallthrough_target);
3999             break;
4000           case ne:
4001             __ Cbnz(lhs, non_fallthrough_target);
4002             break;
4003           case lt:
4004             // Test the sign bit and branch accordingly.
4005             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
4006             break;
4007           case ge:
4008             // Test the sign bit and branch accordingly.
4009             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
4010             break;
4011           default:
4012             // Without the `static_cast` the compiler throws an error for
4013             // `-Werror=sign-promo`.
4014             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
4015         }
4016       } else {
4017         __ Cmp(lhs, rhs);
4018         __ B(arm64_cond, non_fallthrough_target);
4019       }
4020     }
4021   }
4022 
4023   // If neither branch falls through (case 3), the conditional branch to `true_target`
4024   // was already emitted (case 2) and we need to emit a jump to `false_target`.
4025   if (true_target != nullptr && false_target != nullptr) {
4026     __ B(false_target);
4027   }
4028 }
4029 
VisitIf(HIf * if_instr)4030 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
4031   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
4032   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
4033     locations->SetInAt(0, Location::RequiresRegister());
4034   }
4035 }
4036 
VisitIf(HIf * if_instr)4037 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
4038   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
4039   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
4040   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
4041   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
4042     true_target = nullptr;
4043   }
4044   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
4045   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
4046     false_target = nullptr;
4047   }
4048   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
4049     if (GetGraph()->IsCompilingBaseline() &&
4050         codegen_->GetCompilerOptions().ProfileBranches() &&
4051         !Runtime::Current()->IsAotCompiler()) {
4052       DCHECK(if_instr->InputAt(0)->IsCondition());
4053       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
4054       DCHECK(info != nullptr);
4055       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
4056       // Currently, not all If branches are profiled.
4057       if (cache != nullptr) {
4058         uint64_t address =
4059             reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
4060         static_assert(
4061             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
4062             "Unexpected offsets for BranchCache");
4063         vixl::aarch64::Label done;
4064         UseScratchRegisterScope temps(GetVIXLAssembler());
4065         Register temp = temps.AcquireX();
4066         Register counter = temps.AcquireW();
4067         Register condition = InputRegisterAt(if_instr, 0).X();
4068         __ Mov(temp, address);
4069         __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
4070         __ Add(counter, counter, 1);
4071         __ Tbnz(counter, 16, &done);
4072         __ Strh(counter, MemOperand(temp, condition, LSL, 1));
4073         __ Bind(&done);
4074       }
4075     }
4076   }
4077   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
4078 }
4079 
VisitDeoptimize(HDeoptimize * deoptimize)4080 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
4081   LocationSummary* locations = new (GetGraph()->GetAllocator())
4082       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
4083   InvokeRuntimeCallingConvention calling_convention;
4084   RegisterSet caller_saves = RegisterSet::Empty();
4085   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
4086   locations->SetCustomSlowPathCallerSaves(caller_saves);
4087   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
4088     locations->SetInAt(0, Location::RequiresRegister());
4089   }
4090 }
4091 
VisitDeoptimize(HDeoptimize * deoptimize)4092 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
4093   SlowPathCodeARM64* slow_path =
4094       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
4095   GenerateTestAndBranch(deoptimize,
4096                         /* condition_input_index= */ 0,
4097                         slow_path->GetEntryLabel(),
4098                         /* false_target= */ nullptr);
4099 }
4100 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)4101 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
4102   LocationSummary* locations = new (GetGraph()->GetAllocator())
4103       LocationSummary(flag, LocationSummary::kNoCall);
4104   locations->SetOut(Location::RequiresRegister());
4105 }
4106 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)4107 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
4108   __ Ldr(OutputRegister(flag),
4109          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
4110 }
4111 
IsConditionOnFloatingPointValues(HInstruction * condition)4112 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
4113   return condition->IsCondition() &&
4114          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
4115 }
4116 
GetConditionForSelect(HCondition * condition)4117 static inline Condition GetConditionForSelect(HCondition* condition) {
4118   IfCondition cond = condition->GetCondition();
4119   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
4120                                                      : ARM64Condition(cond);
4121 }
4122 
VisitSelect(HSelect * select)4123 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
4124   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
4125   if (DataType::IsFloatingPointType(select->GetType())) {
4126     locations->SetInAt(0, Location::RequiresFpuRegister());
4127     locations->SetInAt(1, Location::RequiresFpuRegister());
4128     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4129   } else {
4130     HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull();
4131     HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull();
4132     bool is_true_value_constant = cst_true_value != nullptr;
4133     bool is_false_value_constant = cst_false_value != nullptr;
4134     // Ask VIXL whether we should synthesize constants in registers.
4135     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
4136     Operand true_op = is_true_value_constant ?
4137         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
4138     Operand false_op = is_false_value_constant ?
4139         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
4140     bool true_value_in_register = false;
4141     bool false_value_in_register = false;
4142     MacroAssembler::GetCselSynthesisInformation(
4143         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
4144     true_value_in_register |= !is_true_value_constant;
4145     false_value_in_register |= !is_false_value_constant;
4146 
4147     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
4148                                                  : Location::ConstantLocation(cst_true_value));
4149     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
4150                                                   : Location::ConstantLocation(cst_false_value));
4151     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4152   }
4153 
4154   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
4155     locations->SetInAt(2, Location::RequiresRegister());
4156   }
4157 }
4158 
VisitSelect(HSelect * select)4159 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
4160   HInstruction* cond = select->GetCondition();
4161   Condition csel_cond;
4162 
4163   if (IsBooleanValueOrMaterializedCondition(cond)) {
4164     if (cond->IsCondition() && cond->GetNext() == select) {
4165       // Use the condition flags set by the previous instruction.
4166       csel_cond = GetConditionForSelect(cond->AsCondition());
4167     } else {
4168       __ Cmp(InputRegisterAt(select, 2), 0);
4169       csel_cond = ne;
4170     }
4171   } else if (IsConditionOnFloatingPointValues(cond)) {
4172     GenerateFcmp(cond);
4173     csel_cond = GetConditionForSelect(cond->AsCondition());
4174   } else {
4175     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
4176     csel_cond = GetConditionForSelect(cond->AsCondition());
4177   }
4178 
4179   if (DataType::IsFloatingPointType(select->GetType())) {
4180     __ Fcsel(OutputFPRegister(select),
4181              InputFPRegisterAt(select, 1),
4182              InputFPRegisterAt(select, 0),
4183              csel_cond);
4184   } else {
4185     __ Csel(OutputRegister(select),
4186             InputOperandAt(select, 1),
4187             InputOperandAt(select, 0),
4188             csel_cond);
4189   }
4190 }
4191 
VisitNop(HNop * nop)4192 void LocationsBuilderARM64::VisitNop(HNop* nop) {
4193   new (GetGraph()->GetAllocator()) LocationSummary(nop);
4194 }
4195 
VisitNop(HNop *)4196 void InstructionCodeGeneratorARM64::VisitNop(HNop*) {
4197   // The environment recording already happened in CodeGenerator::Compile.
4198 }
4199 
IncreaseFrame(size_t adjustment)4200 void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
4201   __ Claim(adjustment);
4202   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
4203 }
4204 
DecreaseFrame(size_t adjustment)4205 void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
4206   __ Drop(adjustment);
4207   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
4208 }
4209 
GenerateNop()4210 void CodeGeneratorARM64::GenerateNop() {
4211   __ Nop();
4212 }
4213 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)4214 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4215   HandleFieldGet(instruction, instruction->GetFieldInfo());
4216 }
4217 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)4218 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4219   HandleFieldGet(instruction, instruction->GetFieldInfo());
4220 }
4221 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)4222 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4223   HandleFieldSet(instruction);
4224 }
4225 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)4226 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4227   HandleFieldSet(instruction,
4228                  instruction->GetFieldInfo(),
4229                  instruction->GetValueCanBeNull(),
4230                  instruction->GetWriteBarrierKind());
4231 }
4232 
4233 // Temp is used for read barrier.
NumberOfInstanceOfTemps(bool emit_read_barrier,TypeCheckKind type_check_kind)4234 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
4235   if (emit_read_barrier &&
4236       (kUseBakerReadBarrier ||
4237           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4238           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4239           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
4240     return 1;
4241   }
4242   return 0;
4243 }
4244 
4245 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
4246 // interface pointer, one for loading the current interface.
4247 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(bool emit_read_barrier,TypeCheckKind type_check_kind)4248 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
4249   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
4250     return 3;
4251   }
4252   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
4253 }
4254 
VisitInstanceOf(HInstanceOf * instruction)4255 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
4256   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4257   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4258   bool baker_read_barrier_slow_path = false;
4259   switch (type_check_kind) {
4260     case TypeCheckKind::kExactCheck:
4261     case TypeCheckKind::kAbstractClassCheck:
4262     case TypeCheckKind::kClassHierarchyCheck:
4263     case TypeCheckKind::kArrayObjectCheck:
4264     case TypeCheckKind::kInterfaceCheck: {
4265       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
4266       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
4267       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
4268                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
4269       break;
4270     }
4271     case TypeCheckKind::kArrayCheck:
4272     case TypeCheckKind::kUnresolvedCheck:
4273       call_kind = LocationSummary::kCallOnSlowPath;
4274       break;
4275     case TypeCheckKind::kBitstringCheck:
4276       break;
4277   }
4278 
4279   LocationSummary* locations =
4280       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4281   if (baker_read_barrier_slow_path) {
4282     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4283   }
4284   locations->SetInAt(0, Location::RequiresRegister());
4285   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4286     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4287     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4288     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4289   } else {
4290     locations->SetInAt(1, Location::RequiresRegister());
4291   }
4292   // The "out" register is used as a temporary, so it overlaps with the inputs.
4293   // Note that TypeCheckSlowPathARM64 uses this register too.
4294   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4295   // Add temps if necessary for read barriers.
4296   locations->AddRegisterTemps(
4297       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
4298 }
4299 
VisitInstanceOf(HInstanceOf * instruction)4300 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
4301   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4302   LocationSummary* locations = instruction->GetLocations();
4303   Location obj_loc = locations->InAt(0);
4304   Register obj = InputRegisterAt(instruction, 0);
4305   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4306       ? Register()
4307       : InputRegisterAt(instruction, 1);
4308   Location out_loc = locations->Out();
4309   Register out = OutputRegister(instruction);
4310   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
4311   DCHECK_LE(num_temps, 1u);
4312   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
4313   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4314   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4315   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4316   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4317   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4318   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4319   const uint32_t object_array_data_offset =
4320       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4321 
4322   vixl::aarch64::Label done, zero;
4323   SlowPathCodeARM64* slow_path = nullptr;
4324 
4325   // Return 0 if `obj` is null.
4326   // Avoid null check if we know `obj` is not null.
4327   if (instruction->MustDoNullCheck()) {
4328     __ Cbz(obj, &zero);
4329   }
4330 
4331   switch (type_check_kind) {
4332     case TypeCheckKind::kExactCheck: {
4333       ReadBarrierOption read_barrier_option =
4334           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4335       // /* HeapReference<Class> */ out = obj->klass_
4336       GenerateReferenceLoadTwoRegisters(instruction,
4337                                         out_loc,
4338                                         obj_loc,
4339                                         class_offset,
4340                                         maybe_temp_loc,
4341                                         read_barrier_option);
4342       __ Cmp(out, cls);
4343       __ Cset(out, eq);
4344       if (zero.IsLinked()) {
4345         __ B(&done);
4346       }
4347       break;
4348     }
4349 
4350     case TypeCheckKind::kAbstractClassCheck: {
4351       ReadBarrierOption read_barrier_option =
4352           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4353       // /* HeapReference<Class> */ out = obj->klass_
4354       GenerateReferenceLoadTwoRegisters(instruction,
4355                                         out_loc,
4356                                         obj_loc,
4357                                         class_offset,
4358                                         maybe_temp_loc,
4359                                         read_barrier_option);
4360       // If the class is abstract, we eagerly fetch the super class of the
4361       // object to avoid doing a comparison we know will fail.
4362       vixl::aarch64::Label loop, success;
4363       __ Bind(&loop);
4364       // /* HeapReference<Class> */ out = out->super_class_
4365       GenerateReferenceLoadOneRegister(instruction,
4366                                        out_loc,
4367                                        super_offset,
4368                                        maybe_temp_loc,
4369                                        read_barrier_option);
4370       // If `out` is null, we use it for the result, and jump to `done`.
4371       __ Cbz(out, &done);
4372       __ Cmp(out, cls);
4373       __ B(ne, &loop);
4374       __ Mov(out, 1);
4375       if (zero.IsLinked()) {
4376         __ B(&done);
4377       }
4378       break;
4379     }
4380 
4381     case TypeCheckKind::kClassHierarchyCheck: {
4382       ReadBarrierOption read_barrier_option =
4383           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4384       // /* HeapReference<Class> */ out = obj->klass_
4385       GenerateReferenceLoadTwoRegisters(instruction,
4386                                         out_loc,
4387                                         obj_loc,
4388                                         class_offset,
4389                                         maybe_temp_loc,
4390                                         read_barrier_option);
4391       // Walk over the class hierarchy to find a match.
4392       vixl::aarch64::Label loop, success;
4393       __ Bind(&loop);
4394       __ Cmp(out, cls);
4395       __ B(eq, &success);
4396       // /* HeapReference<Class> */ out = out->super_class_
4397       GenerateReferenceLoadOneRegister(instruction,
4398                                        out_loc,
4399                                        super_offset,
4400                                        maybe_temp_loc,
4401                                        read_barrier_option);
4402       __ Cbnz(out, &loop);
4403       // If `out` is null, we use it for the result, and jump to `done`.
4404       __ B(&done);
4405       __ Bind(&success);
4406       __ Mov(out, 1);
4407       if (zero.IsLinked()) {
4408         __ B(&done);
4409       }
4410       break;
4411     }
4412 
4413     case TypeCheckKind::kArrayObjectCheck: {
4414       ReadBarrierOption read_barrier_option =
4415           codegen_->ReadBarrierOptionForInstanceOf(instruction);
4416       // /* HeapReference<Class> */ out = obj->klass_
4417       GenerateReferenceLoadTwoRegisters(instruction,
4418                                         out_loc,
4419                                         obj_loc,
4420                                         class_offset,
4421                                         maybe_temp_loc,
4422                                         read_barrier_option);
4423       // Do an exact check.
4424       vixl::aarch64::Label exact_check;
4425       __ Cmp(out, cls);
4426       __ B(eq, &exact_check);
4427       // Otherwise, we need to check that the object's class is a non-primitive array.
4428       // /* HeapReference<Class> */ out = out->component_type_
4429       GenerateReferenceLoadOneRegister(instruction,
4430                                        out_loc,
4431                                        component_offset,
4432                                        maybe_temp_loc,
4433                                        read_barrier_option);
4434       // If `out` is null, we use it for the result, and jump to `done`.
4435       __ Cbz(out, &done);
4436       __ Ldrh(out, HeapOperand(out, primitive_offset));
4437       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4438       __ Cbnz(out, &zero);
4439       __ Bind(&exact_check);
4440       __ Mov(out, 1);
4441       __ B(&done);
4442       break;
4443     }
4444 
4445     case TypeCheckKind::kArrayCheck: {
4446       // No read barrier since the slow path will retry upon failure.
4447       // /* HeapReference<Class> */ out = obj->klass_
4448       GenerateReferenceLoadTwoRegisters(instruction,
4449                                         out_loc,
4450                                         obj_loc,
4451                                         class_offset,
4452                                         maybe_temp_loc,
4453                                         kWithoutReadBarrier);
4454       __ Cmp(out, cls);
4455       DCHECK(locations->OnlyCallsOnSlowPath());
4456       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4457           instruction, /* is_fatal= */ false);
4458       codegen_->AddSlowPath(slow_path);
4459       __ B(ne, slow_path->GetEntryLabel());
4460       __ Mov(out, 1);
4461       if (zero.IsLinked()) {
4462         __ B(&done);
4463       }
4464       break;
4465     }
4466 
4467     case TypeCheckKind::kInterfaceCheck: {
4468       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
4469         DCHECK(locations->OnlyCallsOnSlowPath());
4470         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4471             instruction, /* is_fatal= */ false);
4472         codegen_->AddSlowPath(slow_path);
4473         if (codegen_->EmitNonBakerReadBarrier()) {
4474           __ B(slow_path->GetEntryLabel());
4475           break;
4476         }
4477         // For Baker read barrier, take the slow path while marking.
4478         __ Cbnz(mr, slow_path->GetEntryLabel());
4479       }
4480 
4481       // Fast-path without read barriers.
4482       UseScratchRegisterScope temps(GetVIXLAssembler());
4483       Register temp = temps.AcquireW();
4484       Register temp2 = temps.AcquireW();
4485       // /* HeapReference<Class> */ temp = obj->klass_
4486       __ Ldr(temp, HeapOperand(obj, class_offset));
4487       GetAssembler()->MaybeUnpoisonHeapReference(temp);
4488       // /* HeapReference<Class> */ temp = temp->iftable_
4489       __ Ldr(temp, HeapOperand(temp, iftable_offset));
4490       GetAssembler()->MaybeUnpoisonHeapReference(temp);
4491       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
4492       __ Ldr(out, HeapOperand(temp, array_length_offset));
4493       // Loop through the `IfTable` and check if any class matches.
4494       vixl::aarch64::Label loop;
4495       __ Bind(&loop);
4496       __ Cbz(out, &done);  // If taken, the result in `out` is already 0 (false).
4497       __ Ldr(temp2, HeapOperand(temp, object_array_data_offset));
4498       GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4499       // Go to next interface.
4500       __ Add(temp, temp, 2 * kHeapReferenceSize);
4501       __ Sub(out, out, 2);
4502       // Compare the classes and continue the loop if they do not match.
4503       __ Cmp(cls, temp2);
4504       __ B(ne, &loop);
4505       __ Mov(out, 1);
4506       if (zero.IsLinked()) {
4507         __ B(&done);
4508       }
4509       break;
4510     }
4511 
4512     case TypeCheckKind::kUnresolvedCheck: {
4513       // Note that we indeed only call on slow path, but we always go
4514       // into the slow path for the unresolved check case.
4515       //
4516       // We cannot directly call the InstanceofNonTrivial runtime
4517       // entry point without resorting to a type checking slow path
4518       // here (i.e. by calling InvokeRuntime directly), as it would
4519       // require to assign fixed registers for the inputs of this
4520       // HInstanceOf instruction (following the runtime calling
4521       // convention), which might be cluttered by the potential first
4522       // read barrier emission at the beginning of this method.
4523       //
4524       // TODO: Introduce a new runtime entry point taking the object
4525       // to test (instead of its class) as argument, and let it deal
4526       // with the read barrier issues. This will let us refactor this
4527       // case of the `switch` code as it was previously (with a direct
4528       // call to the runtime not using a type checking slow path).
4529       // This should also be beneficial for the other cases above.
4530       DCHECK(locations->OnlyCallsOnSlowPath());
4531       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4532           instruction, /* is_fatal= */ false);
4533       codegen_->AddSlowPath(slow_path);
4534       __ B(slow_path->GetEntryLabel());
4535       break;
4536     }
4537 
4538     case TypeCheckKind::kBitstringCheck: {
4539       // /* HeapReference<Class> */ temp = obj->klass_
4540       GenerateReferenceLoadTwoRegisters(instruction,
4541                                         out_loc,
4542                                         obj_loc,
4543                                         class_offset,
4544                                         maybe_temp_loc,
4545                                         kWithoutReadBarrier);
4546 
4547       GenerateBitstringTypeCheckCompare(instruction, out);
4548       __ Cset(out, eq);
4549       if (zero.IsLinked()) {
4550         __ B(&done);
4551       }
4552       break;
4553     }
4554   }
4555 
4556   if (zero.IsLinked()) {
4557     __ Bind(&zero);
4558     __ Mov(out, 0);
4559   }
4560 
4561   if (done.IsLinked()) {
4562     __ Bind(&done);
4563   }
4564 
4565   if (slow_path != nullptr) {
4566     __ Bind(slow_path->GetExitLabel());
4567   }
4568 }
4569 
VisitCheckCast(HCheckCast * instruction)4570 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4571   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4572   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
4573   LocationSummary* locations =
4574       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4575   locations->SetInAt(0, Location::RequiresRegister());
4576   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4577     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4578     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4579     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4580   } else {
4581     locations->SetInAt(1, Location::RequiresRegister());
4582   }
4583   locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
4584 }
4585 
VisitCheckCast(HCheckCast * instruction)4586 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4587   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4588   LocationSummary* locations = instruction->GetLocations();
4589   Location obj_loc = locations->InAt(0);
4590   Register obj = InputRegisterAt(instruction, 0);
4591   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4592       ? Register()
4593       : InputRegisterAt(instruction, 1);
4594   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
4595   DCHECK_GE(num_temps, 1u);
4596   DCHECK_LE(num_temps, 3u);
4597   Location temp_loc = locations->GetTemp(0);
4598   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4599   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4600   Register temp = WRegisterFrom(temp_loc);
4601   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4602   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4603   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4604   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4605   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4606   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4607   const uint32_t object_array_data_offset =
4608       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4609 
4610   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
4611   SlowPathCodeARM64* type_check_slow_path =
4612       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4613           instruction, is_type_check_slow_path_fatal);
4614   codegen_->AddSlowPath(type_check_slow_path);
4615 
4616   vixl::aarch64::Label done;
4617   // Avoid null check if we know obj is not null.
4618   if (instruction->MustDoNullCheck()) {
4619     __ Cbz(obj, &done);
4620   }
4621 
4622   switch (type_check_kind) {
4623     case TypeCheckKind::kExactCheck:
4624     case TypeCheckKind::kArrayCheck: {
4625       // /* HeapReference<Class> */ temp = obj->klass_
4626       GenerateReferenceLoadTwoRegisters(instruction,
4627                                         temp_loc,
4628                                         obj_loc,
4629                                         class_offset,
4630                                         maybe_temp2_loc,
4631                                         kWithoutReadBarrier);
4632 
4633       __ Cmp(temp, cls);
4634       // Jump to slow path for throwing the exception or doing a
4635       // more involved array check.
4636       __ B(ne, type_check_slow_path->GetEntryLabel());
4637       break;
4638     }
4639 
4640     case TypeCheckKind::kAbstractClassCheck: {
4641       // /* HeapReference<Class> */ temp = obj->klass_
4642       GenerateReferenceLoadTwoRegisters(instruction,
4643                                         temp_loc,
4644                                         obj_loc,
4645                                         class_offset,
4646                                         maybe_temp2_loc,
4647                                         kWithoutReadBarrier);
4648 
4649       // If the class is abstract, we eagerly fetch the super class of the
4650       // object to avoid doing a comparison we know will fail.
4651       vixl::aarch64::Label loop;
4652       __ Bind(&loop);
4653       // /* HeapReference<Class> */ temp = temp->super_class_
4654       GenerateReferenceLoadOneRegister(instruction,
4655                                        temp_loc,
4656                                        super_offset,
4657                                        maybe_temp2_loc,
4658                                        kWithoutReadBarrier);
4659 
4660       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4661       // exception.
4662       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4663       // Otherwise, compare classes.
4664       __ Cmp(temp, cls);
4665       __ B(ne, &loop);
4666       break;
4667     }
4668 
4669     case TypeCheckKind::kClassHierarchyCheck: {
4670       // /* HeapReference<Class> */ temp = obj->klass_
4671       GenerateReferenceLoadTwoRegisters(instruction,
4672                                         temp_loc,
4673                                         obj_loc,
4674                                         class_offset,
4675                                         maybe_temp2_loc,
4676                                         kWithoutReadBarrier);
4677 
4678       // Walk over the class hierarchy to find a match.
4679       vixl::aarch64::Label loop;
4680       __ Bind(&loop);
4681       __ Cmp(temp, cls);
4682       __ B(eq, &done);
4683 
4684       // /* HeapReference<Class> */ temp = temp->super_class_
4685       GenerateReferenceLoadOneRegister(instruction,
4686                                        temp_loc,
4687                                        super_offset,
4688                                        maybe_temp2_loc,
4689                                        kWithoutReadBarrier);
4690 
4691       // If the class reference currently in `temp` is not null, jump
4692       // back at the beginning of the loop.
4693       __ Cbnz(temp, &loop);
4694       // Otherwise, jump to the slow path to throw the exception.
4695       __ B(type_check_slow_path->GetEntryLabel());
4696       break;
4697     }
4698 
4699     case TypeCheckKind::kArrayObjectCheck: {
4700       // /* HeapReference<Class> */ temp = obj->klass_
4701       GenerateReferenceLoadTwoRegisters(instruction,
4702                                         temp_loc,
4703                                         obj_loc,
4704                                         class_offset,
4705                                         maybe_temp2_loc,
4706                                         kWithoutReadBarrier);
4707 
4708       // Do an exact check.
4709       __ Cmp(temp, cls);
4710       __ B(eq, &done);
4711 
4712       // Otherwise, we need to check that the object's class is a non-primitive array.
4713       // /* HeapReference<Class> */ temp = temp->component_type_
4714       GenerateReferenceLoadOneRegister(instruction,
4715                                        temp_loc,
4716                                        component_offset,
4717                                        maybe_temp2_loc,
4718                                        kWithoutReadBarrier);
4719 
4720       // If the component type is null, jump to the slow path to throw the exception.
4721       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4722       // Otherwise, the object is indeed an array. Further check that this component type is not a
4723       // primitive type.
4724       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4725       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4726       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4727       break;
4728     }
4729 
4730     case TypeCheckKind::kUnresolvedCheck:
4731       // We always go into the type check slow path for the unresolved check cases.
4732       //
4733       // We cannot directly call the CheckCast runtime entry point
4734       // without resorting to a type checking slow path here (i.e. by
4735       // calling InvokeRuntime directly), as it would require to
4736       // assign fixed registers for the inputs of this HInstanceOf
4737       // instruction (following the runtime calling convention), which
4738       // might be cluttered by the potential first read barrier
4739       // emission at the beginning of this method.
4740       __ B(type_check_slow_path->GetEntryLabel());
4741       break;
4742     case TypeCheckKind::kInterfaceCheck: {
4743       // /* HeapReference<Class> */ temp = obj->klass_
4744       GenerateReferenceLoadTwoRegisters(instruction,
4745                                         temp_loc,
4746                                         obj_loc,
4747                                         class_offset,
4748                                         maybe_temp2_loc,
4749                                         kWithoutReadBarrier);
4750 
4751       // /* HeapReference<Class> */ temp = temp->iftable_
4752       GenerateReferenceLoadOneRegister(instruction,
4753                                        temp_loc,
4754                                        iftable_offset,
4755                                        maybe_temp2_loc,
4756                                        kWithoutReadBarrier);
4757       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
4758       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4759       // Loop through the iftable and check if any class matches.
4760       vixl::aarch64::Label start_loop;
4761       __ Bind(&start_loop);
4762       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4763       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4764       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4765       // Go to next interface.
4766       __ Add(temp, temp, 2 * kHeapReferenceSize);
4767       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4768       // Compare the classes and continue the loop if they do not match.
4769       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4770       __ B(ne, &start_loop);
4771       break;
4772     }
4773 
4774     case TypeCheckKind::kBitstringCheck: {
4775       // /* HeapReference<Class> */ temp = obj->klass_
4776       GenerateReferenceLoadTwoRegisters(instruction,
4777                                         temp_loc,
4778                                         obj_loc,
4779                                         class_offset,
4780                                         maybe_temp2_loc,
4781                                         kWithoutReadBarrier);
4782 
4783       GenerateBitstringTypeCheckCompare(instruction, temp);
4784       __ B(ne, type_check_slow_path->GetEntryLabel());
4785       break;
4786     }
4787   }
4788   __ Bind(&done);
4789 
4790   __ Bind(type_check_slow_path->GetExitLabel());
4791 }
4792 
VisitIntConstant(HIntConstant * constant)4793 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4794   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4795   locations->SetOut(Location::ConstantLocation(constant));
4796 }
4797 
VisitIntConstant(HIntConstant * constant)4798 void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
4799   // Will be generated at use site.
4800 }
4801 
VisitNullConstant(HNullConstant * constant)4802 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4803   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4804   locations->SetOut(Location::ConstantLocation(constant));
4805 }
4806 
VisitNullConstant(HNullConstant * constant)4807 void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
4808   // Will be generated at use site.
4809 }
4810 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)4811 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4812   // The trampoline uses the same calling convention as dex calling conventions,
4813   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4814   // the method_idx.
4815   HandleInvoke(invoke);
4816 }
4817 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)4818 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4819   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4820   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4821 }
4822 
HandleInvoke(HInvoke * invoke)4823 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4824   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4825   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4826 }
4827 
VisitInvokeInterface(HInvokeInterface * invoke)4828 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4829   HandleInvoke(invoke);
4830   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4831     // We cannot request ip1 as it's blocked by the register allocator.
4832     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
4833   }
4834 }
4835 
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)4836 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4837                                                        Register klass) {
4838   DCHECK_EQ(klass.GetCode(), 0u);
4839   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
4840     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
4841     DCHECK(info != nullptr);
4842     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
4843         info, GetCompilerOptions(), instruction->AsInvoke());
4844     if (cache != nullptr) {
4845       uint64_t address = reinterpret_cast64<uint64_t>(cache);
4846       vixl::aarch64::Label done;
4847       __ Mov(x8, address);
4848       __ Ldr(w9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4849       // Fast path for a monomorphic cache.
4850       __ Cmp(klass.W(), w9);
4851       __ B(eq, &done);
4852       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4853       __ Bind(&done);
4854     } else {
4855       // This is unexpected, but we don't guarantee stable compilation across
4856       // JIT runs so just warn about it.
4857       ScopedObjectAccess soa(Thread::Current());
4858       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
4859     }
4860   }
4861 }
4862 
VisitInvokeInterface(HInvokeInterface * invoke)4863 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4864   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4865   LocationSummary* locations = invoke->GetLocations();
4866   Register temp = XRegisterFrom(locations->GetTemp(0));
4867   Location receiver = locations->InAt(0);
4868   Offset class_offset = mirror::Object::ClassOffset();
4869   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4870 
4871   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4872   if (receiver.IsStackSlot()) {
4873     __ Ldr(temp.W(), StackOperandFrom(receiver));
4874     {
4875       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4876       // /* HeapReference<Class> */ temp = temp->klass_
4877       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4878       codegen_->MaybeRecordImplicitNullCheck(invoke);
4879     }
4880   } else {
4881     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4882     // /* HeapReference<Class> */ temp = receiver->klass_
4883     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4884     codegen_->MaybeRecordImplicitNullCheck(invoke);
4885   }
4886 
4887   // Instead of simply (possibly) unpoisoning `temp` here, we should
4888   // emit a read barrier for the previous class reference load.
4889   // However this is not required in practice, as this is an
4890   // intermediate/temporary reference and because the current
4891   // concurrent copying collector keeps the from-space memory
4892   // intact/accessible until the end of the marking phase (the
4893   // concurrent copying collector may not in the future).
4894   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4895 
4896   // If we're compiling baseline, update the inline cache.
4897   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
4898 
4899   // The register ip1 is required to be used for the hidden argument in
4900   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4901   MacroAssembler* masm = GetVIXLAssembler();
4902   UseScratchRegisterScope scratch_scope(masm);
4903   scratch_scope.Exclude(ip1);
4904   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4905     Location interface_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
4906     if (interface_method.IsStackSlot()) {
4907       __ Ldr(ip1, StackOperandFrom(interface_method));
4908     } else {
4909       __ Mov(ip1, XRegisterFrom(interface_method));
4910     }
4911   // If the load kind is through a runtime call, we will pass the method we
4912   // fetch the IMT, which will either be a no-op if we don't hit the conflict
4913   // stub, or will make us always go through the trampoline when there is a
4914   // conflict.
4915   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
4916     codegen_->LoadMethod(
4917         invoke->GetHiddenArgumentLoadKind(), Location::RegisterLocation(ip1.GetCode()), invoke);
4918   }
4919 
4920   __ Ldr(temp,
4921       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4922   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4923       invoke->GetImtIndex(), kArm64PointerSize));
4924   // temp = temp->GetImtEntryAt(method_offset);
4925   __ Ldr(temp, MemOperand(temp, method_offset));
4926   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
4927     // We pass the method from the IMT in case of a conflict. This will ensure
4928     // we go into the runtime to resolve the actual method.
4929     __ Mov(ip1, temp);
4930   }
4931   // lr = temp->GetEntryPoint();
4932   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4933 
4934   {
4935     // Ensure the pc position is recorded immediately after the `blr` instruction.
4936     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4937 
4938     // lr();
4939     __ blr(lr);
4940     DCHECK(!codegen_->IsLeafMethod());
4941     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4942   }
4943 
4944   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4945 }
4946 
VisitInvokeVirtual(HInvokeVirtual * invoke)4947 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4948   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4949   if (intrinsic.TryDispatch(invoke)) {
4950     return;
4951   }
4952 
4953   HandleInvoke(invoke);
4954 }
4955 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)4956 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4957   // Explicit clinit checks triggered by static invokes must have been pruned by
4958   // art::PrepareForRegisterAllocation.
4959   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4960 
4961   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4962   if (intrinsic.TryDispatch(invoke)) {
4963     return;
4964   }
4965 
4966   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4967     CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
4968         /*for_register_allocation=*/ true);
4969     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4970   } else {
4971     HandleInvoke(invoke);
4972   }
4973 }
4974 
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARM64 * codegen)4975 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4976   if (invoke->GetLocations()->Intrinsified()) {
4977     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4978     intrinsic.Dispatch(invoke);
4979     return true;
4980   }
4981   return false;
4982 }
4983 
// Returns the dispatch info that will actually be used for a static or direct invoke.
// `method` is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    [[maybe_unused]] ArtMethod* method) {
  // Every dispatch type is supported on ARM64, so the request is honoured unchanged.
  HInvokeStaticOrDirect::DispatchInfo supported_dispatch_info = desired_dispatch_info;
  return supported_dispatch_info;
}
4990 
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)4991 void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
4992   switch (load_kind) {
4993     case MethodLoadKind::kBootImageLinkTimePcRelative: {
4994       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4995       // Add ADRP with its PC-relative method patch.
4996       vixl::aarch64::Label* adrp_label =
4997           NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
4998       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4999       // Add ADD with its PC-relative method patch.
5000       vixl::aarch64::Label* add_label =
5001           NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
5002       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
5003       break;
5004     }
5005     case MethodLoadKind::kBootImageRelRo: {
5006       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
5007       uint32_t boot_image_offset = GetBootImageOffset(invoke);
5008       LoadBootImageRelRoEntry(WRegisterFrom(temp), boot_image_offset);
5009       break;
5010     }
5011     case MethodLoadKind::kAppImageRelRo: {
5012       DCHECK(GetCompilerOptions().IsAppImage());
5013       // Add ADRP with its PC-relative method patch.
5014       vixl::aarch64::Label* adrp_label =
5015           NewAppImageMethodPatch(invoke->GetResolvedMethodReference());
5016       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
5017       // Add LDR with its PC-relative method patch.
5018       // Note: App image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
5019       vixl::aarch64::Label* ldr_label =
5020           NewAppImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
5021       EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
5022       break;
5023     }
5024     case MethodLoadKind::kBssEntry: {
5025       // Add ADRP with its PC-relative .bss entry patch.
5026       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(invoke->GetMethodReference());
5027       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
5028       // Add LDR with its PC-relative .bss entry patch.
5029       vixl::aarch64::Label* ldr_label =
5030           NewMethodBssEntryPatch(invoke->GetMethodReference(), adrp_label);
5031       // All aligned loads are implicitly atomic consume operations on ARM64.
5032       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
5033       break;
5034     }
5035     case MethodLoadKind::kJitDirectAddress: {
5036       // Load method address from literal pool.
5037       __ Ldr(XRegisterFrom(temp),
5038              jit_patches_.DeduplicateUint64Literal(
5039                  reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
5040       break;
5041     }
5042     case MethodLoadKind::kRuntimeCall: {
5043       // Test situation, don't do anything.
5044       break;
5045     }
5046     default: {
5047       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5048       UNREACHABLE();
5049     }
5050   }
5051 }
5052 
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5053 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
5054     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5055   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
5056   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
5057   switch (invoke->GetMethodLoadKind()) {
5058     case MethodLoadKind::kStringInit: {
5059       uint32_t offset =
5060           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5061       // temp = thread->string_init_entrypoint
5062       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
5063       break;
5064     }
5065     case MethodLoadKind::kRecursive:
5066       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5067       break;
5068     case MethodLoadKind::kRuntimeCall:
5069       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5070       return;  // No code pointer retrieval; the runtime performs the call directly.
5071     case MethodLoadKind::kBootImageLinkTimePcRelative:
5072       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5073       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5074         // Do not materialize the method pointer, load directly the entrypoint.
5075         // Add ADRP with its PC-relative JNI entrypoint patch.
5076         vixl::aarch64::Label* adrp_label =
5077             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
5078         EmitAdrpPlaceholder(adrp_label, lr);
5079         // Add the LDR with its PC-relative method patch.
5080         vixl::aarch64::Label* add_label =
5081             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
5082         EmitLdrOffsetPlaceholder(add_label, lr, lr);
5083         break;
5084       }
5085       FALLTHROUGH_INTENDED;
5086     default:
5087       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
5088       break;
5089   }
5090 
5091   auto call_lr = [&]() {
5092     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
5093     ExactAssemblyScope eas(GetVIXLAssembler(),
5094                            kInstructionSize,
5095                            CodeBufferCheckScope::kExactSize);
5096     // lr()
5097     __ blr(lr);
5098     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5099   };
5100   switch (invoke->GetCodePtrLocation()) {
5101     case CodePtrLocation::kCallSelf:
5102       {
5103         DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5104         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
5105         ExactAssemblyScope eas(GetVIXLAssembler(),
5106                                kInstructionSize,
5107                                CodeBufferCheckScope::kExactSize);
5108         __ bl(&frame_entry_label_);
5109         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5110       }
5111       break;
5112     case CodePtrLocation::kCallCriticalNative: {
5113       size_t out_frame_size =
5114           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
5115                                     kAapcs64StackAlignment,
5116                                     GetCriticalNativeDirectCallFrameSize>(invoke);
5117       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5118         call_lr();
5119       } else {
5120         // LR = callee_method->ptr_sized_fields_.data_;  // EntryPointFromJni
5121         MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
5122         __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
5123         // lr()
5124         call_lr();
5125       }
5126       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
5127       switch (invoke->GetType()) {
5128         case DataType::Type::kBool:
5129           __ Ubfx(w0, w0, 0, 8);
5130           break;
5131         case DataType::Type::kInt8:
5132           __ Sbfx(w0, w0, 0, 8);
5133           break;
5134         case DataType::Type::kUint16:
5135           __ Ubfx(w0, w0, 0, 16);
5136           break;
5137         case DataType::Type::kInt16:
5138           __ Sbfx(w0, w0, 0, 16);
5139           break;
5140         case DataType::Type::kInt32:
5141         case DataType::Type::kInt64:
5142         case DataType::Type::kFloat32:
5143         case DataType::Type::kFloat64:
5144         case DataType::Type::kVoid:
5145           break;
5146         default:
5147           DCHECK(false) << invoke->GetType();
5148           break;
5149       }
5150       if (out_frame_size != 0u) {
5151         DecreaseFrame(out_frame_size);
5152       }
5153       break;
5154     }
5155     case CodePtrLocation::kCallArtMethod: {
5156       // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
5157       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5158       __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
5159       // lr()
5160       call_lr();
5161       break;
5162     }
5163   }
5164 
5165   DCHECK(!IsLeafMethod());
5166 }
5167 
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5168 void CodeGeneratorARM64::GenerateVirtualCall(
5169     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5170   // Use the calling convention instead of the location of the receiver, as
5171   // intrinsics may have put the receiver in a different register. In the intrinsics
5172   // slow path, the arguments have been moved to the right place, so here we are
5173   // guaranteed that the receiver is the first register of the calling convention.
5174   InvokeDexCallingConvention calling_convention;
5175   Register receiver = calling_convention.GetRegisterAt(0);
5176   Register temp = XRegisterFrom(temp_in);
5177   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5178       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
5179   Offset class_offset = mirror::Object::ClassOffset();
5180   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5181 
5182   DCHECK(receiver.IsRegister());
5183 
5184   {
5185     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5186     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5187     // /* HeapReference<Class> */ temp = receiver->klass_
5188     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
5189     MaybeRecordImplicitNullCheck(invoke);
5190   }
5191   // Instead of simply (possibly) unpoisoning `temp` here, we should
5192   // emit a read barrier for the previous class reference load.
5193   // However this is not required in practice, as this is an
5194   // intermediate/temporary reference and because the current
5195   // concurrent copying collector keeps the from-space memory
5196   // intact/accessible until the end of the marking phase (the
5197   // concurrent copying collector may not in the future).
5198   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
5199 
5200   // If we're compiling baseline, update the inline cache.
5201   MaybeGenerateInlineCacheCheck(invoke, temp);
5202 
5203   // temp = temp->GetMethodAt(method_offset);
5204   __ Ldr(temp, MemOperand(temp, method_offset));
5205   // lr = temp->GetEntryPoint();
5206   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
5207   {
5208     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
5209     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
5210     // lr();
5211     __ blr(lr);
5212     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5213   }
5214 }
5215 
MoveFromReturnRegister(Location trg,DataType::Type type)5216 void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
5217   if (!trg.IsValid()) {
5218     DCHECK(type == DataType::Type::kVoid);
5219     return;
5220   }
5221 
5222   DCHECK_NE(type, DataType::Type::kVoid);
5223 
5224   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
5225     Register trg_reg = RegisterFrom(trg, type);
5226     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
5227     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
5228   } else {
5229     VRegister trg_reg = FPRegisterFrom(trg, type);
5230     VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
5231     __ Fmov(trg_reg, res_reg);
5232   }
5233 }
5234 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)5235 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
5236   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
5237   if (intrinsic.TryDispatch(invoke)) {
5238     return;
5239   }
5240   HandleInvoke(invoke);
5241 }
5242 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)5243 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
5244   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5245     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5246     return;
5247   }
5248   codegen_->GenerateInvokePolymorphicCall(invoke);
5249   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5250 }
5251 
VisitInvokeCustom(HInvokeCustom * invoke)5252 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
5253   HandleInvoke(invoke);
5254 }
5255 
VisitInvokeCustom(HInvokeCustom * invoke)5256 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
5257   codegen_->GenerateInvokeCustomCall(invoke);
5258   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5259 }
5260 
NewBootImageIntrinsicPatch(uint32_t intrinsic_data,vixl::aarch64::Label * adrp_label)5261 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
5262     uint32_t intrinsic_data,
5263     vixl::aarch64::Label* adrp_label) {
5264   return NewPcRelativePatch(
5265       /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
5266 }
5267 
NewBootImageRelRoPatch(uint32_t boot_image_offset,vixl::aarch64::Label * adrp_label)5268 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
5269     uint32_t boot_image_offset,
5270     vixl::aarch64::Label* adrp_label) {
5271   return NewPcRelativePatch(
5272       /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
5273 }
5274 
NewBootImageMethodPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5275 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
5276     MethodReference target_method,
5277     vixl::aarch64::Label* adrp_label) {
5278   return NewPcRelativePatch(
5279       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
5280 }
5281 
NewAppImageMethodPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5282 vixl::aarch64::Label* CodeGeneratorARM64::NewAppImageMethodPatch(
5283     MethodReference target_method,
5284     vixl::aarch64::Label* adrp_label) {
5285   return NewPcRelativePatch(
5286       target_method.dex_file, target_method.index, adrp_label, &app_image_method_patches_);
5287 }
5288 
NewMethodBssEntryPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5289 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
5290     MethodReference target_method,
5291     vixl::aarch64::Label* adrp_label) {
5292   return NewPcRelativePatch(
5293       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
5294 }
5295 
NewBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index,vixl::aarch64::Label * adrp_label)5296 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
5297     const DexFile& dex_file,
5298     dex::TypeIndex type_index,
5299     vixl::aarch64::Label* adrp_label) {
5300   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
5301 }
5302 
NewAppImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index,vixl::aarch64::Label * adrp_label)5303 vixl::aarch64::Label* CodeGeneratorARM64::NewAppImageTypePatch(
5304     const DexFile& dex_file,
5305     dex::TypeIndex type_index,
5306     vixl::aarch64::Label* adrp_label) {
5307   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &app_image_type_patches_);
5308 }
5309 
NewBssEntryTypePatch(HLoadClass * load_class,vixl::aarch64::Label * adrp_label)5310 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
5311     HLoadClass* load_class,
5312     vixl::aarch64::Label* adrp_label) {
5313   const DexFile& dex_file = load_class->GetDexFile();
5314   dex::TypeIndex type_index = load_class->GetTypeIndex();
5315   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
5316   switch (load_class->GetLoadKind()) {
5317     case HLoadClass::LoadKind::kBssEntry:
5318       patches = &type_bss_entry_patches_;
5319       break;
5320     case HLoadClass::LoadKind::kBssEntryPublic:
5321       patches = &public_type_bss_entry_patches_;
5322       break;
5323     case HLoadClass::LoadKind::kBssEntryPackage:
5324       patches = &package_type_bss_entry_patches_;
5325       break;
5326     default:
5327       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5328       UNREACHABLE();
5329   }
5330   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, patches);
5331 }
5332 
NewBootImageStringPatch(const DexFile & dex_file,dex::StringIndex string_index,vixl::aarch64::Label * adrp_label)5333 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
5334     const DexFile& dex_file,
5335     dex::StringIndex string_index,
5336     vixl::aarch64::Label* adrp_label) {
5337   return NewPcRelativePatch(
5338       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
5339 }
5340 
NewStringBssEntryPatch(const DexFile & dex_file,dex::StringIndex string_index,vixl::aarch64::Label * adrp_label)5341 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
5342     const DexFile& dex_file,
5343     dex::StringIndex string_index,
5344     vixl::aarch64::Label* adrp_label) {
5345   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
5346 }
5347 
NewMethodTypeBssEntryPatch(HLoadMethodType * load_method_type,vixl::aarch64::Label * adrp_label)5348 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodTypeBssEntryPatch(
5349     HLoadMethodType* load_method_type,
5350     vixl::aarch64::Label* adrp_label) {
5351   return NewPcRelativePatch(&load_method_type->GetDexFile(),
5352                             load_method_type->GetProtoIndex().index_,
5353                             adrp_label,
5354                             &method_type_bss_entry_patches_);
5355 }
5356 
NewBootImageJniEntrypointPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)5357 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
5358     MethodReference target_method,
5359     vixl::aarch64::Label* adrp_label) {
5360   return NewPcRelativePatch(
5361       target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
5362 }
5363 
EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset)5364 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
5365   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
5366   DCHECK(!GetCompilerOptions().IsJitCompiler());
5367   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
5368   vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
5369   __ bind(bl_label);
5370   __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5371 }
5372 
EmitBakerReadBarrierCbnz(uint32_t custom_data)5373 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
5374   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
5375   if (GetCompilerOptions().IsJitCompiler()) {
5376     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
5377     vixl::aarch64::Label* slow_path_entry = &it->second.label;
5378     __ cbnz(mr, slow_path_entry);
5379   } else {
5380     baker_read_barrier_patches_.emplace_back(custom_data);
5381     vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
5382     __ bind(cbnz_label);
5383     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5384   }
5385 }
5386 
NewPcRelativePatch(const DexFile * dex_file,uint32_t offset_or_index,vixl::aarch64::Label * adrp_label,ArenaDeque<PcRelativePatchInfo> * patches)5387 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
5388     const DexFile* dex_file,
5389     uint32_t offset_or_index,
5390     vixl::aarch64::Label* adrp_label,
5391     ArenaDeque<PcRelativePatchInfo>* patches) {
5392   // Add a patch entry and return the label.
5393   patches->emplace_back(dex_file, offset_or_index);
5394   PcRelativePatchInfo* info = &patches->back();
5395   vixl::aarch64::Label* label = &info->label;
5396   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
5397   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
5398   return label;
5399 }
5400 
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)5401 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
5402   jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData());
5403 }
5404 
EmitAdrpPlaceholder(vixl::aarch64::Label * fixup_label,vixl::aarch64::Register reg)5405 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
5406                                              vixl::aarch64::Register reg) {
5407   DCHECK(reg.IsX());
5408   SingleEmissionCheckScope guard(GetVIXLAssembler());
5409   __ Bind(fixup_label);
5410   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
5411 }
5412 
EmitAddPlaceholder(vixl::aarch64::Label * fixup_label,vixl::aarch64::Register out,vixl::aarch64::Register base)5413 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
5414                                             vixl::aarch64::Register out,
5415                                             vixl::aarch64::Register base) {
5416   DCHECK(out.IsX());
5417   DCHECK(base.IsX());
5418   SingleEmissionCheckScope guard(GetVIXLAssembler());
5419   __ Bind(fixup_label);
5420   __ add(out, base, Operand(/* offset placeholder */ 0));
5421 }
5422 
EmitLdrOffsetPlaceholder(vixl::aarch64::Label * fixup_label,vixl::aarch64::Register out,vixl::aarch64::Register base)5423 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
5424                                                   vixl::aarch64::Register out,
5425                                                   vixl::aarch64::Register base) {
5426   DCHECK(base.IsX());
5427   SingleEmissionCheckScope guard(GetVIXLAssembler());
5428   __ Bind(fixup_label);
5429   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
5430 }
5431 
LoadBootImageRelRoEntry(vixl::aarch64::Register reg,uint32_t boot_image_offset)5432 void CodeGeneratorARM64::LoadBootImageRelRoEntry(vixl::aarch64::Register reg,
5433                                                  uint32_t boot_image_offset) {
5434   DCHECK(reg.IsW());
5435   // Add ADRP with its PC-relative boot image .data.img.rel.ro patch.
5436   vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
5437   EmitAdrpPlaceholder(adrp_label, reg.X());
5438   // Add LDR with its PC-relative boot image .data.img.rel.ro patch.
5439   vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5440   EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
5441 }
5442 
LoadBootImageAddress(vixl::aarch64::Register reg,uint32_t boot_image_reference)5443 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
5444                                               uint32_t boot_image_reference) {
5445   if (GetCompilerOptions().IsBootImage()) {
5446     // Add ADRP with its PC-relative type patch.
5447     vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
5448     EmitAdrpPlaceholder(adrp_label, reg.X());
5449     // Add ADD with its PC-relative type patch.
5450     vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
5451     EmitAddPlaceholder(add_label, reg.X(), reg.X());
5452   } else if (GetCompilerOptions().GetCompilePic()) {
5453     LoadBootImageRelRoEntry(reg, boot_image_reference);
5454   } else {
5455     DCHECK(GetCompilerOptions().IsJitCompiler());
5456     gc::Heap* heap = Runtime::Current()->GetHeap();
5457     DCHECK(!heap->GetBootImageSpaces().empty());
5458     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5459     __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
5460   }
5461 }
5462 
LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,TypeReference target_type)5463 void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
5464                                                        TypeReference target_type) {
5465   // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5466   DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5467   // Add ADRP with its PC-relative type patch.
5468   vixl::aarch64::Label* adrp_label =
5469       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
5470   EmitAdrpPlaceholder(adrp_label, reg.X());
5471   // Add ADD with its PC-relative type patch.
5472   vixl::aarch64::Label* add_label =
5473       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), adrp_label);
5474   EmitAddPlaceholder(add_label, reg.X(), reg.X());
5475 }
5476 
LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg,HInvoke * invoke)5477 void CodeGeneratorARM64::LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke) {
5478   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5479   if (GetCompilerOptions().IsBootImage()) {
5480     MethodReference target_method = invoke->GetResolvedMethodReference();
5481     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5482     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
5483   } else {
5484     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5485     LoadBootImageAddress(reg, boot_image_offset);
5486   }
5487 }
5488 
LoadClassRootForIntrinsic(vixl::aarch64::Register reg,ClassRoot class_root)5489 void CodeGeneratorARM64::LoadClassRootForIntrinsic(vixl::aarch64::Register reg,
5490                                                    ClassRoot class_root) {
5491   if (GetCompilerOptions().IsBootImage()) {
5492     ScopedObjectAccess soa(Thread::Current());
5493     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
5494     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
5495     LoadTypeForBootImageIntrinsic(reg, target_type);
5496   } else {
5497     uint32_t boot_image_offset = GetBootImageOffset(class_root);
5498     LoadBootImageAddress(reg, boot_image_offset);
5499   }
5500 }
5501 
5502 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5503 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
5504     const ArenaDeque<PcRelativePatchInfo>& infos,
5505     ArenaVector<linker::LinkerPatch>* linker_patches) {
5506   for (const PcRelativePatchInfo& info : infos) {
5507     linker_patches->push_back(Factory(info.label.GetLocation(),
5508                                       info.target_dex_file,
5509                                       info.pc_insn_label->GetLocation(),
5510                                       info.offset_or_index));
5511   }
5512 }
5513 
5514 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5515 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5516                                      const DexFile* target_dex_file,
5517                                      uint32_t pc_insn_offset,
5518                                      uint32_t boot_image_offset) {
5519   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5520   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5521 }
5522 
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5523 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5524   DCHECK(linker_patches->empty());
5525   size_t size =
5526       boot_image_method_patches_.size() +
5527       app_image_method_patches_.size() +
5528       method_bss_entry_patches_.size() +
5529       boot_image_type_patches_.size() +
5530       app_image_type_patches_.size() +
5531       type_bss_entry_patches_.size() +
5532       public_type_bss_entry_patches_.size() +
5533       package_type_bss_entry_patches_.size() +
5534       boot_image_string_patches_.size() +
5535       string_bss_entry_patches_.size() +
5536       method_type_bss_entry_patches_.size() +
5537       boot_image_jni_entrypoint_patches_.size() +
5538       boot_image_other_patches_.size() +
5539       call_entrypoint_patches_.size() +
5540       baker_read_barrier_patches_.size();
5541   linker_patches->reserve(size);
5542   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5543     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5544         boot_image_method_patches_, linker_patches);
5545     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5546         boot_image_type_patches_, linker_patches);
5547     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5548         boot_image_string_patches_, linker_patches);
5549   } else {
5550     DCHECK(boot_image_method_patches_.empty());
5551     DCHECK(boot_image_type_patches_.empty());
5552     DCHECK(boot_image_string_patches_.empty());
5553   }
5554   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
5555   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5556   if (GetCompilerOptions().IsBootImage()) {
5557     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5558         boot_image_other_patches_, linker_patches);
5559   } else {
5560     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5561         boot_image_other_patches_, linker_patches);
5562     EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
5563         app_image_method_patches_, linker_patches);
5564     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5565         app_image_type_patches_, linker_patches);
5566   }
5567   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5568       method_bss_entry_patches_, linker_patches);
5569   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5570       type_bss_entry_patches_, linker_patches);
5571   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5572       public_type_bss_entry_patches_, linker_patches);
5573   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5574       package_type_bss_entry_patches_, linker_patches);
5575   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5576       string_bss_entry_patches_, linker_patches);
5577   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
5578       method_type_bss_entry_patches_, linker_patches);
5579   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5580       boot_image_jni_entrypoint_patches_, linker_patches);
5581   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
5582     DCHECK(info.target_dex_file == nullptr);
5583     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
5584         info.label.GetLocation(), info.offset_or_index));
5585   }
5586   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
5587     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
5588         info.label.GetLocation(), info.custom_data));
5589   }
5590   DCHECK_EQ(size, linker_patches->size());
5591 }
5592 
NeedsThunkCode(const linker::LinkerPatch & patch) const5593 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
5594   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
5595          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
5596          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
5597 }
5598 
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)5599 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
5600                                        /*out*/ ArenaVector<uint8_t>* code,
5601                                        /*out*/ std::string* debug_name) {
5602   Arm64Assembler assembler(GetGraph()->GetAllocator());
5603   switch (patch.GetType()) {
5604     case linker::LinkerPatch::Type::kCallRelative: {
5605       // The thunk just uses the entry point in the ArtMethod. This works even for calls
5606       // to the generic JNI and interpreter trampolines.
5607       Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5608           kArm64PointerSize).Int32Value());
5609       assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
5610       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5611         *debug_name = "MethodCallThunk";
5612       }
5613       break;
5614     }
5615     case linker::LinkerPatch::Type::kCallEntrypoint: {
5616       Offset offset(patch.EntrypointOffset());
5617       assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
5618       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5619         *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
5620       }
5621       break;
5622     }
5623     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
5624       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
5625       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
5626       break;
5627     }
5628     default:
5629       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
5630       UNREACHABLE();
5631   }
5632 
5633   // Ensure we emit the literal pool if any.
5634   assembler.FinalizeCode();
5635   code->resize(assembler.CodeSize());
5636   MemoryRegion code_region(code->data(), code->size());
5637   assembler.CopyInstructions(code_region);
5638 }
5639 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)5640 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
5641   // Explicit clinit checks triggered by static invokes must have been pruned by
5642   // art::PrepareForRegisterAllocation.
5643   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
5644 
5645   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5646     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5647     return;
5648   }
5649 
5650   LocationSummary* locations = invoke->GetLocations();
5651   codegen_->GenerateStaticOrDirectCall(
5652       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
5653 
5654   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5655 }
5656 
VisitInvokeVirtual(HInvokeVirtual * invoke)5657 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
5658   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5659     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5660     return;
5661   }
5662 
5663   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
5664   DCHECK(!codegen_->IsLeafMethod());
5665 
5666   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5667 }
5668 
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)5669 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
5670     HLoadClass::LoadKind desired_class_load_kind) {
5671   switch (desired_class_load_kind) {
5672     case HLoadClass::LoadKind::kInvalid:
5673       LOG(FATAL) << "UNREACHABLE";
5674       UNREACHABLE();
5675     case HLoadClass::LoadKind::kReferrersClass:
5676       break;
5677     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5678     case HLoadClass::LoadKind::kBootImageRelRo:
5679     case HLoadClass::LoadKind::kAppImageRelRo:
5680     case HLoadClass::LoadKind::kBssEntry:
5681     case HLoadClass::LoadKind::kBssEntryPublic:
5682     case HLoadClass::LoadKind::kBssEntryPackage:
5683       DCHECK(!GetCompilerOptions().IsJitCompiler());
5684       break;
5685     case HLoadClass::LoadKind::kJitBootImageAddress:
5686     case HLoadClass::LoadKind::kJitTableAddress:
5687       DCHECK(GetCompilerOptions().IsJitCompiler());
5688       break;
5689     case HLoadClass::LoadKind::kRuntimeCall:
5690       break;
5691   }
5692   return desired_class_load_kind;
5693 }
5694 
VisitLoadClass(HLoadClass * cls)5695 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
5696   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5697   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5698     InvokeRuntimeCallingConvention calling_convention;
5699     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5700         cls,
5701         LocationFrom(calling_convention.GetRegisterAt(0)),
5702         LocationFrom(vixl::aarch64::x0));
5703     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
5704     return;
5705   }
5706   DCHECK_EQ(cls->NeedsAccessCheck(),
5707             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5708                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5709 
5710   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
5711   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5712       ? LocationSummary::kCallOnSlowPath
5713       : LocationSummary::kNoCall;
5714   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5715   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5716     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5717   }
5718 
5719   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5720     locations->SetInAt(0, Location::RequiresRegister());
5721   }
5722   locations->SetOut(Location::RequiresRegister());
5723   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
5724       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5725       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
5726     if (codegen_->EmitNonBakerReadBarrier()) {
5727       // For non-Baker read barrier we have a temp-clobbering call.
5728     } else {
5729       // Rely on the type resolution or initialization and marking to save everything we need.
5730       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5731     }
5732   }
5733 }
5734 
5735 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5736 // move.
VisitLoadClass(HLoadClass * cls)5737 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5738   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5739   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5740     codegen_->GenerateLoadClassRuntimeCall(cls);
5741     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5742     return;
5743   }
5744   DCHECK_EQ(cls->NeedsAccessCheck(),
5745             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5746                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5747 
5748   Location out_loc = cls->GetLocations()->Out();
5749   Register out = OutputRegister(cls);
5750 
5751   const ReadBarrierOption read_barrier_option =
5752       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
5753   bool generate_null_check = false;
5754   switch (load_kind) {
5755     case HLoadClass::LoadKind::kReferrersClass: {
5756       DCHECK(!cls->CanCallRuntime());
5757       DCHECK(!cls->MustGenerateClinitCheck());
5758       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5759       Register current_method = InputRegisterAt(cls, 0);
5760       codegen_->GenerateGcRootFieldLoad(cls,
5761                                         out_loc,
5762                                         current_method,
5763                                         ArtMethod::DeclaringClassOffset().Int32Value(),
5764                                         /* fixup_label= */ nullptr,
5765                                         read_barrier_option);
5766       break;
5767     }
5768     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
5769       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5770              codegen_->GetCompilerOptions().IsBootImageExtension());
5771       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5772       // Add ADRP with its PC-relative type patch.
5773       const DexFile& dex_file = cls->GetDexFile();
5774       dex::TypeIndex type_index = cls->GetTypeIndex();
5775       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
5776       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5777       // Add ADD with its PC-relative type patch.
5778       vixl::aarch64::Label* add_label =
5779           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
5780       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5781       break;
5782     }
5783     case HLoadClass::LoadKind::kBootImageRelRo: {
5784       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5785       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
5786       codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5787       break;
5788     }
5789     case HLoadClass::LoadKind::kAppImageRelRo: {
5790       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
5791       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5792       // Add ADRP with its PC-relative type patch.
5793       const DexFile& dex_file = cls->GetDexFile();
5794       dex::TypeIndex type_index = cls->GetTypeIndex();
5795       vixl::aarch64::Label* adrp_label = codegen_->NewAppImageTypePatch(dex_file, type_index);
5796       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5797       // Add LDR with its PC-relative type patch.
5798       vixl::aarch64::Label* ldr_label =
5799           codegen_->NewAppImageTypePatch(dex_file, type_index, adrp_label);
5800       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5801       break;
5802     }
5803     case HLoadClass::LoadKind::kBssEntry:
5804     case HLoadClass::LoadKind::kBssEntryPublic:
5805     case HLoadClass::LoadKind::kBssEntryPackage: {
5806       // Add ADRP with its PC-relative Class .bss entry patch.
5807       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5808       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(cls);
5809       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5810       // Add LDR with its PC-relative Class .bss entry patch.
5811       vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(cls, adrp_label);
5812       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
5813       // All aligned loads are implicitly atomic consume operations on ARM64.
5814       codegen_->GenerateGcRootFieldLoad(cls,
5815                                         out_loc,
5816                                         temp,
5817                                         /* offset placeholder */ 0u,
5818                                         ldr_label,
5819                                         read_barrier_option);
5820       generate_null_check = true;
5821       break;
5822     }
5823     case HLoadClass::LoadKind::kJitBootImageAddress: {
5824       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5825       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5826       DCHECK_NE(address, 0u);
5827       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5828       break;
5829     }
5830     case HLoadClass::LoadKind::kJitTableAddress: {
5831       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
5832                                                        cls->GetTypeIndex(),
5833                                                        cls->GetClass()));
5834       codegen_->GenerateGcRootFieldLoad(cls,
5835                                         out_loc,
5836                                         out.X(),
5837                                         /* offset= */ 0,
5838                                         /* fixup_label= */ nullptr,
5839                                         read_barrier_option);
5840       break;
5841     }
5842     case HLoadClass::LoadKind::kRuntimeCall:
5843     case HLoadClass::LoadKind::kInvalid:
5844       LOG(FATAL) << "UNREACHABLE";
5845       UNREACHABLE();
5846   }
5847 
5848   bool do_clinit = cls->MustGenerateClinitCheck();
5849   if (generate_null_check || do_clinit) {
5850     DCHECK(cls->CanCallRuntime());
5851     SlowPathCodeARM64* slow_path =
5852         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
5853     codegen_->AddSlowPath(slow_path);
5854     if (generate_null_check) {
5855       __ Cbz(out, slow_path->GetEntryLabel());
5856     }
5857     if (cls->MustGenerateClinitCheck()) {
5858       GenerateClassInitializationCheck(slow_path, out);
5859     } else {
5860       __ Bind(slow_path->GetExitLabel());
5861     }
5862     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5863   }
5864 }
5865 
VisitLoadMethodHandle(HLoadMethodHandle * load)5866 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5867   InvokeRuntimeCallingConvention calling_convention;
5868   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5869   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
5870 }
5871 
VisitLoadMethodHandle(HLoadMethodHandle * load)5872 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5873   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
5874 }
5875 
VisitLoadMethodType(HLoadMethodType * load)5876 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
5877   if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
5878     InvokeRuntimeCallingConvention calling_convention;
5879     Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5880     CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
5881   } else {
5882     LocationSummary* locations =
5883         new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
5884     locations->SetOut(Location::RequiresRegister());
5885     if (load->GetLoadKind() == HLoadMethodType::LoadKind::kBssEntry) {
5886       if (codegen_->EmitNonBakerReadBarrier()) {
5887         // For non-Baker read barrier we have a temp-clobbering call.
5888       } else {
5889         // Rely on the pResolveMethodType to save everything.
5890         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5891       }
5892     }
5893   }
5894 }
5895 
VisitLoadMethodType(HLoadMethodType * load)5896 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
5897   Location out_loc = load->GetLocations()->Out();
5898   Register out = OutputRegister(load);
5899 
5900   switch (load->GetLoadKind()) {
5901     case HLoadMethodType::LoadKind::kBssEntry: {
5902       // Add ADRP with its PC-relative Class .bss entry patch.
5903       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5904       vixl::aarch64::Label* adrp_label = codegen_->NewMethodTypeBssEntryPatch(load);
5905       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5906       // Add LDR with its PC-relative MethodType .bss entry patch.
5907       vixl::aarch64::Label* ldr_label = codegen_->NewMethodTypeBssEntryPatch(load, adrp_label);
5908       // /* GcRoot<mirror::MethodType> */ out = *(base_address + offset)  /* PC-relative */
5909       // All aligned loads are implicitly atomic consume operations on ARM64.
5910       codegen_->GenerateGcRootFieldLoad(load,
5911                                         out_loc,
5912                                         temp,
5913                                         /* offset placeholder */ 0u,
5914                                         ldr_label,
5915                                         codegen_->GetCompilerReadBarrierOption());
5916       SlowPathCodeARM64* slow_path =
5917           new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathARM64(load);
5918       codegen_->AddSlowPath(slow_path);
5919       __ Cbz(out, slow_path->GetEntryLabel());
5920       __ Bind(slow_path->GetExitLabel());
5921       codegen_->MaybeGenerateMarkingRegisterCheck(/* code = */ __LINE__);
5922       return;
5923     }
5924     case HLoadMethodType::LoadKind::kJitTableAddress: {
5925       __ Ldr(out, codegen_->DeduplicateJitMethodTypeLiteral(load->GetDexFile(),
5926                                                             load->GetProtoIndex(),
5927                                                             load->GetMethodType()));
5928       codegen_->GenerateGcRootFieldLoad(load,
5929                                         out_loc,
5930                                         out.X(),
5931                                         /* offset= */ 0,
5932                                         /* fixup_label= */ nullptr,
5933                                         codegen_->GetCompilerReadBarrierOption());
5934       return;
5935     }
5936     default:
5937       DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
5938       codegen_->GenerateLoadMethodTypeRuntimeCall(load);
5939       break;
5940   }
5941 }
5942 
GetExceptionTlsAddress()5943 static MemOperand GetExceptionTlsAddress() {
5944   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5945 }
5946 
VisitLoadException(HLoadException * load)5947 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5948   LocationSummary* locations =
5949       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5950   locations->SetOut(Location::RequiresRegister());
5951 }
5952 
VisitLoadException(HLoadException * instruction)5953 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5954   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5955 }
5956 
VisitClearException(HClearException * clear)5957 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5958   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5959 }
5960 
VisitClearException(HClearException * clear)5961 void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) {
5962   __ Str(wzr, GetExceptionTlsAddress());
5963 }
5964 
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)5965 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5966     HLoadString::LoadKind desired_string_load_kind) {
5967   switch (desired_string_load_kind) {
5968     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5969     case HLoadString::LoadKind::kBootImageRelRo:
5970     case HLoadString::LoadKind::kBssEntry:
5971       DCHECK(!GetCompilerOptions().IsJitCompiler());
5972       break;
5973     case HLoadString::LoadKind::kJitBootImageAddress:
5974     case HLoadString::LoadKind::kJitTableAddress:
5975       DCHECK(GetCompilerOptions().IsJitCompiler());
5976       break;
5977     case HLoadString::LoadKind::kRuntimeCall:
5978       break;
5979   }
5980   return desired_string_load_kind;
5981 }
5982 
VisitLoadString(HLoadString * load)5983 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5984   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
5985   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5986   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5987     InvokeRuntimeCallingConvention calling_convention;
5988     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5989   } else {
5990     locations->SetOut(Location::RequiresRegister());
5991     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5992       if (codegen_->EmitNonBakerReadBarrier()) {
5993         // For non-Baker read barrier we have a temp-clobbering call.
5994       } else {
5995         // Rely on the pResolveString and marking to save everything we need.
5996         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5997       }
5998     }
5999   }
6000 }
6001 
6002 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6003 // move.
VisitLoadString(HLoadString * load)6004 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6005   Register out = OutputRegister(load);
6006   Location out_loc = load->GetLocations()->Out();
6007 
6008   switch (load->GetLoadKind()) {
6009     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6010       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6011              codegen_->GetCompilerOptions().IsBootImageExtension());
6012       // Add ADRP with its PC-relative String patch.
6013       const DexFile& dex_file = load->GetDexFile();
6014       const dex::StringIndex string_index = load->GetStringIndex();
6015       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
6016       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
6017       // Add ADD with its PC-relative String patch.
6018       vixl::aarch64::Label* add_label =
6019           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
6020       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
6021       return;
6022     }
6023     case HLoadString::LoadKind::kBootImageRelRo: {
6024       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6025       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
6026       codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
6027       return;
6028     }
6029     case HLoadString::LoadKind::kBssEntry: {
6030       // Add ADRP with its PC-relative String .bss entry patch.
6031       const DexFile& dex_file = load->GetDexFile();
6032       const dex::StringIndex string_index = load->GetStringIndex();
6033       Register temp = XRegisterFrom(out_loc);
6034       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
6035       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
6036       // Add LDR with its PC-relative String .bss entry patch.
6037       vixl::aarch64::Label* ldr_label =
6038           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
6039       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
6040       // All aligned loads are implicitly atomic consume operations on ARM64.
6041       codegen_->GenerateGcRootFieldLoad(load,
6042                                         out_loc,
6043                                         temp,
6044                                         /* offset placeholder */ 0u,
6045                                         ldr_label,
6046                                         codegen_->GetCompilerReadBarrierOption());
6047       SlowPathCodeARM64* slow_path =
6048           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
6049       codegen_->AddSlowPath(slow_path);
6050       __ Cbz(out.X(), slow_path->GetEntryLabel());
6051       __ Bind(slow_path->GetExitLabel());
6052       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6053       return;
6054     }
6055     case HLoadString::LoadKind::kJitBootImageAddress: {
6056       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6057       DCHECK_NE(address, 0u);
6058       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
6059       return;
6060     }
6061     case HLoadString::LoadKind::kJitTableAddress: {
6062       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
6063                                                         load->GetStringIndex(),
6064                                                         load->GetString()));
6065       codegen_->GenerateGcRootFieldLoad(load,
6066                                         out_loc,
6067                                         out.X(),
6068                                         /* offset= */ 0,
6069                                         /* fixup_label= */ nullptr,
6070                                         codegen_->GetCompilerReadBarrierOption());
6071       return;
6072     }
6073     default:
6074       break;
6075   }
6076 
6077   InvokeRuntimeCallingConvention calling_convention;
6078   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
6079   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
6080   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6081   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6082   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6083 }
6084 
VisitLongConstant(HLongConstant * constant)6085 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
6086   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
6087   locations->SetOut(Location::ConstantLocation(constant));
6088 }
6089 
VisitLongConstant(HLongConstant * constant)6090 void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
6091   // Will be generated at use site.
6092 }
6093 
VisitMonitorOperation(HMonitorOperation * instruction)6094 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
6095   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6096       instruction, LocationSummary::kCallOnMainOnly);
6097   InvokeRuntimeCallingConvention calling_convention;
6098   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6099 }
6100 
VisitMonitorOperation(HMonitorOperation * instruction)6101 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
6102   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6103                           instruction,
6104                           instruction->GetDexPc());
6105   if (instruction->IsEnter()) {
6106     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6107   } else {
6108     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6109   }
6110   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6111 }
6112 
VisitMul(HMul * mul)6113 void LocationsBuilderARM64::VisitMul(HMul* mul) {
6114   LocationSummary* locations =
6115       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
6116   switch (mul->GetResultType()) {
6117     case DataType::Type::kInt32:
6118     case DataType::Type::kInt64:
6119       locations->SetInAt(0, Location::RequiresRegister());
6120       locations->SetInAt(1, Location::RequiresRegister());
6121       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6122       break;
6123 
6124     case DataType::Type::kFloat32:
6125     case DataType::Type::kFloat64:
6126       locations->SetInAt(0, Location::RequiresFpuRegister());
6127       locations->SetInAt(1, Location::RequiresFpuRegister());
6128       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6129       break;
6130 
6131     default:
6132       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
6133   }
6134 }
6135 
VisitMul(HMul * mul)6136 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
6137   switch (mul->GetResultType()) {
6138     case DataType::Type::kInt32:
6139     case DataType::Type::kInt64:
6140       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
6141       break;
6142 
6143     case DataType::Type::kFloat32:
6144     case DataType::Type::kFloat64:
6145       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
6146       break;
6147 
6148     default:
6149       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
6150   }
6151 }
6152 
VisitNeg(HNeg * neg)6153 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
6154   LocationSummary* locations =
6155       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
6156   switch (neg->GetResultType()) {
6157     case DataType::Type::kInt32:
6158     case DataType::Type::kInt64:
6159       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
6160       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6161       break;
6162 
6163     case DataType::Type::kFloat32:
6164     case DataType::Type::kFloat64:
6165       locations->SetInAt(0, Location::RequiresFpuRegister());
6166       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6167       break;
6168 
6169     default:
6170       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
6171   }
6172 }
6173 
VisitNeg(HNeg * neg)6174 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
6175   switch (neg->GetResultType()) {
6176     case DataType::Type::kInt32:
6177     case DataType::Type::kInt64:
6178       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
6179       break;
6180 
6181     case DataType::Type::kFloat32:
6182     case DataType::Type::kFloat64:
6183       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
6184       break;
6185 
6186     default:
6187       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
6188   }
6189 }
6190 
VisitNewArray(HNewArray * instruction)6191 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
6192   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6193       instruction, LocationSummary::kCallOnMainOnly);
6194   InvokeRuntimeCallingConvention calling_convention;
6195   locations->SetOut(LocationFrom(x0));
6196   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6197   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
6198 }
6199 
VisitNewArray(HNewArray * instruction)6200 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
6201   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
6202   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
6203   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
6204   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
6205   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6206 }
6207 
VisitNewInstance(HNewInstance * instruction)6208 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
6209   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6210       instruction, LocationSummary::kCallOnMainOnly);
6211   InvokeRuntimeCallingConvention calling_convention;
6212   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6213   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
6214 }
6215 
VisitNewInstance(HNewInstance * instruction)6216 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
6217   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
6218   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
6219   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6220 }
6221 
VisitNot(HNot * instruction)6222 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
6223   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6224   locations->SetInAt(0, Location::RequiresRegister());
6225   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6226 }
6227 
VisitNot(HNot * instruction)6228 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
6229   switch (instruction->GetResultType()) {
6230     case DataType::Type::kInt32:
6231     case DataType::Type::kInt64:
6232       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
6233       break;
6234 
6235     default:
6236       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
6237   }
6238 }
6239 
VisitBooleanNot(HBooleanNot * instruction)6240 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
6241   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6242   locations->SetInAt(0, Location::RequiresRegister());
6243   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6244 }
6245 
VisitBooleanNot(HBooleanNot * instruction)6246 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
6247   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
6248 }
6249 
VisitNullCheck(HNullCheck * instruction)6250 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
6251   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6252   locations->SetInAt(0, Location::RequiresRegister());
6253 }
6254 
GenerateImplicitNullCheck(HNullCheck * instruction)6255 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
6256   if (CanMoveNullCheckToUser(instruction)) {
6257     return;
6258   }
6259   {
6260     // Ensure that between load and RecordPcInfo there are no pools emitted.
6261     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6262     Location obj = instruction->GetLocations()->InAt(0);
6263     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
6264     RecordPcInfo(instruction, instruction->GetDexPc());
6265   }
6266 }
6267 
GenerateExplicitNullCheck(HNullCheck * instruction)6268 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
6269   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
6270   AddSlowPath(slow_path);
6271 
6272   LocationSummary* locations = instruction->GetLocations();
6273   Location obj = locations->InAt(0);
6274 
6275   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
6276 }
6277 
VisitNullCheck(HNullCheck * instruction)6278 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
6279   codegen_->GenerateNullCheck(instruction);
6280 }
6281 
VisitOr(HOr * instruction)6282 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
6283   HandleBinaryOp(instruction);
6284 }
6285 
VisitOr(HOr * instruction)6286 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
6287   HandleBinaryOp(instruction);
6288 }
6289 
VisitParallelMove(HParallelMove * instruction)6290 void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6291   LOG(FATAL) << "Unreachable";
6292 }
6293 
VisitParallelMove(HParallelMove * instruction)6294 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
6295   if (instruction->GetNext()->IsSuspendCheck() &&
6296       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6297     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6298     // The back edge will generate the suspend check.
6299     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6300   }
6301 
6302   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6303 }
6304 
VisitParameterValue(HParameterValue * instruction)6305 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
6306   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6307   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
6308   if (location.IsStackSlot()) {
6309     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
6310   } else if (location.IsDoubleStackSlot()) {
6311     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
6312   }
6313   locations->SetOut(location);
6314 }
6315 
VisitParameterValue(HParameterValue * instruction)6316 void InstructionCodeGeneratorARM64::VisitParameterValue(
6317     [[maybe_unused]] HParameterValue* instruction) {
6318   // Nothing to do, the parameter is already at its location.
6319 }
6320 
VisitCurrentMethod(HCurrentMethod * instruction)6321 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
6322   LocationSummary* locations =
6323       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6324   locations->SetOut(LocationFrom(kArtMethodRegister));
6325 }
6326 
VisitCurrentMethod(HCurrentMethod * instruction)6327 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
6328     [[maybe_unused]] HCurrentMethod* instruction) {
6329   // Nothing to do, the method is already at its location.
6330 }
6331 
VisitPhi(HPhi * instruction)6332 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
6333   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6334   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
6335     locations->SetInAt(i, Location::Any());
6336   }
6337   locations->SetOut(Location::Any());
6338 }
6339 
VisitPhi(HPhi * instruction)6340 void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) {
6341   LOG(FATAL) << "Unreachable";
6342 }
6343 
VisitRem(HRem * rem)6344 void LocationsBuilderARM64::VisitRem(HRem* rem) {
6345   DataType::Type type = rem->GetResultType();
6346   LocationSummary::CallKind call_kind =
6347       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
6348                                            : LocationSummary::kNoCall;
6349   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
6350 
6351   switch (type) {
6352     case DataType::Type::kInt32:
6353     case DataType::Type::kInt64:
6354       locations->SetInAt(0, Location::RequiresRegister());
6355       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
6356       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6357       break;
6358 
6359     case DataType::Type::kFloat32:
6360     case DataType::Type::kFloat64: {
6361       InvokeRuntimeCallingConvention calling_convention;
6362       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
6363       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
6364       locations->SetOut(calling_convention.GetReturnLocation(type));
6365 
6366       break;
6367     }
6368 
6369     default:
6370       LOG(FATAL) << "Unexpected rem type " << type;
6371   }
6372 }
6373 
GenerateIntRemForPower2Denom(HRem * instruction)6374 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
6375   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6376   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
6377   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
6378 
6379   Register out = OutputRegister(instruction);
6380   Register dividend = InputRegisterAt(instruction, 0);
6381 
6382   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
6383     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
6384     // NOTE: The generated code for HRem correctly works for the INT32_MIN/INT64_MIN dividends.
6385     // INT*_MIN % imm must be 0 for any imm of power 2. 'and' works only with bits
6386     // 0..30 (Int32 case)/0..62 (Int64 case) of a dividend. For INT32_MIN/INT64_MIN they are zeros.
6387     // So 'and' always produces zero.
6388     __ And(out, dividend, abs_imm - 1);
6389   } else {
6390     if (abs_imm == 2) {
6391       __ Cmp(dividend, 0);
6392       __ And(out, dividend, 1);
6393       __ Csneg(out, out, out, ge);
6394     } else {
6395       UseScratchRegisterScope temps(GetVIXLAssembler());
6396       Register temp = temps.AcquireSameSizeAs(out);
6397 
6398       __ Negs(temp, dividend);
6399       __ And(out, dividend, abs_imm - 1);
6400       __ And(temp, temp, abs_imm - 1);
6401       __ Csneg(out, out, temp, mi);
6402     }
6403   }
6404 }
6405 
GenerateIntRemForConstDenom(HRem * instruction)6406 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
6407   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6408 
6409   if (imm == 0) {
6410     // Do not generate anything.
6411     // DivZeroCheck would prevent any code to be executed.
6412     return;
6413   }
6414 
6415   if (IsPowerOfTwo(AbsOrMin(imm))) {
6416     // Cases imm == -1 or imm == 1 are handled in constant folding by
6417     // InstructionWithAbsorbingInputSimplifier.
6418     // If the cases have survided till code generation they are handled in
6419     // GenerateIntRemForPower2Denom becauses -1 and 1 are the power of 2 (2^0).
6420     // The correct code is generated for them, just more instructions.
6421     GenerateIntRemForPower2Denom(instruction);
6422   } else {
6423     DCHECK(imm < -2 || imm > 2) << imm;
6424     GenerateDivRemWithAnyConstant(instruction, imm);
6425   }
6426 }
6427 
GenerateIntRem(HRem * instruction)6428 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
6429   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
6430          << instruction->GetResultType();
6431 
6432   if (instruction->GetLocations()->InAt(1).IsConstant()) {
6433     GenerateIntRemForConstDenom(instruction);
6434   } else {
6435     Register out = OutputRegister(instruction);
6436     Register dividend = InputRegisterAt(instruction, 0);
6437     Register divisor = InputRegisterAt(instruction, 1);
6438     UseScratchRegisterScope temps(GetVIXLAssembler());
6439     Register temp = temps.AcquireSameSizeAs(out);
6440     __ Sdiv(temp, dividend, divisor);
6441     __ Msub(out, temp, divisor, dividend);
6442   }
6443 }
6444 
VisitRem(HRem * rem)6445 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
6446   DataType::Type type = rem->GetResultType();
6447 
6448   switch (type) {
6449     case DataType::Type::kInt32:
6450     case DataType::Type::kInt64: {
6451       GenerateIntRem(rem);
6452       break;
6453     }
6454 
6455     case DataType::Type::kFloat32:
6456     case DataType::Type::kFloat64: {
6457       QuickEntrypointEnum entrypoint =
6458           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
6459       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
6460       if (type == DataType::Type::kFloat32) {
6461         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
6462       } else {
6463         CheckEntrypointTypes<kQuickFmod, double, double, double>();
6464       }
6465       break;
6466     }
6467 
6468     default:
6469       LOG(FATAL) << "Unexpected rem type " << type;
6470       UNREACHABLE();
6471   }
6472 }
6473 
VisitMin(HMin * min)6474 void LocationsBuilderARM64::VisitMin(HMin* min) {
6475   HandleBinaryOp(min);
6476 }
6477 
VisitMin(HMin * min)6478 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
6479   HandleBinaryOp(min);
6480 }
6481 
VisitMax(HMax * max)6482 void LocationsBuilderARM64::VisitMax(HMax* max) {
6483   HandleBinaryOp(max);
6484 }
6485 
VisitMax(HMax * max)6486 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
6487   HandleBinaryOp(max);
6488 }
6489 
VisitAbs(HAbs * abs)6490 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
6491   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
6492   switch (abs->GetResultType()) {
6493     case DataType::Type::kInt32:
6494     case DataType::Type::kInt64:
6495       locations->SetInAt(0, Location::RequiresRegister());
6496       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6497       break;
6498     case DataType::Type::kFloat32:
6499     case DataType::Type::kFloat64:
6500       locations->SetInAt(0, Location::RequiresFpuRegister());
6501       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6502       break;
6503     default:
6504       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6505   }
6506 }
6507 
VisitAbs(HAbs * abs)6508 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
6509   switch (abs->GetResultType()) {
6510     case DataType::Type::kInt32:
6511     case DataType::Type::kInt64: {
6512       Register in_reg = InputRegisterAt(abs, 0);
6513       Register out_reg = OutputRegister(abs);
6514       __ Cmp(in_reg, Operand(0));
6515       __ Cneg(out_reg, in_reg, lt);
6516       break;
6517     }
6518     case DataType::Type::kFloat32:
6519     case DataType::Type::kFloat64: {
6520       VRegister in_reg = InputFPRegisterAt(abs, 0);
6521       VRegister out_reg = OutputFPRegister(abs);
6522       __ Fabs(out_reg, in_reg);
6523       break;
6524     }
6525     default:
6526       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6527   }
6528 }
6529 
VisitConstructorFence(HConstructorFence * constructor_fence)6530 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
6531   constructor_fence->SetLocations(nullptr);
6532 }
6533 
VisitConstructorFence(HConstructorFence * constructor_fence)6534 void InstructionCodeGeneratorARM64::VisitConstructorFence(
6535     [[maybe_unused]] HConstructorFence* constructor_fence) {
6536   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
6537 }
6538 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)6539 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6540   memory_barrier->SetLocations(nullptr);
6541 }
6542 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)6543 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6544   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
6545 }
6546 
VisitReturn(HReturn * instruction)6547 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
6548   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6549   DataType::Type return_type = instruction->InputAt(0)->GetType();
6550   locations->SetInAt(0, ARM64ReturnLocation(return_type));
6551 }
6552 
VisitReturn(HReturn * ret)6553 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
6554   if (GetGraph()->IsCompilingOsr()) {
6555     // To simplify callers of an OSR method, we put the return value in both
6556     // floating point and core register.
6557     switch (ret->InputAt(0)->GetType()) {
6558       case DataType::Type::kFloat32:
6559         __ Fmov(w0, s0);
6560         break;
6561       case DataType::Type::kFloat64:
6562         __ Fmov(x0, d0);
6563         break;
6564       default:
6565         break;
6566     }
6567   }
6568   codegen_->GenerateFrameExit();
6569 }
6570 
VisitReturnVoid(HReturnVoid * instruction)6571 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
6572   instruction->SetLocations(nullptr);
6573 }
6574 
VisitReturnVoid(HReturnVoid * instruction)6575 void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
6576   codegen_->GenerateFrameExit();
6577 }
6578 
VisitRol(HRol * rol)6579 void LocationsBuilderARM64::VisitRol(HRol* rol) {
6580   HandleBinaryOp(rol);
6581 }
6582 
VisitRol(HRol * rol)6583 void InstructionCodeGeneratorARM64::VisitRol(HRol* rol) {
6584   HandleBinaryOp(rol);
6585 }
6586 
VisitRor(HRor * ror)6587 void LocationsBuilderARM64::VisitRor(HRor* ror) {
6588   HandleBinaryOp(ror);
6589 }
6590 
VisitRor(HRor * ror)6591 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
6592   HandleBinaryOp(ror);
6593 }
6594 
VisitShl(HShl * shl)6595 void LocationsBuilderARM64::VisitShl(HShl* shl) {
6596   HandleShift(shl);
6597 }
6598 
VisitShl(HShl * shl)6599 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
6600   HandleShift(shl);
6601 }
6602 
VisitShr(HShr * shr)6603 void LocationsBuilderARM64::VisitShr(HShr* shr) {
6604   HandleShift(shr);
6605 }
6606 
VisitShr(HShr * shr)6607 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
6608   HandleShift(shr);
6609 }
6610 
VisitSub(HSub * instruction)6611 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
6612   HandleBinaryOp(instruction);
6613 }
6614 
VisitSub(HSub * instruction)6615 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
6616   HandleBinaryOp(instruction);
6617 }
6618 
VisitStaticFieldGet(HStaticFieldGet * instruction)6619 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6620   HandleFieldGet(instruction, instruction->GetFieldInfo());
6621 }
6622 
VisitStaticFieldGet(HStaticFieldGet * instruction)6623 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6624   HandleFieldGet(instruction, instruction->GetFieldInfo());
6625 }
6626 
VisitStaticFieldSet(HStaticFieldSet * instruction)6627 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6628   HandleFieldSet(instruction);
6629 }
6630 
VisitStaticFieldSet(HStaticFieldSet * instruction)6631 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6632   HandleFieldSet(instruction,
6633                  instruction->GetFieldInfo(),
6634                  instruction->GetValueCanBeNull(),
6635                  instruction->GetWriteBarrierKind());
6636 }
6637 
VisitStringBuilderAppend(HStringBuilderAppend * instruction)6638 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6639   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
6640 }
6641 
VisitStringBuilderAppend(HStringBuilderAppend * instruction)6642 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6643   __ Mov(w0, instruction->GetFormat()->GetValue());
6644   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6645 }
6646 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)6647 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
6648     HUnresolvedInstanceFieldGet* instruction) {
6649   FieldAccessCallingConventionARM64 calling_convention;
6650   codegen_->CreateUnresolvedFieldLocationSummary(
6651       instruction, instruction->GetFieldType(), calling_convention);
6652 }
6653 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)6654 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
6655     HUnresolvedInstanceFieldGet* instruction) {
6656   FieldAccessCallingConventionARM64 calling_convention;
6657   codegen_->GenerateUnresolvedFieldAccess(instruction,
6658                                           instruction->GetFieldType(),
6659                                           instruction->GetFieldIndex(),
6660                                           instruction->GetDexPc(),
6661                                           calling_convention);
6662 }
6663 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)6664 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
6665     HUnresolvedInstanceFieldSet* instruction) {
6666   FieldAccessCallingConventionARM64 calling_convention;
6667   codegen_->CreateUnresolvedFieldLocationSummary(
6668       instruction, instruction->GetFieldType(), calling_convention);
6669 }
6670 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)6671 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
6672     HUnresolvedInstanceFieldSet* instruction) {
6673   FieldAccessCallingConventionARM64 calling_convention;
6674   codegen_->GenerateUnresolvedFieldAccess(instruction,
6675                                           instruction->GetFieldType(),
6676                                           instruction->GetFieldIndex(),
6677                                           instruction->GetDexPc(),
6678                                           calling_convention);
6679 }
6680 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)6681 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
6682     HUnresolvedStaticFieldGet* instruction) {
6683   FieldAccessCallingConventionARM64 calling_convention;
6684   codegen_->CreateUnresolvedFieldLocationSummary(
6685       instruction, instruction->GetFieldType(), calling_convention);
6686 }
6687 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)6688 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
6689     HUnresolvedStaticFieldGet* instruction) {
6690   FieldAccessCallingConventionARM64 calling_convention;
6691   codegen_->GenerateUnresolvedFieldAccess(instruction,
6692                                           instruction->GetFieldType(),
6693                                           instruction->GetFieldIndex(),
6694                                           instruction->GetDexPc(),
6695                                           calling_convention);
6696 }
6697 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)6698 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
6699     HUnresolvedStaticFieldSet* instruction) {
6700   FieldAccessCallingConventionARM64 calling_convention;
6701   codegen_->CreateUnresolvedFieldLocationSummary(
6702       instruction, instruction->GetFieldType(), calling_convention);
6703 }
6704 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)6705 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
6706     HUnresolvedStaticFieldSet* instruction) {
6707   FieldAccessCallingConventionARM64 calling_convention;
6708   codegen_->GenerateUnresolvedFieldAccess(instruction,
6709                                           instruction->GetFieldType(),
6710                                           instruction->GetFieldIndex(),
6711                                           instruction->GetDexPc(),
6712                                           calling_convention);
6713 }
6714 
VisitSuspendCheck(HSuspendCheck * instruction)6715 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6716   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6717       instruction, LocationSummary::kCallOnSlowPath);
6718   // In suspend check slow path, usually there are no caller-save registers at all.
6719   // If SIMD instructions are present, however, we force spilling all live SIMD
6720   // registers in full width (since the runtime only saves/restores lower part).
6721   // Note that only a suspend check can see live SIMD registers. In the
6722   // loop optimization, we make sure this does not happen for any other slow
6723   // path.
6724   locations->SetCustomSlowPathCallerSaves(
6725       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6726 }
6727 
VisitSuspendCheck(HSuspendCheck * instruction)6728 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6729   HBasicBlock* block = instruction->GetBlock();
6730   if (block->GetLoopInformation() != nullptr) {
6731     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6732     // The back edge will generate the suspend check.
6733     return;
6734   }
6735   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6736     // The goto will generate the suspend check.
6737     return;
6738   }
6739   GenerateSuspendCheck(instruction, nullptr);
6740   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6741 }
6742 
VisitThrow(HThrow * instruction)6743 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
6744   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6745       instruction, LocationSummary::kCallOnMainOnly);
6746   InvokeRuntimeCallingConvention calling_convention;
6747   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6748 }
6749 
VisitThrow(HThrow * instruction)6750 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
6751   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6752   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6753 }
6754 
VisitTypeConversion(HTypeConversion * conversion)6755 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
6756   LocationSummary* locations =
6757       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
6758   DataType::Type input_type = conversion->GetInputType();
6759   DataType::Type result_type = conversion->GetResultType();
6760   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6761       << input_type << " -> " << result_type;
6762   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
6763       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
6764     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
6765   }
6766 
6767   if (DataType::IsFloatingPointType(input_type)) {
6768     locations->SetInAt(0, Location::RequiresFpuRegister());
6769   } else {
6770     locations->SetInAt(0, Location::RequiresRegister());
6771   }
6772 
6773   if (DataType::IsFloatingPointType(result_type)) {
6774     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6775   } else {
6776     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6777   }
6778 }
6779 
VisitTypeConversion(HTypeConversion * conversion)6780 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
6781   DataType::Type result_type = conversion->GetResultType();
6782   DataType::Type input_type = conversion->GetInputType();
6783 
6784   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6785       << input_type << " -> " << result_type;
6786 
6787   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
6788     int result_size = DataType::Size(result_type);
6789     int input_size = DataType::Size(input_type);
6790     int min_size = std::min(result_size, input_size);
6791     Register output = OutputRegister(conversion);
6792     Register source = InputRegisterAt(conversion, 0);
6793     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
6794       // 'int' values are used directly as W registers, discarding the top
6795       // bits, so we don't need to sign-extend and can just perform a move.
6796       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
6797       // top 32 bits of the target register. We theoretically could leave those
6798       // bits unchanged, but we would have to make sure that no code uses a
6799       // 32bit input value as a 64bit value assuming that the top 32 bits are
6800       // zero.
6801       __ Mov(output.W(), source.W());
6802     } else if (DataType::IsUnsignedType(result_type) ||
6803                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
6804       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
6805     } else {
6806       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
6807     }
6808   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
6809     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
6810   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
6811     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
6812     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
6813   } else if (DataType::IsFloatingPointType(result_type) &&
6814              DataType::IsFloatingPointType(input_type)) {
6815     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
6816   } else {
6817     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
6818                 << " to " << result_type;
6819   }
6820 }
6821 
VisitUShr(HUShr * ushr)6822 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
6823   HandleShift(ushr);
6824 }
6825 
VisitUShr(HUShr * ushr)6826 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
6827   HandleShift(ushr);
6828 }
6829 
VisitXor(HXor * instruction)6830 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
6831   HandleBinaryOp(instruction);
6832 }
6833 
VisitXor(HXor * instruction)6834 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
6835   HandleBinaryOp(instruction);
6836 }
6837 
VisitBoundType(HBoundType * instruction)6838 void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
6839   // Nothing to do, this should be removed during prepare for register allocator.
6840   LOG(FATAL) << "Unreachable";
6841 }
6842 
VisitBoundType(HBoundType * instruction)6843 void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
6844   // Nothing to do, this should be removed during prepare for register allocator.
6845   LOG(FATAL) << "Unreachable";
6846 }
6847 
6848 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)6849 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6850   LocationSummary* locations =
6851       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6852   locations->SetInAt(0, Location::RequiresRegister());
6853 }
6854 
VisitPackedSwitch(HPackedSwitch * switch_instr)6855 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6856   int32_t lower_bound = switch_instr->GetStartValue();
6857   uint32_t num_entries = switch_instr->GetNumEntries();
6858   Register value_reg = InputRegisterAt(switch_instr, 0);
6859   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6860 
6861   if (num_entries <= kPackedSwitchCompareJumpThreshold) {
6862     // Create a series of compare/jumps.
6863     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6864     Register temp = temps.AcquireW();
6865     __ Subs(temp, value_reg, Operand(lower_bound));
6866 
6867     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6868     // Jump to successors[0] if value == lower_bound.
6869     __ B(eq, codegen_->GetLabelOf(successors[0]));
6870     int32_t last_index = 0;
6871     for (; num_entries - last_index > 2; last_index += 2) {
6872       __ Subs(temp, temp, Operand(2));
6873       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
6874       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
6875       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
6876       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
6877     }
6878     if (num_entries - last_index == 2) {
6879       // The last missing case_value.
6880       __ Cmp(temp, Operand(1));
6881       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
6882     }
6883 
6884     // And the default for any other value.
6885     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6886       __ B(codegen_->GetLabelOf(default_block));
6887     }
6888   } else {
6889     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
6890 
6891     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6892 
6893     // Below instructions should use at most one blocked register. Since there are two blocked
6894     // registers, we are free to block one.
6895     Register temp_w = temps.AcquireW();
6896     Register index;
6897     // Remove the bias.
6898     if (lower_bound != 0) {
6899       index = temp_w;
6900       __ Sub(index, value_reg, Operand(lower_bound));
6901     } else {
6902       index = value_reg;
6903     }
6904 
6905     // Jump to default block if index is out of the range.
6906     __ Cmp(index, Operand(num_entries));
6907     __ B(hs, codegen_->GetLabelOf(default_block));
6908 
6909     // In current VIXL implementation, it won't require any blocked registers to encode the
6910     // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the
6911     // register pressure.
6912     Register table_base = temps.AcquireX();
6913 
6914     const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
6915     ExactAssemblyScope scope(codegen_->GetVIXLAssembler(),
6916                              kInstructionSize * 4 + jump_size,
6917                              CodeBufferCheckScope::kExactSize);
6918 
6919     // Load jump offset from the table.
6920     // Note: the table start address is always in range as the table is emitted immediately
6921     // after these 4 instructions.
6922     __ adr(table_base, jump_table->GetTableStartLabel());
6923     Register jump_offset = temp_w;
6924     __ ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
6925 
6926     // Jump to target block by branching to table_base(pc related) + offset.
6927     Register target_address = table_base;
6928     __ add(target_address, table_base, Operand(jump_offset, SXTW));
6929     __ br(target_address);
6930 
6931     jump_table->EmitTable(codegen_);
6932   }
6933 }
6934 
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)6935 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
6936     HInstruction* instruction,
6937     Location out,
6938     uint32_t offset,
6939     Location maybe_temp,
6940     ReadBarrierOption read_barrier_option) {
6941   DataType::Type type = DataType::Type::kReference;
6942   Register out_reg = RegisterFrom(out, type);
6943   if (read_barrier_option == kWithReadBarrier) {
6944     DCHECK(codegen_->EmitReadBarrier());
6945     if (kUseBakerReadBarrier) {
6946       // Load with fast path based Baker's read barrier.
6947       // /* HeapReference<Object> */ out = *(out + offset)
6948       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6949                                                       out,
6950                                                       out_reg,
6951                                                       offset,
6952                                                       maybe_temp,
6953                                                       /* needs_null_check= */ false,
6954                                                       /* use_load_acquire= */ false);
6955     } else {
6956       // Load with slow path based read barrier.
6957       // Save the value of `out` into `maybe_temp` before overwriting it
6958       // in the following move operation, as we will need it for the
6959       // read barrier below.
6960       Register temp_reg = RegisterFrom(maybe_temp, type);
6961       __ Mov(temp_reg, out_reg);
6962       // /* HeapReference<Object> */ out = *(out + offset)
6963       __ Ldr(out_reg, HeapOperand(out_reg, offset));
6964       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6965     }
6966   } else {
6967     // Plain load with no read barrier.
6968     // /* HeapReference<Object> */ out = *(out + offset)
6969     __ Ldr(out_reg, HeapOperand(out_reg, offset));
6970     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6971   }
6972 }
6973 
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)6974 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
6975     HInstruction* instruction,
6976     Location out,
6977     Location obj,
6978     uint32_t offset,
6979     Location maybe_temp,
6980     ReadBarrierOption read_barrier_option) {
6981   DataType::Type type = DataType::Type::kReference;
6982   Register out_reg = RegisterFrom(out, type);
6983   Register obj_reg = RegisterFrom(obj, type);
6984   if (read_barrier_option == kWithReadBarrier) {
6985     DCHECK(codegen_->EmitReadBarrier());
6986     if (kUseBakerReadBarrier) {
6987       // Load with fast path based Baker's read barrier.
6988       // /* HeapReference<Object> */ out = *(obj + offset)
6989       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6990                                                       out,
6991                                                       obj_reg,
6992                                                       offset,
6993                                                       maybe_temp,
6994                                                       /* needs_null_check= */ false,
6995                                                       /* use_load_acquire= */ false);
6996     } else {
6997       // Load with slow path based read barrier.
6998       // /* HeapReference<Object> */ out = *(obj + offset)
6999       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
7000       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7001     }
7002   } else {
7003     // Plain load with no read barrier.
7004     // /* HeapReference<Object> */ out = *(obj + offset)
7005     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
7006     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
7007   }
7008 }
7009 
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,Register obj,uint32_t offset,vixl::aarch64::Label * fixup_label,ReadBarrierOption read_barrier_option)7010 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
7011     HInstruction* instruction,
7012     Location root,
7013     Register obj,
7014     uint32_t offset,
7015     vixl::aarch64::Label* fixup_label,
7016     ReadBarrierOption read_barrier_option) {
7017   DCHECK(fixup_label == nullptr || offset == 0u);
7018   Register root_reg = RegisterFrom(root, DataType::Type::kReference);
7019   if (read_barrier_option == kWithReadBarrier) {
7020     DCHECK(EmitReadBarrier());
7021     if (kUseBakerReadBarrier) {
7022       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7023       // Baker's read barrier are used.
7024 
7025       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
7026       // the Marking Register) to decide whether we need to enter
7027       // the slow path to mark the GC root.
7028       //
7029       // We use shared thunks for the slow path; shared within the method
7030       // for JIT, across methods for AOT. That thunk checks the reference
7031       // and jumps to the entrypoint if needed.
7032       //
7033       //     lr = &return_address;
7034       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
7035       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
7036       //       goto gc_root_thunk<root_reg>(lr)
7037       //     }
7038       //   return_address:
7039 
7040       UseScratchRegisterScope temps(GetVIXLAssembler());
7041       DCHECK(temps.IsAvailable(ip0));
7042       DCHECK(temps.IsAvailable(ip1));
7043       temps.Exclude(ip0, ip1);
7044       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
7045 
7046       ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
7047       vixl::aarch64::Label return_address;
7048       __ adr(lr, &return_address);
7049       if (fixup_label != nullptr) {
7050         __ bind(fixup_label);
7051       }
7052       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
7053                     "GC root LDR must be 2 instructions (8B) before the return address label.");
7054       __ ldr(root_reg, MemOperand(obj.X(), offset));
7055       EmitBakerReadBarrierCbnz(custom_data);
7056       __ bind(&return_address);
7057     } else {
7058       // GC root loaded through a slow path for read barriers other
7059       // than Baker's.
7060       // /* GcRoot<mirror::Object>* */ root = obj + offset
7061       if (fixup_label == nullptr) {
7062         __ Add(root_reg.X(), obj.X(), offset);
7063       } else {
7064         EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
7065       }
7066       // /* mirror::Object* */ root = root->Read()
7067       GenerateReadBarrierForRootSlow(instruction, root, root);
7068     }
7069   } else {
7070     // Plain GC root load with no read barrier.
7071     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
7072     if (fixup_label == nullptr) {
7073       __ Ldr(root_reg, MemOperand(obj, offset));
7074     } else {
7075       EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
7076     }
7077     // Note that GC roots are not affected by heap poisoning, thus we
7078     // do not have to unpoison `root_reg` here.
7079   }
7080   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
7081 }
7082 
GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value,vixl::aarch64::Register old_value)7083 void CodeGeneratorARM64::GenerateIntrinsicMoveWithBakerReadBarrier(
7084     vixl::aarch64::Register marked_old_value,
7085     vixl::aarch64::Register old_value) {
7086   DCHECK(EmitBakerReadBarrier());
7087 
7088   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
7089   uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());
7090 
7091   ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
7092   vixl::aarch64::Label return_address;
7093   __ adr(lr, &return_address);
7094   static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
7095                 "GC root LDR must be 2 instructions (8B) before the return address label.");
7096   __ mov(marked_old_value, old_value);
7097   EmitBakerReadBarrierCbnz(custom_data);
7098   __ bind(&return_address);
7099 }
7100 
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,vixl::aarch64::Register obj,const vixl::aarch64::MemOperand & src,bool needs_null_check,bool use_load_acquire)7101 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7102                                                                Location ref,
7103                                                                vixl::aarch64::Register obj,
7104                                                                const vixl::aarch64::MemOperand& src,
7105                                                                bool needs_null_check,
7106                                                                bool use_load_acquire) {
7107   DCHECK(EmitBakerReadBarrier());
7108 
7109   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
7110   // Marking Register) to decide whether we need to enter the slow
7111   // path to mark the reference. Then, in the slow path, check the
7112   // gray bit in the lock word of the reference's holder (`obj`) to
7113   // decide whether to mark `ref` or not.
7114   //
7115   // We use shared thunks for the slow path; shared within the method
7116   // for JIT, across methods for AOT. That thunk checks the holder
7117   // and jumps to the entrypoint if needed. If the holder is not gray,
7118   // it creates a fake dependency and returns to the LDR instruction.
7119   //
7120   //     lr = &gray_return_address;
7121   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
7122   //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
7123   //     }
7124   //   not_gray_return_address:
7125   //     // Original reference load. If the offset is too large to fit
7126   //     // into LDR, we use an adjusted base register here.
7127   //     HeapReference<mirror::Object> reference = *(obj+offset);
7128   //   gray_return_address:
7129 
7130   DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
7131   DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
7132 
7133   UseScratchRegisterScope temps(GetVIXLAssembler());
7134   DCHECK(temps.IsAvailable(ip0));
7135   DCHECK(temps.IsAvailable(ip1));
7136   temps.Exclude(ip0, ip1);
7137   uint32_t custom_data = use_load_acquire
7138       ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
7139       : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
7140 
7141   {
7142     ExactAssemblyScope guard(GetVIXLAssembler(),
7143                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
7144     vixl::aarch64::Label return_address;
7145     __ adr(lr, &return_address);
7146     EmitBakerReadBarrierCbnz(custom_data);
7147     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7148                   "Field LDR must be 1 instruction (4B) before the return address label; "
7149                   " 2 instructions (8B) for heap poisoning.");
7150     Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
7151     if (use_load_acquire) {
7152       DCHECK_EQ(src.GetOffset(), 0);
7153       __ ldar(ref_reg, src);
7154     } else {
7155       __ ldr(ref_reg, src);
7156     }
7157     if (needs_null_check) {
7158       MaybeRecordImplicitNullCheck(instruction);
7159     }
7160     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
7161     // macro instructions disallowed in ExactAssemblyScope.
7162     if (kPoisonHeapReferences) {
7163       __ neg(ref_reg, Operand(ref_reg));
7164     }
7165     __ bind(&return_address);
7166   }
7167   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
7168 }
7169 
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,Location maybe_temp,bool needs_null_check,bool use_load_acquire)7170 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7171                                                                Location ref,
7172                                                                Register obj,
7173                                                                uint32_t offset,
7174                                                                Location maybe_temp,
7175                                                                bool needs_null_check,
7176                                                                bool use_load_acquire) {
7177   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
7178   Register base = obj;
7179   if (use_load_acquire) {
7180     DCHECK(maybe_temp.IsRegister());
7181     base = WRegisterFrom(maybe_temp);
7182     __ Add(base, obj, offset);
7183     offset = 0u;
7184   } else if (offset >= kReferenceLoadMinFarOffset) {
7185     DCHECK(maybe_temp.IsRegister());
7186     base = WRegisterFrom(maybe_temp);
7187     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
7188     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
7189     offset &= (kReferenceLoadMinFarOffset - 1u);
7190   }
7191   MemOperand src(base.X(), offset);
7192   GenerateFieldLoadWithBakerReadBarrier(
7193       instruction, ref, obj, src, needs_null_check, use_load_acquire);
7194 }
7195 
GenerateArrayLoadWithBakerReadBarrier(HArrayGet * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)7196 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
7197                                                                Location ref,
7198                                                                Register obj,
7199                                                                uint32_t data_offset,
7200                                                                Location index,
7201                                                                bool needs_null_check) {
7202   DCHECK(EmitBakerReadBarrier());
7203 
7204   static_assert(
7205       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7206       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7207   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
7208 
7209   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
7210   // Marking Register) to decide whether we need to enter the slow
7211   // path to mark the reference. Then, in the slow path, check the
7212   // gray bit in the lock word of the reference's holder (`obj`) to
7213   // decide whether to mark `ref` or not.
7214   //
7215   // We use shared thunks for the slow path; shared within the method
7216   // for JIT, across methods for AOT. That thunk checks the holder
7217   // and jumps to the entrypoint if needed. If the holder is not gray,
7218   // it creates a fake dependency and returns to the LDR instruction.
7219   //
7220   //     lr = &gray_return_address;
7221   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
7222   //       goto array_thunk<base_reg>(lr)
7223   //     }
7224   //   not_gray_return_address:
7225   //     // Original reference load. If the offset is too large to fit
7226   //     // into LDR, we use an adjusted base register here.
7227   //     HeapReference<mirror::Object> reference = data[index];
7228   //   gray_return_address:
7229 
7230   DCHECK(index.IsValid());
7231   Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
7232   Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
7233 
7234   UseScratchRegisterScope temps(GetVIXLAssembler());
7235   DCHECK(temps.IsAvailable(ip0));
7236   DCHECK(temps.IsAvailable(ip1));
7237   temps.Exclude(ip0, ip1);
7238 
7239   Register temp;
7240   if (instruction->GetArray()->IsIntermediateAddress()) {
7241     // We do not need to compute the intermediate address from the array: the
7242     // input instruction has done it already. See the comment in
7243     // `TryExtractArrayAccessAddress()`.
7244     if (kIsDebugBuild) {
7245       HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
7246       DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
7247     }
7248     temp = obj;
7249   } else {
7250     temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
7251     __ Add(temp.X(), obj.X(), Operand(data_offset));
7252   }
7253 
7254   uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
7255 
7256   {
7257     ExactAssemblyScope guard(GetVIXLAssembler(),
7258                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
7259     vixl::aarch64::Label return_address;
7260     __ adr(lr, &return_address);
7261     EmitBakerReadBarrierCbnz(custom_data);
7262     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7263                   "Array LDR must be 1 instruction (4B) before the return address label; "
7264                   " 2 instructions (8B) for heap poisoning.");
7265     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
7266     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
7267     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
7268     // macro instructions disallowed in ExactAssemblyScope.
7269     if (kPoisonHeapReferences) {
7270       __ neg(ref_reg, Operand(ref_reg));
7271     }
7272     __ bind(&return_address);
7273   }
7274   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
7275 }
7276 
MaybeGenerateMarkingRegisterCheck(int code,Location temp_loc)7277 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
7278   // The following condition is a compile-time one, so it does not have a run-time cost.
7279   if (kIsDebugBuild && EmitBakerReadBarrier()) {
7280     // The following condition is a run-time one; it is executed after the
7281     // previous compile-time test, to avoid penalizing non-debug builds.
7282     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
7283       UseScratchRegisterScope temps(GetVIXLAssembler());
7284       Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
7285       GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
7286     }
7287   }
7288 }
7289 
AddReadBarrierSlowPath(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)7290 SlowPathCodeARM64* CodeGeneratorARM64::AddReadBarrierSlowPath(HInstruction* instruction,
7291                                                               Location out,
7292                                                               Location ref,
7293                                                               Location obj,
7294                                                               uint32_t offset,
7295                                                               Location index) {
7296   SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
7297       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
7298   AddSlowPath(slow_path);
7299   return slow_path;
7300 }
7301 
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)7302 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
7303                                                  Location out,
7304                                                  Location ref,
7305                                                  Location obj,
7306                                                  uint32_t offset,
7307                                                  Location index) {
7308   DCHECK(EmitReadBarrier());
7309 
7310   // Insert a slow path based read barrier *after* the reference load.
7311   //
7312   // If heap poisoning is enabled, the unpoisoning of the loaded
7313   // reference will be carried out by the runtime within the slow
7314   // path.
7315   //
7316   // Note that `ref` currently does not get unpoisoned (when heap
7317   // poisoning is enabled), which is alright as the `ref` argument is
7318   // not used by the artReadBarrierSlow entry point.
7319   //
7320   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7321   SlowPathCodeARM64* slow_path = AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
7322 
7323   __ B(slow_path->GetEntryLabel());
7324   __ Bind(slow_path->GetExitLabel());
7325 }
7326 
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)7327 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7328                                                       Location out,
7329                                                       Location ref,
7330                                                       Location obj,
7331                                                       uint32_t offset,
7332                                                       Location index) {
7333   if (EmitReadBarrier()) {
7334     // Baker's read barriers shall be handled by the fast path
7335     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
7336     DCHECK(!kUseBakerReadBarrier);
7337     // If heap poisoning is enabled, unpoisoning will be taken care of
7338     // by the runtime within the slow path.
7339     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7340   } else if (kPoisonHeapReferences) {
7341     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
7342   }
7343 }
7344 
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)7345 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7346                                                         Location out,
7347                                                         Location root) {
7348   DCHECK(EmitReadBarrier());
7349 
7350   // Insert a slow path based read barrier *after* the GC root load.
7351   //
7352   // Note that GC roots are not affected by heap poisoning, so we do
7353   // not need to do anything special for this here.
7354   SlowPathCodeARM64* slow_path =
7355       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
7356   AddSlowPath(slow_path);
7357 
7358   __ B(slow_path->GetEntryLabel());
7359   __ Bind(slow_path->GetExitLabel());
7360 }
7361 
VisitClassTableGet(HClassTableGet * instruction)7362 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
7363   LocationSummary* locations =
7364       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7365   locations->SetInAt(0, Location::RequiresRegister());
7366   locations->SetOut(Location::RequiresRegister());
7367 }
7368 
VisitClassTableGet(HClassTableGet * instruction)7369 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
7370   LocationSummary* locations = instruction->GetLocations();
7371   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
7372     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
7373         instruction->GetIndex(), kArm64PointerSize).SizeValue();
7374     __ Ldr(XRegisterFrom(locations->Out()),
7375            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
7376   } else {
7377     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
7378         instruction->GetIndex(), kArm64PointerSize));
7379     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
7380         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
7381     __ Ldr(XRegisterFrom(locations->Out()),
7382            MemOperand(XRegisterFrom(locations->Out()), method_offset));
7383   }
7384 }
7385 
VecNEONAddress(HVecMemoryOperation * instruction,UseScratchRegisterScope * temps_scope,size_t size,bool is_string_char_at,Register * scratch)7386 MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
7387     HVecMemoryOperation* instruction,
7388     UseScratchRegisterScope* temps_scope,
7389     size_t size,
7390     bool is_string_char_at,
7391     /*out*/ Register* scratch) {
7392   LocationSummary* locations = instruction->GetLocations();
7393   Register base = InputRegisterAt(instruction, 0);
7394 
7395   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
7396     DCHECK(!is_string_char_at);
7397     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
7398   }
7399 
7400   Location index = locations->InAt(1);
7401   uint32_t offset = is_string_char_at
7402       ? mirror::String::ValueOffset().Uint32Value()
7403       : mirror::Array::DataOffset(size).Uint32Value();
7404   size_t shift = ComponentSizeShiftWidth(size);
7405 
7406   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
7407   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
7408 
7409   if (index.IsConstant()) {
7410     offset += Int64FromLocation(index) << shift;
7411     return HeapOperand(base, offset);
7412   } else {
7413     *scratch = temps_scope->AcquireSameSizeAs(base);
7414     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
7415     return HeapOperand(*scratch, offset);
7416   }
7417 }
7418 
VecSVEAddress(HVecMemoryOperation * instruction,UseScratchRegisterScope * temps_scope,size_t size,bool is_string_char_at,Register * scratch)7419 SVEMemOperand InstructionCodeGeneratorARM64::VecSVEAddress(
7420     HVecMemoryOperation* instruction,
7421     UseScratchRegisterScope* temps_scope,
7422     size_t size,
7423     bool is_string_char_at,
7424     /*out*/ Register* scratch) {
7425   LocationSummary* locations = instruction->GetLocations();
7426   Register base = InputRegisterAt(instruction, 0);
7427   Location index = locations->InAt(1);
7428 
7429   DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
7430   DCHECK(!index.IsConstant());
7431 
7432   uint32_t offset = is_string_char_at
7433       ? mirror::String::ValueOffset().Uint32Value()
7434       : mirror::Array::DataOffset(size).Uint32Value();
7435   size_t shift = ComponentSizeShiftWidth(size);
7436 
7437   if (instruction->InputAt(0)->IsIntermediateAddress()) {
7438     return SVEMemOperand(base.X(), XRegisterFrom(index), LSL, shift);
7439   }
7440 
7441   *scratch = temps_scope->AcquireSameSizeAs(base);
7442   __ Add(*scratch, base, offset);
7443   return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
7444 }
7445 
7446 #undef __
7447 #undef QUICK_ENTRY_POINT
7448 
7449 #define __ assembler.GetVIXLAssembler()->
7450 
EmitGrayCheckAndFastPath(arm64::Arm64Assembler & assembler,vixl::aarch64::Register base_reg,vixl::aarch64::MemOperand & lock_word,vixl::aarch64::Label * slow_path,vixl::aarch64::Label * throw_npe=nullptr)7451 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
7452                                      vixl::aarch64::Register base_reg,
7453                                      vixl::aarch64::MemOperand& lock_word,
7454                                      vixl::aarch64::Label* slow_path,
7455                                      vixl::aarch64::Label* throw_npe = nullptr) {
7456   vixl::aarch64::Label throw_npe_cont;
7457   // Load the lock word containing the rb_state.
7458   __ Ldr(ip0.W(), lock_word);
7459   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7460   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7461   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7462   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
7463   static_assert(
7464       BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
7465       "Field and array LDR offsets must be the same to reuse the same code.");
7466   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
7467   if (throw_npe != nullptr) {
7468     __ Bind(&throw_npe_cont);
7469   }
7470   // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
7471   static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7472                 "Field LDR must be 1 instruction (4B) before the return address label; "
7473                 " 2 instructions (8B) for heap poisoning.");
7474   __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7475   // Introduce a dependency on the lock_word including rb_state,
7476   // to prevent load-load reordering, and without using
7477   // a memory barrier (which would be more expensive).
7478   __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
7479   __ Br(lr);          // And return back to the function.
7480   if (throw_npe != nullptr) {
7481     // Clear IP0 before returning to the fast path.
7482     __ Bind(throw_npe);
7483     __ Mov(ip0.X(), xzr);
7484     __ B(&throw_npe_cont);
7485   }
7486   // Note: The fake dependency is unnecessary for the slow path.
7487 }
7488 
7489 // Load the read barrier introspection entrypoint in register `entrypoint`.
LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler & assembler,vixl::aarch64::Register entrypoint)7490 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
7491                                                        vixl::aarch64::Register entrypoint) {
7492   // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
7493   DCHECK_EQ(ip0.GetCode(), 16u);
7494   const int32_t entry_point_offset =
7495       Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
7496   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
7497 }
7498 
CompileBakerReadBarrierThunk(Arm64Assembler & assembler,uint32_t encoded_data,std::string * debug_name)7499 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
7500                                                       uint32_t encoded_data,
7501                                                       /*out*/ std::string* debug_name) {
7502   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
7503   switch (kind) {
7504     case BakerReadBarrierKind::kField:
7505     case BakerReadBarrierKind::kAcquire: {
7506       Register base_reg =
7507           vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7508       CheckValidReg(base_reg.GetCode());
7509       Register holder_reg =
7510           vixl::aarch64::XRegister(BakerReadBarrierSecondRegField::Decode(encoded_data));
7511       CheckValidReg(holder_reg.GetCode());
7512       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7513       temps.Exclude(ip0, ip1);
7514       // In the case of a field load (with relaxed semantic), if `base_reg` differs from
7515       // `holder_reg`, the offset was too large and we must have emitted (during the construction
7516       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
7517       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
7518       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
7519       // not necessarily do that check before going to the thunk.
7520       //
7521       // In the case of a field load with load-acquire semantics (where `base_reg` always differs
7522       // from `holder_reg`), we also need an explicit null check when implicit null checks are
7523       // allowed, as we do not emit one before going to the thunk.
7524       vixl::aarch64::Label throw_npe_label;
7525       vixl::aarch64::Label* throw_npe = nullptr;
7526       if (GetCompilerOptions().GetImplicitNullChecks() &&
7527           (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
7528         throw_npe = &throw_npe_label;
7529         __ Cbz(holder_reg.W(), throw_npe);
7530       }
7531       // Check if the holder is gray and, if not, add fake dependency to the base register
7532       // and return to the LDR instruction to load the reference. Otherwise, use introspection
7533       // to load the reference and call the entrypoint that performs further checks on the
7534       // reference and marks it if needed.
7535       vixl::aarch64::Label slow_path;
7536       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
7537       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
7538       __ Bind(&slow_path);
7539       if (kind == BakerReadBarrierKind::kField) {
7540         MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7541         __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
7542         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7543         __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
7544         __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
7545       } else {
7546         DCHECK(kind == BakerReadBarrierKind::kAcquire);
7547         DCHECK(!base_reg.Is(holder_reg));
7548         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7549         __ Ldar(ip0.W(), MemOperand(base_reg));
7550       }
7551       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
7552       __ Br(ip1);                           // Jump to the entrypoint.
7553       break;
7554     }
7555     case BakerReadBarrierKind::kArray: {
7556       Register base_reg =
7557           vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7558       CheckValidReg(base_reg.GetCode());
7559       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7560                 BakerReadBarrierSecondRegField::Decode(encoded_data));
7561       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7562       temps.Exclude(ip0, ip1);
7563       vixl::aarch64::Label slow_path;
7564       int32_t data_offset =
7565           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
7566       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
7567       DCHECK_LT(lock_word.GetOffset(), 0);
7568       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
7569       __ Bind(&slow_path);
7570       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
7571       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
7572       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7573       __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
7574       __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 to the entrypoint address to create
7575                                             // a switch case target based on the index register.
7576       __ Mov(ip0, base_reg);                // Move the base register to ip0.
7577       __ Br(ip1);                           // Jump to the entrypoint's array switch case.
7578       break;
7579     }
7580     case BakerReadBarrierKind::kGcRoot: {
7581       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
7582       // and it does not have a forwarding address), call the correct introspection entrypoint;
7583       // otherwise return the reference (or the extracted forwarding address).
7584       // There is no gray bit check for GC roots.
7585       Register root_reg =
7586           vixl::aarch64::WRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7587       CheckValidReg(root_reg.GetCode());
7588       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7589                 BakerReadBarrierSecondRegField::Decode(encoded_data));
7590       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7591       temps.Exclude(ip0, ip1);
7592       vixl::aarch64::Label return_label, not_marked, forwarding_address;
7593       __ Cbz(root_reg, &return_label);
7594       MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
7595       __ Ldr(ip0.W(), lock_word);
7596       __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
7597       __ Bind(&return_label);
7598       __ Br(lr);
7599       __ Bind(&not_marked);
7600       __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
7601       __ B(&forwarding_address, mi);
7602       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7603       // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
7604       // art_quick_read_barrier_mark_introspection_gc_roots.
7605       __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
7606       __ Mov(ip0.W(), root_reg);
7607       __ Br(ip1);
7608       __ Bind(&forwarding_address);
7609       __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
7610       __ Br(lr);
7611       break;
7612     }
7613     default:
7614       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
7615       UNREACHABLE();
7616   }
7617 
7618   // For JIT, the slow path is considered part of the compiled method,
7619   // so JIT should pass null as `debug_name`.
7620   DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
7621   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
7622     std::ostringstream oss;
7623     oss << "BakerReadBarrierThunk";
7624     switch (kind) {
7625       case BakerReadBarrierKind::kField:
7626         oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7627             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7628         break;
7629       case BakerReadBarrierKind::kAcquire:
7630         oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7631             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7632         break;
7633       case BakerReadBarrierKind::kArray:
7634         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7635         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7636                   BakerReadBarrierSecondRegField::Decode(encoded_data));
7637         break;
7638       case BakerReadBarrierKind::kGcRoot:
7639         oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7640         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7641                   BakerReadBarrierSecondRegField::Decode(encoded_data));
7642         break;
7643     }
7644     *debug_name = oss.str();
7645   }
7646 }
7647 
7648 #undef __
7649 
7650 }  // namespace arm64
7651 }  // namespace art
7652