1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
19 
20 #include "base/bit_field.h"
21 #include "base/macros.h"
22 #include "class_root.h"
23 #include "code_generator.h"
24 #include "common_arm64.h"
25 #include "dex/dex_file_types.h"
26 #include "dex/string_reference.h"
27 #include "dex/type_reference.h"
28 #include "driver/compiler_options.h"
29 #include "jit_patches_arm64.h"
30 #include "nodes.h"
31 #include "parallel_move_resolver.h"
32 #include "utils/arm64/assembler_arm64.h"
33 
34 // TODO(VIXL): Make VIXL compile cleanly with -Wshadow, -Wdeprecated-declarations.
35 #pragma GCC diagnostic push
36 #pragma GCC diagnostic ignored "-Wshadow"
37 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
38 #include "aarch64/disasm-aarch64.h"
39 #include "aarch64/macro-assembler-aarch64.h"
40 #pragma GCC diagnostic pop
41 
42 namespace art HIDDEN {
43 
44 namespace linker {
45 class Arm64RelativePatcherTest;
46 }  // namespace linker
47 
48 namespace arm64 {
49 
50 class CodeGeneratorARM64;
51 
52 // Use a local definition to prevent copying mistakes.
53 static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
54 
55 // This constant is used as an approximate margin when emission of veneer and literal pools
56 // must be blocked.
57 static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
58 
59 // A reference load (except an object array load) uses LDR Wt, [Xn, #offset], which can handle
60 // offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
61 // For the Baker read barrier implementation using link-time generated thunks we need to split
62 // the offset explicitly.
63 static constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
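// Illustrative sketch (not part of this header) of how such a split can look in the code
// generator; `__`, `base`, `temp` and `ref` are placeholder names, with `__` standing for the
// VIXL macro assembler as used in the .cc file. For a field at offset 0x4100 (>= 16KiB), the
// far, aligned part of the offset goes into a temp and only the near remainder into the LDR:
//
//   __ Add(temp, base, 0x4100 & ~(kReferenceLoadMinFarOffset - 1u));  // temp = base + 0x4000
//   __ Ldr(ref, vixl::aarch64::MemOperand(temp.X(), 0x4100 & (kReferenceLoadMinFarOffset - 1u)));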
64 
65 static const vixl::aarch64::Register kParameterCoreRegisters[] = {
66     vixl::aarch64::x1,
67     vixl::aarch64::x2,
68     vixl::aarch64::x3,
69     vixl::aarch64::x4,
70     vixl::aarch64::x5,
71     vixl::aarch64::x6,
72     vixl::aarch64::x7
73 };
74 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
75 static const vixl::aarch64::VRegister kParameterFPRegisters[] = {
76     vixl::aarch64::d0,
77     vixl::aarch64::d1,
78     vixl::aarch64::d2,
79     vixl::aarch64::d3,
80     vixl::aarch64::d4,
81     vixl::aarch64::d5,
82     vixl::aarch64::d6,
83     vixl::aarch64::d7
84 };
85 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
86 
87 // Thread Register.
88 const vixl::aarch64::Register tr = vixl::aarch64::x19;
89 // Marking Register.
90 const vixl::aarch64::Register mr = vixl::aarch64::x20;
91 // Implicit suspend check register.
92 const vixl::aarch64::Register kImplicitSuspendCheckRegister = vixl::aarch64::x21;
93 // Method register on invoke.
94 static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
95 const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
96                                                              vixl::aarch64::ip1);
97 const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);
98 
99 const vixl::aarch64::CPURegList runtime_reserved_core_registers =
100     vixl::aarch64::CPURegList(
101         tr,
102         // Reserve X20 as Marking Register when emitting Baker read barriers.
103         // TODO: We don't need to reserve marking-register for userfaultfd GC. But
104         // that would require some work in the assembler code as the right GC is
105         // chosen at load-time and not compile time.
106         (kReserveMarkingRegister ? mr : vixl::aarch64::NoCPUReg),
107         kImplicitSuspendCheckRegister,
108         vixl::aarch64::lr);
109 
110 // Some instructions have special requirements for a temporary, for example
111 // LoadClass/kBssEntry and LoadString/kBssEntry with Baker read barriers require a
112 // temp that is not x0 (to avoid an extra move), and Baker read barrier field
113 // loads with large offsets need a fixed register to limit the number of link-time
114 // thunks we generate. For these and similar cases, we want to reserve a specific
115 // register that's neither callee-save nor an argument register. We choose x15.
116 inline Location FixedTempLocation() {
117   return Location::RegisterLocation(vixl::aarch64::x15.GetCode());
118 }
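// A typical (illustrative) use is a locations builder reserving this register as an extra
// temp for such an instruction:
//
//   locations->AddTemp(FixedTempLocation());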
119 
120 // Callee-saved registers of AAPCS64, excluding x19 (Thread Register) and also excluding
121 // x20 (Marking Register) when emitting Baker read barriers.
122 const vixl::aarch64::CPURegList callee_saved_core_registers(
123     vixl::aarch64::CPURegister::kRegister,
124     vixl::aarch64::kXRegSize,
125     (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()),
126     vixl::aarch64::x30.GetCode());
127 const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister,
128                                                           vixl::aarch64::kDRegSize,
129                                                           vixl::aarch64::d8.GetCode(),
130                                                           vixl::aarch64::d15.GetCode());
131 Location ARM64ReturnLocation(DataType::Type return_type);
132 
133 vixl::aarch64::Condition ARM64PCondition(HVecPredToBoolean::PCondKind cond);
134 
135 #define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \
136   V(MathSignumFloat)                          \
137   V(MathSignumDouble)                         \
138   V(MathCopySignFloat)                        \
139   V(MathCopySignDouble)                       \
140   V(IntegerRemainderUnsigned)                 \
141   V(LongRemainderUnsigned)                    \
142   V(StringStringIndexOf)                      \
143   V(StringStringIndexOfAfter)                 \
144   V(StringBufferAppend)                       \
145   V(StringBufferLength)                       \
146   V(StringBufferToString)                     \
147   V(StringBuilderAppendObject)                \
148   V(StringBuilderAppendString)                \
149   V(StringBuilderAppendCharSequence)          \
150   V(StringBuilderAppendCharArray)             \
151   V(StringBuilderAppendBoolean)               \
152   V(StringBuilderAppendChar)                  \
153   V(StringBuilderAppendInt)                   \
154   V(StringBuilderAppendLong)                  \
155   V(StringBuilderAppendFloat)                 \
156   V(StringBuilderAppendDouble)                \
157   V(StringBuilderLength)                      \
158   V(StringBuilderToString)                    \
159   V(SystemArrayCopyByte)                      \
160   V(SystemArrayCopyInt)                       \
161   V(UnsafeArrayBaseOffset)                    \
162   /* 1.8 */                                   \
163   V(MethodHandleInvoke)                       \
164   /* OpenJDK 11 */                            \
165   V(JdkUnsafeArrayBaseOffset)
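
// The list above is an X-macro: a client supplies the V(...) definition and applies the list
// to it. A minimal, purely illustrative sketch of such a client (MARK_UNIMPLEMENTED is a
// hypothetical name):
//
//   #define MARK_UNIMPLEMENTED(Name) /* record Intrinsics::k##Name as unimplemented */
//   UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED)
//   #undef MARK_UNIMPLEMENTED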
166 
167 class SlowPathCodeARM64 : public SlowPathCode {
168  public:
169   explicit SlowPathCodeARM64(HInstruction* instruction)
170       : SlowPathCode(instruction), entry_label_(), exit_label_() {}
171 
172   vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
173   vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }
174 
175   void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
176   void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
177 
178  private:
179   vixl::aarch64::Label entry_label_;
180   vixl::aarch64::Label exit_label_;
181 
182   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
183 };
184 
185 class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
186  public:
187   using VIXLInt32Literal = vixl::aarch64::Literal<int32_t>;
188 
189   explicit JumpTableARM64(HPackedSwitch* switch_instr)
190       : switch_instr_(switch_instr),
191         table_start_(),
192         jump_targets_(switch_instr->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
193       uint32_t num_entries = switch_instr_->GetNumEntries();
194       for (uint32_t i = 0; i < num_entries; i++) {
195         VIXLInt32Literal* lit = new VIXLInt32Literal(0);
196         jump_targets_.emplace_back(lit);
197       }
198     }
199 
200   vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }
201 
202   // Emits the jump table into the code buffer; jump target offsets are not yet known.
203   void EmitTable(CodeGeneratorARM64* codegen);
204 
205   // Updates the offsets in the jump table, to be used once the addresses of the jump
206   // targets' basic blocks are resolved.
207   void FixTable(CodeGeneratorARM64* codegen);
208 
209  private:
210   HPackedSwitch* const switch_instr_;
211   vixl::aarch64::Label table_start_;
212 
213   // Contains literals for the switch's jump targets.
214   ArenaVector<std::unique_ptr<VIXLInt32Literal>> jump_targets_;
215 
216   DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
217 };
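// Illustrative lifecycle sketch (the exact call sites live in the .cc file): a table is
// created per HPackedSwitch via CodeGeneratorARM64::CreateJumpTable() declared below,
// EmitTable() emits the placeholder literals while the switch is visited, and FixTable()
// patches the per-target offsets once the basic block labels are bound:
//
//   JumpTableARM64* table = codegen->CreateJumpTable(switch_instr);
//   ...
//   table->EmitTable(codegen);   // during code generation of the switch
//   ...
//   table->FixTable(codegen);    // once all block labels are bound (e.g. at Finalize())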
218 
219 static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = {
220     vixl::aarch64::x0,
221     vixl::aarch64::x1,
222     vixl::aarch64::x2,
223     vixl::aarch64::x3,
224     vixl::aarch64::x4,
225     vixl::aarch64::x5,
226     vixl::aarch64::x6,
227     vixl::aarch64::x7
228 };
229 static constexpr size_t kRuntimeParameterCoreRegistersLength =
230     arraysize(kRuntimeParameterCoreRegisters);
231 static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = {
232     vixl::aarch64::d0,
233     vixl::aarch64::d1,
234     vixl::aarch64::d2,
235     vixl::aarch64::d3,
236     vixl::aarch64::d4,
237     vixl::aarch64::d5,
238     vixl::aarch64::d6,
239     vixl::aarch64::d7
240 };
241 static constexpr size_t kRuntimeParameterFpuRegistersLength =
242     arraysize(kRuntimeParameterFpuRegisters);
243 
244 class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
245                                                                 vixl::aarch64::VRegister> {
246  public:
247   InvokeRuntimeCallingConvention()
248       : CallingConvention(kRuntimeParameterCoreRegisters,
249                           kRuntimeParameterCoreRegistersLength,
250                           kRuntimeParameterFpuRegisters,
251                           kRuntimeParameterFpuRegistersLength,
252                           kArm64PointerSize) {}
253 
254   Location GetReturnLocation(DataType::Type return_type);
255 
256  private:
257   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
258 };
259 
260 class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
261                                                             vixl::aarch64::VRegister> {
262  public:
263   InvokeDexCallingConvention()
264       : CallingConvention(kParameterCoreRegisters,
265                           kParameterCoreRegistersLength,
266                           kParameterFPRegisters,
267                           kParameterFPRegistersLength,
268                           kArm64PointerSize) {}
269 
270   Location GetReturnLocation(DataType::Type return_type) const {
271     return ARM64ReturnLocation(return_type);
272   }
273 
274 
275  private:
276   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
277 };
278 
279 class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
280  public:
281   InvokeDexCallingConventionVisitorARM64() {}
282   virtual ~InvokeDexCallingConventionVisitorARM64() {}
283 
284   Location GetNextLocation(DataType::Type type) override;
285   Location GetReturnLocation(DataType::Type return_type) const override {
286     return calling_convention.GetReturnLocation(return_type);
287   }
288   Location GetMethodLocation() const override;
289 
290  private:
291   InvokeDexCallingConvention calling_convention;
292 
293   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
294 };
295 
296 class CriticalNativeCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
297  public:
298   explicit CriticalNativeCallingConventionVisitorARM64(bool for_register_allocation)
299       : for_register_allocation_(for_register_allocation) {}
300 
301   virtual ~CriticalNativeCallingConventionVisitorARM64() {}
302 
303   Location GetNextLocation(DataType::Type type) override;
304   Location GetReturnLocation(DataType::Type type) const override;
305   Location GetMethodLocation() const override;
306 
307   size_t GetStackOffset() const { return stack_offset_; }
308 
309  private:
310   // Register allocator does not support adjusting frame size, so we cannot provide final locations
311   // of stack arguments for register allocation. We ask the register allocator for any location and
312   // move these arguments to the right place after adjusting the SP when generating the call.
313   const bool for_register_allocation_;
314   size_t gpr_index_ = 0u;
315   size_t fpr_index_ = 0u;
316   size_t stack_offset_ = 0u;
317 
318   DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorARM64);
319 };
320 
321 class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
322  public:
323   FieldAccessCallingConventionARM64() {}
324 
325   Location GetObjectLocation() const override {
326     return helpers::LocationFrom(vixl::aarch64::x1);
327   }
328   Location GetFieldIndexLocation() const override {
329     return helpers::LocationFrom(vixl::aarch64::x0);
330   }
331   Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
332     return helpers::LocationFrom(vixl::aarch64::x0);
333   }
334   Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
335                                bool is_instance) const override {
336     return is_instance
337         ? helpers::LocationFrom(vixl::aarch64::x2)
338         : helpers::LocationFrom(vixl::aarch64::x1);
339   }
340   Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
341     return helpers::LocationFrom(vixl::aarch64::d0);
342   }
343 
344  private:
345   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
346 };
347 
348 class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
349  public:
350   InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
351 
352 #define DECLARE_VISIT_INSTRUCTION(name, super) \
353   void Visit##name(H##name* instr) override;
354 
355   FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(DECLARE_VISIT_INSTRUCTION)
356   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
357   FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
358 
359 #undef DECLARE_VISIT_INSTRUCTION
360 
361   void VisitInstruction(HInstruction* instruction) override {
362     LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
363                << " (id " << instruction->GetId() << ")";
364   }
365 
366   Arm64Assembler* GetAssembler() const { return assembler_; }
367   vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
368 
369   // SIMD helpers.
370   virtual Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) = 0;
371   virtual void FreeSIMDScratchLocation(Location loc,
372                                        vixl::aarch64::UseScratchRegisterScope* scope)  = 0;
373   virtual void LoadSIMDRegFromStack(Location destination, Location source) = 0;
374   virtual void MoveSIMDRegToSIMDReg(Location destination, Location source) = 0;
375   virtual void MoveToSIMDStackSlot(Location destination, Location source) = 0;
376   virtual void SaveLiveRegistersHelper(LocationSummary* locations,
377                                        int64_t spill_offset) = 0;
378   virtual void RestoreLiveRegistersHelper(LocationSummary* locations,
379                                           int64_t spill_offset) = 0;
380 
381  protected:
382   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
383                                         vixl::aarch64::Register class_reg);
384   void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
385                                          vixl::aarch64::Register temp);
386   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
387   void HandleBinaryOp(HBinaryOperation* instr);
388 
389   void HandleFieldSet(HInstruction* instruction,
390                       const FieldInfo& field_info,
391                       bool value_can_be_null,
392                       WriteBarrierKind write_barrier_kind);
393   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
394   void HandleCondition(HCondition* instruction);
395 
396   // Generate a heap reference load using one register `out`:
397   //
398   //   out <- *(out + offset)
399   //
400   // while honoring heap poisoning and/or read barriers (if any).
401   //
402   // Location `maybe_temp` is used when generating a read barrier and
403   // shall be a register in that case; it may be an invalid location
404   // otherwise.
405   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
406                                         Location out,
407                                         uint32_t offset,
408                                         Location maybe_temp,
409                                         ReadBarrierOption read_barrier_option);
410   // Generate a heap reference load using two different registers
411   // `out` and `obj`:
412   //
413   //   out <- *(obj + offset)
414   //
415   // while honoring heap poisoning and/or read barriers (if any).
416   //
417   // Location `maybe_temp` is used when generating a Baker's (fast
418   // path) read barrier and shall be a register in that case; it may
419   // be an invalid location otherwise.
420   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
421                                          Location out,
422                                          Location obj,
423                                          uint32_t offset,
424                                          Location maybe_temp,
425                                          ReadBarrierOption read_barrier_option);
426 
427   // Generate a floating-point comparison.
428   void GenerateFcmp(HInstruction* instruction);
429 
430   void HandleShift(HBinaryOperation* instr);
431   void GenerateTestAndBranch(HInstruction* instruction,
432                              size_t condition_input_index,
433                              vixl::aarch64::Label* true_target,
434                              vixl::aarch64::Label* false_target);
435   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
436   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
437   void GenerateIncrementNegativeByOne(vixl::aarch64::Register out,
438                                       vixl::aarch64::Register in, bool use_cond_inc);
439   void GenerateResultRemWithAnyConstant(vixl::aarch64::Register out,
440                                         vixl::aarch64::Register dividend,
441                                         vixl::aarch64::Register quotient,
442                                         int64_t divisor,
443                                         // This function may acquire a scratch register.
444                                         vixl::aarch64::UseScratchRegisterScope* temps_scope);
445   void GenerateInt64UnsignedDivRemWithAnyPositiveConstant(HBinaryOperation* instruction);
446   void GenerateInt64DivRemWithAnyConstant(HBinaryOperation* instruction);
447   void GenerateInt32DivRemWithAnyConstant(HBinaryOperation* instruction);
448   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction, int64_t divisor);
449   void GenerateIntDiv(HDiv* instruction);
450   void GenerateIntDivForConstDenom(HDiv *instruction);
451   void GenerateIntDivForPower2Denom(HDiv *instruction);
452   void GenerateIntRem(HRem* instruction);
453   void GenerateIntRemForConstDenom(HRem *instruction);
454   void GenerateIntRemForPower2Denom(HRem *instruction);
455   void HandleGoto(HInstruction* got, HBasicBlock* successor);
456   void GenerateMethodEntryExitHook(HInstruction* instruction);
457 
458   // Helpers to set up locations for vector memory operations. Each returns the memory operand
459   // and, if one is used, sets the output parameter `scratch` to a temporary register used in
460   // that operand, so that the client can release it right after using the memory operand.
461   // Neon version.
462   vixl::aarch64::MemOperand VecNEONAddress(
463       HVecMemoryOperation* instruction,
464       // This function may acquire a scratch register.
465       vixl::aarch64::UseScratchRegisterScope* temps_scope,
466       size_t size,
467       bool is_string_char_at,
468       /*out*/ vixl::aarch64::Register* scratch);
469   // SVE version.
470   vixl::aarch64::SVEMemOperand VecSVEAddress(
471       HVecMemoryOperation* instruction,
472       // This function may acquire a scratch register.
473       vixl::aarch64::UseScratchRegisterScope* temps_scope,
474       size_t size,
475       bool is_string_char_at,
476       /*out*/ vixl::aarch64::Register* scratch);
477 
478   Arm64Assembler* const assembler_;
479   CodeGeneratorARM64* const codegen_;
480 
481   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARM64);
482 };
483 
484 class LocationsBuilderARM64 : public HGraphVisitor {
485  public:
486   LocationsBuilderARM64(HGraph* graph, CodeGeneratorARM64* codegen)
487       : HGraphVisitor(graph), codegen_(codegen) {}
488 
489 #define DECLARE_VISIT_INSTRUCTION(name, super) \
490   void Visit##name(H##name* instr) override;
491 
492   FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(DECLARE_VISIT_INSTRUCTION)
493   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
494   FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
495 
496 #undef DECLARE_VISIT_INSTRUCTION
497 
498   void VisitInstruction(HInstruction* instruction) override {
499     LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
500                << " (id " << instruction->GetId() << ")";
501   }
502 
503  protected:
504   void HandleBinaryOp(HBinaryOperation* instr);
505   void HandleFieldSet(HInstruction* instruction);
506   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
507   void HandleInvoke(HInvoke* instr);
508   void HandleCondition(HCondition* instruction);
509   void HandleShift(HBinaryOperation* instr);
510 
511   CodeGeneratorARM64* const codegen_;
512   InvokeDexCallingConventionVisitorARM64 parameter_visitor_;
513 
514   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
515 };
516 
517 class InstructionCodeGeneratorARM64Neon : public InstructionCodeGeneratorARM64 {
518  public:
519   InstructionCodeGeneratorARM64Neon(HGraph* graph, CodeGeneratorARM64* codegen) :
520       InstructionCodeGeneratorARM64(graph, codegen) {}
521 
522 #define DECLARE_VISIT_INSTRUCTION(name, super) \
523   void Visit##name(H##name* instr) override;
524 
525   FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
526 
527 #undef DECLARE_VISIT_INSTRUCTION
528 
529   Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) override;
530   void FreeSIMDScratchLocation(Location loc,
531                                vixl::aarch64::UseScratchRegisterScope* scope) override;
532   void LoadSIMDRegFromStack(Location destination, Location source) override;
533   void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
534   void MoveToSIMDStackSlot(Location destination, Location source) override;
535   void SaveLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
536   void RestoreLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
537 };
538 
539 class LocationsBuilderARM64Neon : public LocationsBuilderARM64 {
540  public:
541   LocationsBuilderARM64Neon(HGraph* graph, CodeGeneratorARM64* codegen) :
542       LocationsBuilderARM64(graph, codegen) {}
543 
544 #define DECLARE_VISIT_INSTRUCTION(name, super) \
545   void Visit##name(H##name* instr) override;
546 
547   FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
548 
549 #undef DECLARE_VISIT_INSTRUCTION
550 };
551 
552 class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
553  public:
554   InstructionCodeGeneratorARM64Sve(HGraph* graph, CodeGeneratorARM64* codegen) :
555       InstructionCodeGeneratorARM64(graph, codegen) {}
556 
557 #define DECLARE_VISIT_INSTRUCTION(name, super) \
558   void Visit##name(H##name* instr) override;
559 
560   FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
561 
562 #undef DECLARE_VISIT_INSTRUCTION
563 
564   Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) override;
565   void FreeSIMDScratchLocation(Location loc,
566                                vixl::aarch64::UseScratchRegisterScope* scope) override;
567   void LoadSIMDRegFromStack(Location destination, Location source) override;
568   void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
569   void MoveToSIMDStackSlot(Location destination, Location source) override;
570   void SaveLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
571   void RestoreLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
572 
573  private:
574   // Validate that instruction vector length and packed type are compliant with the SIMD
575   // register size (full SIMD register is used).
576   void ValidateVectorLength(HVecOperation* instr) const;
577 
578   vixl::aarch64::PRegister GetVecGoverningPReg(HVecOperation* instr) {
579     return GetVecPredSetFixedOutPReg(instr->GetGoverningPredicate());
580   }
581 
582   // Returns a fixed p-reg for predicate setting instruction.
583   //
584   // Currently we only support diamond CF loops for predicated vectorization; also we don't have
585   // register allocator support for vector predicates. Thus we use fixed P-regs for loop main,
586   // True and False predicates as a temporary solution.
587   //
588   // TODO: Support SIMD types and registers in ART.
589   static vixl::aarch64::PRegister GetVecPredSetFixedOutPReg(HVecPredSetOperation* instr) {
590     if (instr->IsVecPredWhile() || instr->IsVecPredSetAll()) {
591       // VecPredWhile and VecPredSetAll live ranges never overlap due to the current vectorization
592       // scheme: the former is only live inside a vectorized loop and the latter is never in a
593       // loop and never spans across loops.
594       return vixl::aarch64::p0;
595     } else if (instr->IsVecPredNot()) {
596       // This relies on the fact that we only use PredNot manually in the autovectorizer,
597       // so there is only one of them in each loop.
598       return vixl::aarch64::p1;
599     } else {
600       DCHECK(instr->IsVecCondition());
601       return vixl::aarch64::p2;
602     }
603   }
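  // For example (an illustrative sketch, assuming a loop governed by an HVecPredWhile and
  // placeholder names `z_dst` and `base`, with `__` standing for the VIXL macro assembler as
  // used in the .cc file), GetVecGoverningPReg() on a predicated operation resolves to p0, so
  // a predicated byte load could be emitted roughly as:
  //
  //   __ Ld1b(z_dst.VnB(), vixl::aarch64::p0.Zeroing(), vixl::aarch64::SVEMemOperand(base.X()));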
604 
605   // Generate a vector comparison instruction based on the IfCondition.
606   void GenerateIntegerVecComparison(const vixl::aarch64::PRegisterWithLaneSize& pd,
607                                     const vixl::aarch64::PRegisterZ& pg,
608                                     const vixl::aarch64::ZRegister& zn,
609                                     const vixl::aarch64::ZRegister& zm,
610                                     IfCondition cond);
611   void HandleVecCondition(HVecCondition* instruction);
612 };
613 
614 class LocationsBuilderARM64Sve : public LocationsBuilderARM64 {
615  public:
616   LocationsBuilderARM64Sve(HGraph* graph, CodeGeneratorARM64* codegen) :
617       LocationsBuilderARM64(graph, codegen) {}
618 
619 #define DECLARE_VISIT_INSTRUCTION(name, super) \
620   void Visit##name(H##name* instr) override;
621 
622   FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
623 
624 #undef DECLARE_VISIT_INSTRUCTION
625  private:
626   void HandleVecCondition(HVecCondition* instruction);
627 };
628 
629 class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
630  public:
631   ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
632       : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}
633 
634  protected:
635   void PrepareForEmitNativeCode() override;
636   void FinishEmitNativeCode() override;
637   Location AllocateScratchLocationFor(Location::Kind kind) override;
638   void FreeScratchLocation(Location loc) override;
639   void EmitMove(size_t index) override;
640 
641  private:
642   Arm64Assembler* GetAssembler() const;
643   vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
644     return GetAssembler()->GetVIXLAssembler();
645   }
646 
647   CodeGeneratorARM64* const codegen_;
648   vixl::aarch64::UseScratchRegisterScope vixl_temps_;
649 
650   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
651 };
652 
653 class CodeGeneratorARM64 : public CodeGenerator {
654  public:
655   CodeGeneratorARM64(HGraph* graph,
656                      const CompilerOptions& compiler_options,
657                      OptimizingCompilerStats* stats = nullptr);
658   virtual ~CodeGeneratorARM64() {}
659 
660   void GenerateFrameEntry() override;
661   void GenerateFrameExit() override;
662 
663   vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
664   vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;
665 
666   void Bind(HBasicBlock* block) override;
667 
668   vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
669     block = FirstNonEmptyBlock(block);
670     return &(block_labels_[block->GetBlockId()]);
671   }
672 
673   size_t GetWordSize() const override {
674     return kArm64WordSize;
675   }
676 
677   bool SupportsPredicatedSIMD() const override { return ShouldUseSVE(); }
678 
679   size_t GetSlowPathFPWidth() const override {
680     return GetGraph()->HasSIMD()
681         ? GetSIMDRegisterWidth()
682         : vixl::aarch64::kDRegSizeInBytes;
683   }
684 
685   size_t GetCalleePreservedFPWidth() const override {
686     return vixl::aarch64::kDRegSizeInBytes;
687   }
688 
689   size_t GetSIMDRegisterWidth() const override;
690 
691   uintptr_t GetAddressOf(HBasicBlock* block) override {
692     vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
693     DCHECK(block_entry_label->IsBound());
694     return block_entry_label->GetLocation();
695   }
696 
697   HGraphVisitor* GetLocationBuilder() override { return location_builder_; }
698   InstructionCodeGeneratorARM64* GetInstructionCodeGeneratorArm64() {
699     return instruction_visitor_;
700   }
701   HGraphVisitor* GetInstructionVisitor() override { return GetInstructionCodeGeneratorArm64(); }
702   Arm64Assembler* GetAssembler() override { return &assembler_; }
703   const Arm64Assembler& GetAssembler() const override { return assembler_; }
704   vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
705 
706   // Emit a write barrier if:
707   // A) emit_null_check is false, or
708   // B) emit_null_check is true and value is not null.
709   void MaybeMarkGCCard(vixl::aarch64::Register object,
710                        vixl::aarch64::Register value,
711                        bool emit_null_check);
712 
713   // Emit a write barrier unconditionally.
714   void MarkGCCard(vixl::aarch64::Register object);
715 
716   // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
717   // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when the CC
718   // GC is marking for eliminated write barriers.
719   void CheckGCCardIsValid(vixl::aarch64::Register object);
720 
721   void GenerateMemoryBarrier(MemBarrierKind kind);
722 
723   // Register allocation.
724 
725   void SetupBlockedRegisters() const override;
726 
727   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
728   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
729   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
730   size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
731 
732   // The number of registers that can be allocated. The register allocator may
733   // decide to reserve and not use a few of them.
734   // We do not consider registers sp, xzr, wzr. They are either not allocatable
735   // (xzr, wzr), or make for poor allocatable registers (sp alignment
736   // requirements, etc.). This also facilitates our task as all other registers
737   // can easily be mapped to or from their type and index or code.
738   static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
739   static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfVRegisters;
740   static constexpr int kNumberOfAllocatableRegisterPairs = 0;
741 
742   void DumpCoreRegister(std::ostream& stream, int reg) const override;
743   void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
744 
745   InstructionSet GetInstructionSet() const override {
746     return InstructionSet::kArm64;
747   }
748 
749   const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const;
750 
751   void Initialize() override {
752     block_labels_.resize(GetGraph()->GetBlocks().size());
753   }
754 
755   // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
756   // These instructions can only encode offsets that are multiples of the register size accessed.
757   uint32_t GetPreferredSlotsAlignment() const override { return vixl::aarch64::kXRegSizeInBytes; }
758 
759   JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
760     jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr));
761     return jump_tables_.back().get();
762   }
763 
764   void Finalize() override;
765 
766   // Code generation helpers.
767   void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
768   void MoveConstant(Location destination, int32_t value) override;
769   void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
770   void AddLocationAsTemp(Location location, LocationSummary* locations) override;
771 
772   void Load(DataType::Type type,
773             vixl::aarch64::CPURegister dst,
774             const vixl::aarch64::MemOperand& src);
775   void Store(DataType::Type type,
776              vixl::aarch64::CPURegister src,
777              const vixl::aarch64::MemOperand& dst);
778   void LoadAcquire(HInstruction* instruction,
779                    DataType::Type type,
780                    vixl::aarch64::CPURegister dst,
781                    const vixl::aarch64::MemOperand& src,
782                    bool needs_null_check);
783   void StoreRelease(HInstruction* instruction,
784                     DataType::Type type,
785                     vixl::aarch64::CPURegister src,
786                     const vixl::aarch64::MemOperand& dst,
787                     bool needs_null_check);
788 
789   // Generate code to invoke a runtime entry point.
790   void InvokeRuntime(QuickEntrypointEnum entrypoint,
791                      HInstruction* instruction,
792                      uint32_t dex_pc,
793                      SlowPathCode* slow_path = nullptr) override;
794 
795   // Generate code to invoke a runtime entry point, but do not record
796   // PC-related information in a stack map.
797   void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
798                                            HInstruction* instruction,
799                                            SlowPathCode* slow_path);
800 
801   ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; }
802 
803   bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
804 
805   // Check if the desired_string_load_kind is supported. If it is, return it,
806   // otherwise return a fall-back kind that should be used instead.
807   HLoadString::LoadKind GetSupportedLoadStringKind(
808       HLoadString::LoadKind desired_string_load_kind) override;
809 
810   // Check if the desired_class_load_kind is supported. If it is, return it,
811   // otherwise return a fall-back kind that should be used instead.
812   HLoadClass::LoadKind GetSupportedLoadClassKind(
813       HLoadClass::LoadKind desired_class_load_kind) override;
814 
815   // Check if the desired_dispatch_info is supported. If it is, return it,
816   // otherwise return a fall-back info that should be used instead.
817   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
818       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
819       ArtMethod* method) override;
820 
821   void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
822   void GenerateStaticOrDirectCall(
823       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
824   void GenerateVirtualCall(
825       HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
826 
827   void MoveFromReturnRegister(Location trg, DataType::Type type) override;
828 
829   // Add a new boot image intrinsic patch for an instruction and return the label
830   // to be bound before the instruction. The instruction will be either the
831   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
832   // to the associated ADRP patch label).
833   vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
834                                                    vixl::aarch64::Label* adrp_label = nullptr);
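  // Illustrative sketch of the two-step ADRP/ADD pattern these New*Patch() helpers support
  // (the actual emission goes through the Emit*Placeholder() helpers declared further below;
  // `reg` and `intrinsic_data` are placeholder names):
  //
  //   vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(intrinsic_data);
  //   EmitAdrpPlaceholder(adrp_label, reg.X());
  //   vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(intrinsic_data, adrp_label);
  //   EmitAddPlaceholder(add_label, reg.X(), reg.X());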
835 
836   // Add a new boot image relocation patch for an instruction and return the label
837   // to be bound before the instruction. The instruction will be either the
838   // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
839   // to the associated ADRP patch label).
840   vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset,
841                                                vixl::aarch64::Label* adrp_label = nullptr);
842 
843   // Add a new boot image method patch for an instruction and return the label
844   // to be bound before the instruction. The instruction will be either the
845   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
846   // to the associated ADRP patch label).
847   vixl::aarch64::Label* NewBootImageMethodPatch(MethodReference target_method,
848                                                 vixl::aarch64::Label* adrp_label = nullptr);
849 
850   // Add a new app image method patch for an instruction and return the label
851   // to be bound before the instruction. The instruction will be either the
852   // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
853   // to the associated ADRP patch label).
854   vixl::aarch64::Label* NewAppImageMethodPatch(MethodReference target_method,
855                                                vixl::aarch64::Label* adrp_label = nullptr);
856 
857   // Add a new .bss entry method patch for an instruction and return
858   // the label to be bound before the instruction. The instruction will be
859   // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
860   // pointing to the associated ADRP patch label).
861   vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
862                                                vixl::aarch64::Label* adrp_label = nullptr);
863 
864   // Add a new boot image type patch for an instruction and return the label
865   // to be bound before the instruction. The instruction will be either the
866   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
867   // to the associated ADRP patch label).
868   vixl::aarch64::Label* NewBootImageTypePatch(const DexFile& dex_file,
869                                               dex::TypeIndex type_index,
870                                               vixl::aarch64::Label* adrp_label = nullptr);
871 
872   // Add a new app image type patch for an instruction and return the label
873   // to be bound before the instruction. The instruction will be either the
874   // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
875   // to the associated ADRP patch label).
876   vixl::aarch64::Label* NewAppImageTypePatch(const DexFile& dex_file,
877                                              dex::TypeIndex type_index,
878                                              vixl::aarch64::Label* adrp_label = nullptr);
879 
880   // Add a new .bss entry type patch for an instruction and return the label
881   // to be bound before the instruction. The instruction will be either the
882   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
883   // to the associated ADRP patch label).
884   vixl::aarch64::Label* NewBssEntryTypePatch(HLoadClass* load_class,
885                                              vixl::aarch64::Label* adrp_label = nullptr);
886 
887   // Add a new boot image string patch for an instruction and return the label
888   // to be bound before the instruction. The instruction will be either the
889   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
890   // to the associated ADRP patch label).
891   vixl::aarch64::Label* NewBootImageStringPatch(const DexFile& dex_file,
892                                                 dex::StringIndex string_index,
893                                                 vixl::aarch64::Label* adrp_label = nullptr);
894 
895   // Add a new .bss entry string patch for an instruction and return the label
896   // to be bound before the instruction. The instruction will be either the
897   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
898   // to the associated ADRP patch label).
899   vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file,
900                                                dex::StringIndex string_index,
901                                                vixl::aarch64::Label* adrp_label = nullptr);
902 
903   // Add a new .bss entry MethodType patch for an instruction and return the label
904   // to be bound before the instruction. The instruction will be either the
905   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
906   // to the associated ADRP patch label).
907   vixl::aarch64::Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type,
908                                                    vixl::aarch64::Label* adrp_label = nullptr);
909 
910   // Add a new boot image JNI entrypoint patch for an instruction and return the label
911   // to be bound before the instruction. The instruction will be either the
912   // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
913   // to the associated ADRP patch label).
914   vixl::aarch64::Label* NewBootImageJniEntrypointPatch(MethodReference target_method,
915                                                        vixl::aarch64::Label* adrp_label = nullptr);
916 
917   // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT.
918   void EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset);
919 
920   // Emit the CBNZ instruction for baker read barrier and record
921   // the associated patch for AOT or slow path for JIT.
922   void EmitBakerReadBarrierCbnz(uint32_t custom_data);
923 
924   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address) {
925     return jit_patches_.DeduplicateBootImageAddressLiteral(address);
926   }
927   vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
928                                                                 dex::StringIndex string_index,
929                                                                 Handle<mirror::String> handle) {
930     return jit_patches_.DeduplicateJitStringLiteral(
931         dex_file, string_index, handle, GetCodeGenerationData());
932   }
933   vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file,
934                                                                dex::TypeIndex class_index,
935                                                                Handle<mirror::Class> handle) {
936     return jit_patches_.DeduplicateJitClassLiteral(
937         dex_file, class_index, handle, GetCodeGenerationData());
938   }
939   vixl::aarch64::Literal<uint32_t>* DeduplicateJitMethodTypeLiteral(
940       const DexFile& dex_file,
941       dex::ProtoIndex proto_index,
942       Handle<mirror::MethodType> handle) {
943     return jit_patches_.DeduplicateJitMethodTypeLiteral(
944         dex_file, proto_index, handle, GetCodeGenerationData());
945   }
946 
947   void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
948   void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
949                           vixl::aarch64::Register out,
950                           vixl::aarch64::Register base);
951   void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
952                                 vixl::aarch64::Register out,
953                                 vixl::aarch64::Register base);
954 
955   void LoadBootImageRelRoEntry(vixl::aarch64::Register reg, uint32_t boot_image_offset);
956   void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference);
957   void LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg, TypeReference type_reference);
958   void LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke);
959   void LoadClassRootForIntrinsic(vixl::aarch64::Register reg, ClassRoot class_root);
960 
961   void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
962   bool NeedsThunkCode(const linker::LinkerPatch& patch) const override;
963   void EmitThunkCode(const linker::LinkerPatch& patch,
964                      /*out*/ ArenaVector<uint8_t>* code,
965                      /*out*/ std::string* debug_name) override;
966 
967   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
968 
969   // Generate a GC root reference load:
970   //
971   //   root <- *(obj + offset)
972   //
973   // while honoring read barriers based on read_barrier_option.
974   void GenerateGcRootFieldLoad(HInstruction* instruction,
975                                Location root,
976                                vixl::aarch64::Register obj,
977                                uint32_t offset,
978                                vixl::aarch64::Label* fixup_label,
979                                ReadBarrierOption read_barrier_option);
980   // Generate MOV for the `old_value` in intrinsic and mark it with Baker read barrier.
981   void GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value,
982                                                  vixl::aarch64::Register old_value);
983   // Fast path implementation of ReadBarrier::Barrier for a heap
984   // reference field load when Baker's read barriers are used.
985   // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
986   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
987                                              Location ref,
988                                              vixl::aarch64::Register obj,
989                                              const vixl::aarch64::MemOperand& src,
990                                              bool needs_null_check,
991                                              bool use_load_acquire);
992   // Fast path implementation of ReadBarrier::Barrier for a heap
993   // reference field load when Baker's read barriers are used.
994   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
995                                              Location ref,
996                                              vixl::aarch64::Register obj,
997                                              uint32_t offset,
998                                              Location maybe_temp,
999                                              bool needs_null_check,
1000                                              bool use_load_acquire);
1001   // Fast path implementation of ReadBarrier::Barrier for a heap
1002   // reference array load when Baker's read barriers are used.
1003   void GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
1004                                              Location ref,
1005                                              vixl::aarch64::Register obj,
1006                                              uint32_t data_offset,
1007                                              Location index,
1008                                              bool needs_null_check);
1009 
1010   // Emit code checking the status of the Marking Register, and
1011   // aborting the program if MR does not match the value stored in the
1012   // art::Thread object. Code is only emitted in debug mode and if
1013   // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
1014   //
1015   // Argument `code` is used to identify the different occurrences of
1016   // MaybeGenerateMarkingRegisterCheck in the code generator, and is
1017   // passed to the BRK instruction.
1018   //
1019   // If `temp_loc` is a valid location, it is expected to be a
1020   // register and will be used as a temporary to generate code;
1021   // otherwise, a temporary will be fetched from the core register
1022   // scratch pool.
1023   virtual void MaybeGenerateMarkingRegisterCheck(int code,
1024                                                  Location temp_loc = Location::NoLocation());
1025 
1026   // Create slow path for a read barrier for a heap reference within `instruction`.
1027   //
1028   // This is a helper function for GenerateReadBarrierSlow() that has the same
1029   // arguments. The creation and adding of the slow path is exposed for intrinsics
1030   // that cannot use GenerateReadBarrierSlow() from their own slow paths.
1031   SlowPathCodeARM64* AddReadBarrierSlowPath(HInstruction* instruction,
1032                                             Location out,
1033                                             Location ref,
1034                                             Location obj,
1035                                             uint32_t offset,
1036                                             Location index);
1037 
1038   // Generate a read barrier for a heap reference within `instruction`
1039   // using a slow path.
1040   //
1041   // A read barrier for an object reference read from the heap is
1042   // implemented as a call to the artReadBarrierSlow runtime entry
1043   // point, which is passed the values in locations `ref`, `obj`, and
1044   // `offset`:
1045   //
1046   //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
1047   //                                      mirror::Object* obj,
1048   //                                      uint32_t offset);
1049   //
1050   // The `out` location contains the value returned by
1051   // artReadBarrierSlow.
1052   //
1053   // When `index` is provided (i.e. for array accesses), the offset
1054   // value passed to artReadBarrierSlow is adjusted to take `index`
1055   // into account.
1056   void GenerateReadBarrierSlow(HInstruction* instruction,
1057                                Location out,
1058                                Location ref,
1059                                Location obj,
1060                                uint32_t offset,
1061                                Location index = Location::NoLocation());
1062 
1063   // If read barriers are enabled, generate a read barrier for a heap
1064   // reference using a slow path. If heap poisoning is enabled, also
1065   // unpoison the reference in `out`.
1066   void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
1067                                     Location out,
1068                                     Location ref,
1069                                     Location obj,
1070                                     uint32_t offset,
1071                                     Location index = Location::NoLocation());
1072 
1073   // Generate a read barrier for a GC root within `instruction` using
1074   // a slow path.
1075   //
1076   // A read barrier for an object reference GC root is implemented as
1077   // a call to the artReadBarrierForRootSlow runtime entry point,
1078   // which is passed the value in location `root`:
1079   //
1080   //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
1081   //
1082   // The `out` location contains the value returned by
1083   // artReadBarrierForRootSlow.
1084   void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
1085 
1086   void IncreaseFrame(size_t adjustment) override;
1087   void DecreaseFrame(size_t adjustment) override;
1088 
1089   void GenerateNop() override;
1090 
1091   void GenerateImplicitNullCheck(HNullCheck* instruction) override;
1092   void GenerateExplicitNullCheck(HNullCheck* instruction) override;
1093 
1094   void MaybeRecordImplicitNullCheck(HInstruction* instr) final {
1095     // This function must only be called within special scopes
1096     // (EmissionCheckScope, ExactAssemblyScope), which prevent generation of
1097     // veneer/literal pools by VIXL assembler.
1098     CHECK_EQ(GetVIXLAssembler()->ArePoolsBlocked(), true)
1099         << "The function must only be called within EmissionCheckScope or ExactAssemblyScope";
1100     CodeGenerator::MaybeRecordImplicitNullCheck(instr);
1101   }
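  // Illustrative sketch (hypothetical call site), assuming `temp`, `obj`,
  // `offset` and `instruction` are defined by the surrounding visitor:
  //
  //   EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
  //   __ Ldr(temp, HeapOperand(obj, offset));
  //   MaybeRecordImplicitNullCheck(instruction);
  //
  // The scope blocks pool emission, so the recorded PC is that of the load
  // that may fault.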
1102 
1103   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
1104   void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
1105   void MaybeRecordTraceEvent(bool is_method_entry);
1106 
1107   bool CanUseImplicitSuspendCheck() const;
1108 
1109  private:
1110   // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
1111 
1112   enum class BakerReadBarrierKind : uint8_t {
1113     kField,     // Field get or array get with constant offset (i.e. constant index).
1114     kAcquire,   // Volatile field get.
1115     kArray,     // Array get with index in register.
1116     kGcRoot,    // GC root load.
1117     kLast = kGcRoot
1118   };
1119 
1120   static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u;
1121 
1122   static constexpr size_t kBitsForBakerReadBarrierKind =
1123       MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
1124   static constexpr size_t kBakerReadBarrierBitsForRegister =
1125       MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
1126   using BakerReadBarrierKindField =
1127       BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
1128   using BakerReadBarrierFirstRegField =
1129       BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
1130   using BakerReadBarrierSecondRegField =
1131       BitField<uint32_t,
1132                kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
1133                kBakerReadBarrierBitsForRegister>;
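  // With the widths above (2 bits for the kind, 5 bits per register), the
  // encoded custom data is laid out as follows; this is an observation derived
  // from the definitions above, not an additional contract:
  //
  //   bits [0, 2)  : BakerReadBarrierKind
  //   bits [2, 7)  : first register
  //   bits [7, 12) : second register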
1134 
1135   static void CheckValidReg(uint32_t reg) {
1136     DCHECK(reg < vixl::aarch64::lr.GetCode() &&
1137            reg != vixl::aarch64::ip0.GetCode() &&
1138            reg != vixl::aarch64::ip1.GetCode()) << reg;
1139   }
1140 
1141   static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
1142     CheckValidReg(base_reg);
1143     CheckValidReg(holder_reg);
1144     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
1145            BakerReadBarrierFirstRegField::Encode(base_reg) |
1146            BakerReadBarrierSecondRegField::Encode(holder_reg);
1147   }
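  // Worked example under the layout noted above (illustrative only):
  // EncodeBakerReadBarrierFieldData(/* base_reg= */ 1u, /* holder_reg= */ 2u)
  // evaluates to kField (0) | (1u << 2) | (2u << 7) == 0x104.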
1148 
1149   static inline uint32_t EncodeBakerReadBarrierAcquireData(uint32_t base_reg, uint32_t holder_reg) {
1150     CheckValidReg(base_reg);
1151     CheckValidReg(holder_reg);
1152     DCHECK_NE(base_reg, holder_reg);
1153     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kAcquire) |
1154            BakerReadBarrierFirstRegField::Encode(base_reg) |
1155            BakerReadBarrierSecondRegField::Encode(holder_reg);
1156   }
1157 
1158   static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
1159     CheckValidReg(base_reg);
1160     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
1161            BakerReadBarrierFirstRegField::Encode(base_reg) |
1162            BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
1163   }
1164 
1165   static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
1166     CheckValidReg(root_reg);
1167     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
1168            BakerReadBarrierFirstRegField::Encode(root_reg) |
1169            BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
1170   }
1171 
1172   void CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
1173                                     uint32_t encoded_data,
1174                                     /*out*/ std::string* debug_name);
1175 
1176   // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
1177   // whether through .data.img.rel.ro, .bss, or directly in the boot image.
1178   struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> {
1179     PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
1180         : PatchInfo<vixl::aarch64::Label>(dex_file, off_or_idx), pc_insn_label() { }
1181 
1182     vixl::aarch64::Label* pc_insn_label;
1183   };
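  // Illustrative note (a sketch of the usual pc-relative pattern, not a new
  // guarantee): `label` marks the instruction to be patched, while
  // `pc_insn_label` refers to the ADRP of the pair, e.g.
  //
  //   adrp xN, <target page>       // patched with the page of the target.
  //   add  xN, xN, <page offset>   // or ldr; its info points at the ADRP's label.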
1184 
1185   struct BakerReadBarrierPatchInfo {
1186     explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
1187 
1188     vixl::aarch64::Label label;
1189     uint32_t custom_data;
1190   };
1191 
1192   vixl::aarch64::Label* NewPcRelativePatch(const DexFile* dex_file,
1193                                            uint32_t offset_or_index,
1194                                            vixl::aarch64::Label* adrp_label,
1195                                            ArenaDeque<PcRelativePatchInfo>* patches);
1196 
1197   void FixJumpTables();
1198 
1199   template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1200   static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
1201                                           ArenaVector<linker::LinkerPatch>* linker_patches);
1202 
1203   // Returns whether SVE features are supported and should be used.
1204   bool ShouldUseSVE() const;
1205 
1206   // Labels for each block that will be compiled.
1207   // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
1208   ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
1209   vixl::aarch64::Label frame_entry_label_;
1210   ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
1211 
1212   LocationsBuilderARM64Neon location_builder_neon_;
1213   InstructionCodeGeneratorARM64Neon instruction_visitor_neon_;
1214   LocationsBuilderARM64Sve location_builder_sve_;
1215   InstructionCodeGeneratorARM64Sve instruction_visitor_sve_;
1216 
1217   LocationsBuilderARM64* location_builder_;
1218   InstructionCodeGeneratorARM64* instruction_visitor_;
1219   ParallelMoveResolverARM64 move_resolver_;
1220   Arm64Assembler assembler_;
1221 
1222   // PC-relative method patch info for kBootImageLinkTimePcRelative.
1223   ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
1224   // PC-relative method patch info for kAppImageRelRo.
1225   ArenaDeque<PcRelativePatchInfo> app_image_method_patches_;
1226   // PC-relative method patch info for kBssEntry.
1227   ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
1228   // PC-relative type patch info for kBootImageLinkTimePcRelative.
1229   ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
1230   // PC-relative type patch info for kAppImageRelRo.
1231   ArenaDeque<PcRelativePatchInfo> app_image_type_patches_;
1232   // PC-relative type patch info for kBssEntry.
1233   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
1234   // PC-relative public type patch info for kBssEntryPublic.
1235   ArenaDeque<PcRelativePatchInfo> public_type_bss_entry_patches_;
1236   // PC-relative package type patch info for kBssEntryPackage.
1237   ArenaDeque<PcRelativePatchInfo> package_type_bss_entry_patches_;
1238   // PC-relative String patch info for kBootImageLinkTimePcRelative.
1239   ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
1240   // PC-relative String patch info for kBssEntry.
1241   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
1242   // PC-relative MethodType patch info for kBssEntry.
1243   ArenaDeque<PcRelativePatchInfo> method_type_bss_entry_patches_;
1244   // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
1245   ArenaDeque<PcRelativePatchInfo> boot_image_jni_entrypoint_patches_;
1246   // PC-relative patch info for IntrinsicObjects when compiling the boot image,
1247   // and for kBootImageRelRo method/type/string patches otherwise.
1248   ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_;
1249   // Patch info for calls to entrypoint dispatch thunks. Used for slow paths.
1250   ArenaDeque<PatchInfo<vixl::aarch64::Label>> call_entrypoint_patches_;
1251   // Baker read barrier patch info.
1252   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
1253 
1254   JitPatchesARM64 jit_patches_;
1255 
1256   // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
1257   // Wrap the label to work around vixl::aarch64::Label being non-copyable
1258   // and non-moveable, which makes it unusable directly in ArenaSafeMap<>.
1259   struct LabelWrapper {
1260     LabelWrapper(const LabelWrapper& src)
1261         : label() {
1262       DCHECK(!src.label.IsLinked() && !src.label.IsBound());
1263     }
1264     LabelWrapper() = default;
1265     vixl::aarch64::Label label;
1266   };
1267   ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;
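  // Illustrative sketch (hypothetical use, assuming the usual SafeMap
  // GetOrCreate() helper): the wrapper lets entries be created in place, e.g.
  //
  //   vixl::aarch64::Label* slow_path_entry =
  //       &jit_baker_read_barrier_slow_paths_.GetOrCreate(
  //           custom_data, []() { return LabelWrapper(); }).label;
  //
  // The copy constructor above only needs to tolerate copies of labels that
  // are not yet linked or bound, which the DCHECK enforces.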
1268 
1269   friend class linker::Arm64RelativePatcherTest;
1270   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
1271 };
1272 
1273 inline Arm64Assembler* ParallelMoveResolverARM64::GetAssembler() const {
1274   return codegen_->GetAssembler();
1275 }
1276 
1277 }  // namespace arm64
1278 }  // namespace art
1279 
1280 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
1281