1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm_vixl.h"
18 
19 #include "arch/arm/asm_support_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "arch/arm/jni_frame_arm.h"
22 #include "art_method-inl.h"
23 #include "base/bit_utils.h"
24 #include "base/bit_utils_iterator.h"
25 #include "base/globals.h"
26 #include "class_root-inl.h"
27 #include "class_table.h"
28 #include "code_generator_utils.h"
29 #include "common_arm.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "gc/accounting/card_table.h"
32 #include "gc/space/image_space.h"
33 #include "heap_poisoning.h"
34 #include "interpreter/mterp/nterp.h"
35 #include "intrinsics.h"
36 #include "intrinsics_arm_vixl.h"
37 #include "intrinsics_list.h"
38 #include "intrinsics_utils.h"
39 #include "jit/profiling_info.h"
40 #include "linker/linker_patch.h"
41 #include "mirror/array-inl.h"
42 #include "mirror/class-inl.h"
43 #include "mirror/var_handle.h"
44 #include "profiling_info_builder.h"
45 #include "scoped_thread_state_change-inl.h"
46 #include "thread.h"
47 #include "trace.h"
48 #include "utils/arm/assembler_arm_vixl.h"
49 #include "utils/arm/managed_register_arm.h"
50 #include "utils/assembler.h"
51 #include "utils/stack_checks.h"
52 
53 namespace art HIDDEN {
54 namespace arm {
55 
56 namespace vixl32 = vixl::aarch32;
57 using namespace vixl32;  // NOLINT(build/namespaces)
58 
59 using helpers::DRegisterFrom;
60 using helpers::HighRegisterFrom;
61 using helpers::InputDRegisterAt;
62 using helpers::InputOperandAt;
63 using helpers::InputRegister;
64 using helpers::InputRegisterAt;
65 using helpers::InputSRegisterAt;
66 using helpers::InputVRegister;
67 using helpers::InputVRegisterAt;
68 using helpers::Int32ConstantFrom;
69 using helpers::Int64ConstantFrom;
70 using helpers::LocationFrom;
71 using helpers::LowRegisterFrom;
72 using helpers::LowSRegisterFrom;
73 using helpers::OperandFrom;
74 using helpers::OutputRegister;
75 using helpers::OutputSRegister;
76 using helpers::OutputVRegister;
77 using helpers::RegisterFrom;
78 using helpers::SRegisterFrom;
79 using helpers::Uint64ConstantFrom;
80 
81 using vixl::EmissionCheckScope;
82 using vixl::ExactAssemblyScope;
83 using vixl::CodeBufferCheckScope;
84 
85 using RegisterList = vixl32::RegisterList;
86 
87 static bool ExpectedPairLayout(Location location) {
88   // We expect this for both core and fpu register pairs.
89   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
90 }
91 // Use a local definition to prevent copying mistakes.
92 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
93 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
94 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
95 
96 // Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
97 // offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
98 // For the Baker read barrier implementation using link-time generated thunks we need to split
99 // the offset explicitly.
100 constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
101 
102 // Using a base helps identify when we hit Marking Register check breakpoints.
103 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
104 
105 #ifdef __
106 #error "ARM Codegen VIXL macro-assembler macro already defined."
107 #endif
108 
109 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
110 #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
111 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
112 
113 // Marker for code that is yet to be, and must be, implemented.
114 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
115 
116 static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
117   return rt.IsLow() && rn.IsLow() && offset < 32u;
118 }
119 
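// RAII helper used inside an ExactAssemblyScope: emits a wide ADR on construction and, once the
// label is bound, patches bit 0 of the encoded offset in its destructor so that the computed
// address carries the Thumb mode bit and can be used as a return address.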
120 class EmitAdrCode {
121  public:
122   EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
123       : assembler_(assembler), rd_(rd), label_(label) {
124     DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
125     adr_location_ = assembler->GetCursorOffset();
126     assembler->adr(EncodingSize(Wide), rd, label);
127   }
128 
129   ~EmitAdrCode() {
130     DCHECK(label_->IsBound());
131     // The ADR emitted by the assembler does not set the Thumb mode bit we need.
132     // TODO: Maybe extend VIXL to allow ADR for return address?
133     uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
134     // Expecting ADR encoding T3 with `(offset & 1) == 0`.
135     DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);           // Check bits 24-31, except 26.
136     DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);           // Check bits 16-23.
137     DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());   // Check bits 8-11 and 15.
138     DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);           // Check bit 0, i.e. the `offset & 1`.
139     // Add the Thumb mode bit.
140     raw_adr[2] |= 0x01u;
141   }
142 
143  private:
144   ArmVIXLMacroAssembler* const assembler_;
145   vixl32::Register rd_;
146   vixl32::Label* const label_;
147   int32_t adr_location_;
148 };
149 
150 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
151   InvokeRuntimeCallingConventionARMVIXL calling_convention;
152   RegisterSet caller_saves = RegisterSet::Empty();
153   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
154   // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
155   // that the kPrimNot result register is the same as the first argument register.
156   return caller_saves;
157 }
158 
159 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARMVIXL operate on sets of S
160 // registers; for each live D register they treat the two corresponding S registers as live.
161 //
162 // The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList)
163 // build a list of contiguous D registers from a list of contiguous S registers (handling the
164 // first/last S register corner cases) and save/restore this new list as D registers, thereby:
165 // - decreasing code size
166 // - avoiding hazards on Cortex-A57, when a pair of S registers for an actually live D register
167 //   is restored and then used in regular non-slow-path code as a D register.
168 //
169 // For the following example (v means the S register is live):
170 //   D names: |    D0   |    D1   |    D2   |    D3   | ...
171 //   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
172 //   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
173 //
174 // S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be
175 // processed as D registers.
176 //
177 // TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
178 // for lists of floating-point registers.
179 static size_t SaveContiguousSRegisterList(size_t first,
180                                           size_t last,
181                                           CodeGenerator* codegen,
182                                           size_t stack_offset) {
183   static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
184   static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
185   DCHECK_LE(first, last);
186   if ((first == last) && (first == 0)) {
187     __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
188     return stack_offset + kSRegSizeInBytes;
189   }
190   if (first % 2 == 1) {
191     __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
192     stack_offset += kSRegSizeInBytes;
193   }
194 
195   bool save_last = false;
196   if (last % 2 == 0) {
197     save_last = true;
198     --last;
199   }
200 
201   if (first < last) {
202     vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
203     DCHECK_EQ((last - first + 1) % 2, 0u);
204     size_t number_of_d_regs = (last - first + 1) / 2;
205 
206     if (number_of_d_regs == 1) {
207       __ Vstr(d_reg, MemOperand(sp, stack_offset));
208     } else if (number_of_d_regs > 1) {
209       UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
210       vixl32::Register base = sp;
211       if (stack_offset != 0) {
212         base = temps.Acquire();
213         __ Add(base, sp, Operand::From(stack_offset));
214       }
215       __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
216     }
217     stack_offset += number_of_d_regs * kDRegSizeInBytes;
218   }
219 
220   if (save_last) {
221     __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
222     stack_offset += kSRegSizeInBytes;
223   }
224 
225   return stack_offset;
226 }
227 
228 static size_t RestoreContiguousSRegisterList(size_t first,
229                                              size_t last,
230                                              CodeGenerator* codegen,
231                                              size_t stack_offset) {
232   static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
233   static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
234   DCHECK_LE(first, last);
235   if ((first == last) && (first == 0)) {
236     __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
237     return stack_offset + kSRegSizeInBytes;
238   }
239   if (first % 2 == 1) {
240     __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
241     stack_offset += kSRegSizeInBytes;
242   }
243 
244   bool restore_last = false;
245   if (last % 2 == 0) {
246     restore_last = true;
247     --last;
248   }
249 
250   if (first < last) {
251     vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
252     DCHECK_EQ((last - first + 1) % 2, 0u);
253     size_t number_of_d_regs = (last - first + 1) / 2;
254     if (number_of_d_regs == 1) {
255       __ Vldr(d_reg, MemOperand(sp, stack_offset));
256     } else if (number_of_d_regs > 1) {
257       UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
258       vixl32::Register base = sp;
259       if (stack_offset != 0) {
260         base = temps.Acquire();
261         __ Add(base, sp, Operand::From(stack_offset));
262       }
263       __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
264     }
265     stack_offset += number_of_d_regs * kDRegSizeInBytes;
266   }
267 
268   if (restore_last) {
269     __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
270     stack_offset += kSRegSizeInBytes;
271   }
272 
273   return stack_offset;
274 }
275 
276 static LoadOperandType GetLoadOperandType(DataType::Type type) {
277   switch (type) {
278     case DataType::Type::kReference:
279       return kLoadWord;
280     case DataType::Type::kBool:
281     case DataType::Type::kUint8:
282       return kLoadUnsignedByte;
283     case DataType::Type::kInt8:
284       return kLoadSignedByte;
285     case DataType::Type::kUint16:
286       return kLoadUnsignedHalfword;
287     case DataType::Type::kInt16:
288       return kLoadSignedHalfword;
289     case DataType::Type::kInt32:
290       return kLoadWord;
291     case DataType::Type::kInt64:
292       return kLoadWordPair;
293     case DataType::Type::kFloat32:
294       return kLoadSWord;
295     case DataType::Type::kFloat64:
296       return kLoadDWord;
297     default:
298       LOG(FATAL) << "Unreachable type " << type;
299       UNREACHABLE();
300   }
301 }
302 
303 void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
304   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
305   size_t orig_offset = stack_offset;
306 
307   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
308   for (uint32_t i : LowToHighBits(core_spills)) {
309     // If the register holds an object, update the stack mask.
310     if (locations->RegisterContainsObject(i)) {
311       locations->SetStackBit(stack_offset / kVRegSize);
312     }
313     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
314     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
315     saved_core_stack_offsets_[i] = stack_offset;
316     stack_offset += kArmWordSize;
317   }
318 
319   CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
320   arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
321 
322   uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
323   orig_offset = stack_offset;
324   for (uint32_t i : LowToHighBits(fp_spills)) {
325     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
326     saved_fpu_stack_offsets_[i] = stack_offset;
327     stack_offset += kArmWordSize;
328   }
329 
330   stack_offset = orig_offset;
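  // Each iteration extracts one contiguous run of set bits from `fp_spills`: adding `1u << begin`
  // to a value whose lowest set bit is at `begin` carries through the run, so the AND clears
  // exactly that run and `CTZ(tmp)` yields the bit just past it. For example, fp_spills =
  // 0b01111100 gives begin = 2 and end = 7, so S2..S6 are saved as one contiguous list.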
331   while (fp_spills != 0u) {
332     uint32_t begin = CTZ(fp_spills);
333     uint32_t tmp = fp_spills + (1u << begin);
334     fp_spills &= tmp;  // Clear the contiguous range of 1s.
335     uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
336     stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
337   }
338   DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
339 }
340 
341 void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
342   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
343   size_t orig_offset = stack_offset;
344 
345   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
346   for (uint32_t i : LowToHighBits(core_spills)) {
347     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
348     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
349     stack_offset += kArmWordSize;
350   }
351 
352   // TODO(VIXL): Check the coherency of stack_offset after this with a test.
353   CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
354   arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
355 
356   uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
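  // Extract contiguous runs of live S registers using the same carry trick as in
  // SaveLiveRegisters above.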
357   while (fp_spills != 0u) {
358     uint32_t begin = CTZ(fp_spills);
359     uint32_t tmp = fp_spills + (1u << begin);
360     fp_spills &= tmp;  // Clear the contiguous range of 1s.
361     uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
362     stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
363   }
364   DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
365 }
366 
367 class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
368  public:
369   explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
370 
371   void EmitNativeCode(CodeGenerator* codegen) override {
372     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
373     __ Bind(GetEntryLabel());
374     if (instruction_->CanThrowIntoCatchBlock()) {
375       // Live registers will be restored in the catch block if caught.
376       SaveLiveRegisters(codegen, instruction_->GetLocations());
377     }
378     arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
379                                instruction_,
380                                instruction_->GetDexPc(),
381                                this);
382     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
383   }
384 
385   bool IsFatal() const override { return true; }
386 
387   const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
388 
389  private:
390   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
391 };
392 
393 class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
394  public:
395   explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
396       : SlowPathCodeARMVIXL(instruction) {}
397 
398   void EmitNativeCode(CodeGenerator* codegen) override {
399     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
400     __ Bind(GetEntryLabel());
401     arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
402     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
403   }
404 
405   bool IsFatal() const override { return true; }
406 
407   const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
408 
409  private:
410   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
411 };
412 
413 class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
414  public:
415   SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
416       : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
417 
418   void EmitNativeCode(CodeGenerator* codegen) override {
419     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
420     __ Bind(GetEntryLabel());
421     arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
422     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
423     if (successor_ == nullptr) {
424       __ B(GetReturnLabel());
425     } else {
426       __ B(arm_codegen->GetLabelOf(successor_));
427     }
428   }
429 
430   vixl32::Label* GetReturnLabel() {
431     DCHECK(successor_ == nullptr);
432     return &return_label_;
433   }
434 
435   HBasicBlock* GetSuccessor() const {
436     return successor_;
437   }
438 
439   const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
440 
441  private:
442   // If not null, the block to branch to after the suspend check.
443   HBasicBlock* const successor_;
444 
445   // If `successor_` is null, the label to branch to after the suspend check.
446   vixl32::Label return_label_;
447 
448   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
449 };
450 
451 class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
452  public:
453   explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
454       : SlowPathCodeARMVIXL(instruction) {}
455 
456   void EmitNativeCode(CodeGenerator* codegen) override {
457     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
458     LocationSummary* locations = instruction_->GetLocations();
459 
460     __ Bind(GetEntryLabel());
461     if (instruction_->CanThrowIntoCatchBlock()) {
462       // Live registers will be restored in the catch block if caught.
463       SaveLiveRegisters(codegen, instruction_->GetLocations());
464     }
465     // We're moving two locations to locations that could overlap, so we need a parallel
466     // move resolver.
467     InvokeRuntimeCallingConventionARMVIXL calling_convention;
468     codegen->EmitParallelMoves(
469         locations->InAt(0),
470         LocationFrom(calling_convention.GetRegisterAt(0)),
471         DataType::Type::kInt32,
472         locations->InAt(1),
473         LocationFrom(calling_convention.GetRegisterAt(1)),
474         DataType::Type::kInt32);
475     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
476         ? kQuickThrowStringBounds
477         : kQuickThrowArrayBounds;
478     arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
479     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
480     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
481   }
482 
483   bool IsFatal() const override { return true; }
484 
485   const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
486 
487  private:
488   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
489 };
490 
491 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
492  public:
493   LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
494       : SlowPathCodeARMVIXL(at), cls_(cls) {
495     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
496     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
497   }
498 
499   void EmitNativeCode(CodeGenerator* codegen) override {
500     LocationSummary* locations = instruction_->GetLocations();
501     Location out = locations->Out();
502     const uint32_t dex_pc = instruction_->GetDexPc();
503     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
504     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
505 
506     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
507     __ Bind(GetEntryLabel());
508     SaveLiveRegisters(codegen, locations);
509 
510     InvokeRuntimeCallingConventionARMVIXL calling_convention;
511     if (must_resolve_type) {
512       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()) ||
513              arm_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
514              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
515                              &cls_->GetDexFile()));
516       dex::TypeIndex type_index = cls_->GetTypeIndex();
517       __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
518       if (cls_->NeedsAccessCheck()) {
519         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
520         arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
521       } else {
522         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
523         arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
524       }
525       // If we also must_do_clinit, the resolved type is now in the correct register.
526     } else {
527       DCHECK(must_do_clinit);
528       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
529       arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
530     }
531     if (must_do_clinit) {
532       arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
533       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
534     }
535 
536     // Move the class to the desired location.
537     if (out.IsValid()) {
538       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
539       arm_codegen->Move32(locations->Out(), LocationFrom(r0));
540     }
541     RestoreLiveRegisters(codegen, locations);
542     __ B(GetExitLabel());
543   }
544 
545   const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }
546 
547  private:
548   // The class this slow path will load.
549   HLoadClass* const cls_;
550 
551   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
552 };
553 
554 class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
555  public:
556   explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
557       : SlowPathCodeARMVIXL(instruction) {}
558 
559   void EmitNativeCode(CodeGenerator* codegen) override {
560     DCHECK(instruction_->IsLoadString());
561     DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
562     LocationSummary* locations = instruction_->GetLocations();
563     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
564     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
565 
566     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
567     __ Bind(GetEntryLabel());
568     SaveLiveRegisters(codegen, locations);
569 
570     InvokeRuntimeCallingConventionARMVIXL calling_convention;
571     __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
572     arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
573     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
574 
575     arm_codegen->Move32(locations->Out(), LocationFrom(r0));
576     RestoreLiveRegisters(codegen, locations);
577 
578     __ B(GetExitLabel());
579   }
580 
581   const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
582 
583  private:
584   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
585 };
586 
587 class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
588  public:
589   TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
590       : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
591 
592   void EmitNativeCode(CodeGenerator* codegen) override {
593     LocationSummary* locations = instruction_->GetLocations();
594     DCHECK(instruction_->IsCheckCast()
595            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
596 
597     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
598     __ Bind(GetEntryLabel());
599 
600     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
601       SaveLiveRegisters(codegen, locations);
602     }
603 
604     // We're moving two locations to locations that could overlap, so we need a parallel
605     // move resolver.
606     InvokeRuntimeCallingConventionARMVIXL calling_convention;
607 
608     codegen->EmitParallelMoves(locations->InAt(0),
609                                LocationFrom(calling_convention.GetRegisterAt(0)),
610                                DataType::Type::kReference,
611                                locations->InAt(1),
612                                LocationFrom(calling_convention.GetRegisterAt(1)),
613                                DataType::Type::kReference);
614     if (instruction_->IsInstanceOf()) {
615       arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
616                                  instruction_,
617                                  instruction_->GetDexPc(),
618                                  this);
619       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
620       arm_codegen->Move32(locations->Out(), LocationFrom(r0));
621     } else {
622       DCHECK(instruction_->IsCheckCast());
623       arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
624                                  instruction_,
625                                  instruction_->GetDexPc(),
626                                  this);
627       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
628     }
629 
630     if (!is_fatal_) {
631       RestoreLiveRegisters(codegen, locations);
632       __ B(GetExitLabel());
633     }
634   }
635 
636   const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }
637 
638   bool IsFatal() const override { return is_fatal_; }
639 
640  private:
641   const bool is_fatal_;
642 
643   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
644 };
645 
646 class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
647  public:
648   explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
649       : SlowPathCodeARMVIXL(instruction) {}
650 
651   void EmitNativeCode(CodeGenerator* codegen) override {
652     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
653     __ Bind(GetEntryLabel());
654     LocationSummary* locations = instruction_->GetLocations();
655     SaveLiveRegisters(codegen, locations);
656     InvokeRuntimeCallingConventionARMVIXL calling_convention;
657     __ Mov(calling_convention.GetRegisterAt(0),
658            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
659 
660     arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
661     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
662   }
663 
664   const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
665 
666  private:
667   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
668 };
669 
670 class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
671  public:
672   explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
673 
674   void EmitNativeCode(CodeGenerator* codegen) override {
675     LocationSummary* locations = instruction_->GetLocations();
676     __ Bind(GetEntryLabel());
677     SaveLiveRegisters(codegen, locations);
678 
679     InvokeRuntimeCallingConventionARMVIXL calling_convention;
680     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
681     parallel_move.AddMove(
682         locations->InAt(0),
683         LocationFrom(calling_convention.GetRegisterAt(0)),
684         DataType::Type::kReference,
685         nullptr);
686     parallel_move.AddMove(
687         locations->InAt(1),
688         LocationFrom(calling_convention.GetRegisterAt(1)),
689         DataType::Type::kInt32,
690         nullptr);
691     parallel_move.AddMove(
692         locations->InAt(2),
693         LocationFrom(calling_convention.GetRegisterAt(2)),
694         DataType::Type::kReference,
695         nullptr);
696     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
697 
698     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
699     arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
700     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
701     RestoreLiveRegisters(codegen, locations);
702     __ B(GetExitLabel());
703   }
704 
705   const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
706 
707  private:
708   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
709 };
710 
711 // Slow path generating a read barrier for a heap reference.
712 class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
713  public:
714   ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
715                                              Location out,
716                                              Location ref,
717                                              Location obj,
718                                              uint32_t offset,
719                                              Location index)
720       : SlowPathCodeARMVIXL(instruction),
721         out_(out),
722         ref_(ref),
723         obj_(obj),
724         offset_(offset),
725         index_(index) {
726     // If `obj` is equal to `out` or `ref`, it means the initial object
727     // has been overwritten by (or after) the heap object reference load
728     // to be instrumented, e.g.:
729     //
730     //   __ LoadFromOffset(kLoadWord, out, out, offset);
731     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
732     //
733     // In that case, we have lost the information about the original
734     // object, and the emitted read barrier cannot work properly.
735     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
736     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
737   }
738 
739   void EmitNativeCode(CodeGenerator* codegen) override {
740     DCHECK(codegen->EmitReadBarrier());
741     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
742     LocationSummary* locations = instruction_->GetLocations();
743     vixl32::Register reg_out = RegisterFrom(out_);
744     DCHECK(locations->CanCall());
745     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
746     DCHECK(instruction_->IsInstanceFieldGet() ||
747            instruction_->IsStaticFieldGet() ||
748            instruction_->IsArrayGet() ||
749            instruction_->IsInstanceOf() ||
750            instruction_->IsCheckCast() ||
751            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
752         << "Unexpected instruction in read barrier for heap reference slow path: "
753         << instruction_->DebugName();
754     // The read barrier instrumentation of object ArrayGet
755     // instructions does not support the HIntermediateAddress
756     // instruction.
757     DCHECK(!(instruction_->IsArrayGet() &&
758              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
759 
760     __ Bind(GetEntryLabel());
761     SaveLiveRegisters(codegen, locations);
762 
763     // We may have to change the index's value, but as `index_` is a
764     // constant member (like other "inputs" of this slow path),
765     // we introduce a copy of it, `index`.
766     Location index = index_;
767     if (index_.IsValid()) {
768       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
769       if (instruction_->IsArrayGet()) {
770         // Compute the actual memory offset and store it in `index`.
771         vixl32::Register index_reg = RegisterFrom(index_);
772         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
773         if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
774           // We are about to change the value of `index_reg` (see the
775           // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
776           // art::arm::ArmVIXLMacroAssembler::Add below), but it has
777           // not been saved by the previous call to
778           // art::SlowPathCode::SaveLiveRegisters, as it is a
779           // callee-save register --
780           // art::SlowPathCode::SaveLiveRegisters does not consider
781           // callee-save registers, as it has been designed with the
782           // assumption that callee-save registers are supposed to be
783           // handled by the called function.  So, as a callee-save
784           // register, `index_reg` _would_ eventually be saved onto
785           // the stack, but it would be too late: we would have
786           // changed its value earlier.  Therefore, we manually save
787           // it here into another freely available register,
788           // `free_reg`, chosen of course among the caller-save
789           // registers (as a callee-save `free_reg` register would
790           // exhibit the same problem).
791           //
792           // Note we could have requested a temporary register from
793           // the register allocator instead; but we prefer not to, as
794           // this is a slow path, and we know we can find a
795           // caller-save register that is available.
796           vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
797           __ Mov(free_reg, index_reg);
798           index_reg = free_reg;
799           index = LocationFrom(index_reg);
800         } else {
801           // The initial register stored in `index_` has already been
802           // saved in the call to art::SlowPathCode::SaveLiveRegisters
803           // (as it is not a callee-save register), so we can freely
804           // use it.
805         }
806         // Shifting the index value contained in `index_reg` by the scale
807         // factor (2) cannot overflow in practice, as the runtime is
808         // unable to allocate object arrays with a size larger than
809         // 2^26 - 1 (that is, 2^28 - 4 bytes).
810         __ Lsl(index_reg, index_reg, TIMES_4);
811         static_assert(
812             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
813             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
814         __ Add(index_reg, index_reg, offset_);
815       } else {
816         // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
817         // (as in the case of ArrayGet), as it is actually an offset to an object field within an
818         // object.
819         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
820         DCHECK(instruction_->GetLocations()->Intrinsified());
821         HInvoke* invoke = instruction_->AsInvoke();
822         DCHECK(IsUnsafeGetReference(invoke) ||
823                IsVarHandleGet(invoke) ||
824                IsVarHandleCASFamily(invoke))
825             << invoke->GetIntrinsic();
826         DCHECK_EQ(offset_, 0U);
827         // Though UnsafeGet's offset location is a register pair, we only pass the low
828         // part (high part is irrelevant for 32-bit addresses) to the slow path.
829         // For VarHandle intrinsics, the index is always just a register.
830         DCHECK(index_.IsRegister());
831         index = index_;
832       }
833     }
834 
835     // We're moving two or three locations to locations that could
836     // overlap, so we need a parallel move resolver.
837     InvokeRuntimeCallingConventionARMVIXL calling_convention;
838     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
839     parallel_move.AddMove(ref_,
840                           LocationFrom(calling_convention.GetRegisterAt(0)),
841                           DataType::Type::kReference,
842                           nullptr);
843     parallel_move.AddMove(obj_,
844                           LocationFrom(calling_convention.GetRegisterAt(1)),
845                           DataType::Type::kReference,
846                           nullptr);
847     if (index.IsValid()) {
848       parallel_move.AddMove(index,
849                             LocationFrom(calling_convention.GetRegisterAt(2)),
850                             DataType::Type::kInt32,
851                             nullptr);
852       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
853     } else {
854       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
855       __ Mov(calling_convention.GetRegisterAt(2), offset_);
856     }
857     arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
858     CheckEntrypointTypes<
859         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
860     arm_codegen->Move32(out_, LocationFrom(r0));
861 
862     RestoreLiveRegisters(codegen, locations);
863     __ B(GetExitLabel());
864   }
865 
866   const char* GetDescription() const override {
867     return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
868   }
869 
870  private:
871   vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
872     uint32_t ref = RegisterFrom(ref_).GetCode();
873     uint32_t obj = RegisterFrom(obj_).GetCode();
874     for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
875       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
876         return vixl32::Register(i);
877       }
878     }
879     // We shall never fail to find a free caller-save register, as
880     // there are more than two core caller-save registers on ARM
881     // (meaning it is possible to find one which is different from
882     // `ref` and `obj`).
883     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
884     LOG(FATAL) << "Could not find a free caller-save register";
885     UNREACHABLE();
886   }
887 
888   const Location out_;
889   const Location ref_;
890   const Location obj_;
891   const uint32_t offset_;
892   // An additional location containing an index to an array.
893   // Only used for HArrayGet and the UnsafeGetObject &
894   // UnsafeGetObjectVolatile intrinsics.
895   const Location index_;
896 
897   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
898 };
899 
900 // Slow path generating a read barrier for a GC root.
901 class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
902  public:
903   ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
904       : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
905   }
906 
907   void EmitNativeCode(CodeGenerator* codegen) override {
908     DCHECK(codegen->EmitReadBarrier());
909     LocationSummary* locations = instruction_->GetLocations();
910     vixl32::Register reg_out = RegisterFrom(out_);
911     DCHECK(locations->CanCall());
912     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
913     DCHECK(instruction_->IsLoadClass() ||
914            instruction_->IsLoadString() ||
915            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
916         << "Unexpected instruction in read barrier for GC root slow path: "
917         << instruction_->DebugName();
918 
919     __ Bind(GetEntryLabel());
920     SaveLiveRegisters(codegen, locations);
921 
922     InvokeRuntimeCallingConventionARMVIXL calling_convention;
923     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
924     arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
925     arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
926                                instruction_,
927                                instruction_->GetDexPc(),
928                                this);
929     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
930     arm_codegen->Move32(out_, LocationFrom(r0));
931 
932     RestoreLiveRegisters(codegen, locations);
933     __ B(GetExitLabel());
934   }
935 
936   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }
937 
938  private:
939   const Location out_;
940   const Location root_;
941 
942   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
943 };
944 
945 class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
946  public:
947   explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
948       : SlowPathCodeARMVIXL(instruction) {}
949 
950   void EmitNativeCode(CodeGenerator* codegen) override {
951     LocationSummary* locations = instruction_->GetLocations();
952     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
953     QuickEntrypointEnum entry_point =
954         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
955     __ Bind(GetEntryLabel());
956     SaveLiveRegisters(codegen, locations);
957     if (instruction_->IsMethodExitHook()) {
958       // Load frame size to pass to the exit hooks
959       __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
960     }
961     arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
962     RestoreLiveRegisters(codegen, locations);
963     __ B(GetExitLabel());
964   }
965 
966   const char* GetDescription() const override {
967     return "MethodEntryExitHooksSlowPath";
968   }
969 
970  private:
971   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
972 };
973 
974 class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
975  public:
976   CompileOptimizedSlowPathARMVIXL(HSuspendCheck* suspend_check,
977                                   vixl32::Register profiling_info)
978       : SlowPathCodeARMVIXL(suspend_check),
979         profiling_info_(profiling_info) {}
980 
981   void EmitNativeCode(CodeGenerator* codegen) override {
982     uint32_t entry_point_offset =
983         GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
984     __ Bind(GetEntryLabel());
985     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
986     UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
987     vixl32::Register tmp = temps.Acquire();
988     __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold());
989     __ Strh(tmp,
990             MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
991     __ Ldr(lr, MemOperand(tr, entry_point_offset));
992     // Note: we don't record the call here (and therefore don't generate a stack
993     // map), as the entrypoint should never be suspended.
994     __ Blx(lr);
995     __ B(GetExitLabel());
996   }
997 
998   const char* GetDescription() const override {
999     return "CompileOptimizedSlowPath";
1000   }
1001 
1002  private:
1003   vixl32::Register profiling_info_;
1004 
1005   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
1006 };
1007 
1008 inline vixl32::Condition ARMCondition(IfCondition cond) {
1009   switch (cond) {
1010     case kCondEQ: return eq;
1011     case kCondNE: return ne;
1012     case kCondLT: return lt;
1013     case kCondLE: return le;
1014     case kCondGT: return gt;
1015     case kCondGE: return ge;
1016     case kCondB:  return lo;
1017     case kCondBE: return ls;
1018     case kCondA:  return hi;
1019     case kCondAE: return hs;
1020   }
1021   LOG(FATAL) << "Unreachable";
1022   UNREACHABLE();
1023 }
1024 
1025 // Maps signed condition to unsigned condition.
1026 inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
1027   switch (cond) {
1028     case kCondEQ: return eq;
1029     case kCondNE: return ne;
1030     // Signed to unsigned.
1031     case kCondLT: return lo;
1032     case kCondLE: return ls;
1033     case kCondGT: return hi;
1034     case kCondGE: return hs;
1035     // Unsigned remain unchanged.
1036     case kCondB:  return lo;
1037     case kCondBE: return ls;
1038     case kCondA:  return hi;
1039     case kCondAE: return hs;
1040   }
1041   LOG(FATAL) << "Unreachable";
1042   UNREACHABLE();
1043 }
1044 
1045 inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
1046   // The ARM condition codes can express all the necessary branches, see the
1047   // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
1048   // There is no dex instruction or HIR that would need the missing conditions
1049   // "equal or unordered" or "not equal".
1050   switch (cond) {
1051     case kCondEQ: return eq;
1052     case kCondNE: return ne /* unordered */;
1053     case kCondLT: return gt_bias ? cc : lt /* unordered */;
1054     case kCondLE: return gt_bias ? ls : le /* unordered */;
1055     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
1056     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
1057     default:
1058       LOG(FATAL) << "UNREACHABLE";
1059       UNREACHABLE();
1060   }
1061 }
1062 
1063 inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
1064   switch (op_kind) {
1065     case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
1066     case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
1067     case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
1068     default:
1069       LOG(FATAL) << "Unexpected op kind " << op_kind;
1070       UNREACHABLE();
1071   }
1072 }
1073 
1074 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
1075   stream << vixl32::Register(reg);
1076 }
1077 
1078 void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1079   stream << vixl32::SRegister(reg);
1080 }
1081 
1082 const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
1083   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
1084 }
1085 
1086 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
1087   uint32_t mask = 0;
1088   for (uint32_t i = regs.GetFirstSRegister().GetCode();
1089        i <= regs.GetLastSRegister().GetCode();
1090        ++i) {
1091     mask |= (1 << i);
1092   }
1093   return mask;
1094 }
1095 
1096 // Saves the register on the stack. Returns the size taken on the stack.
1097 size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
1098                                               [[maybe_unused]] uint32_t reg_id) {
1099   TODO_VIXL32(FATAL);
1100   UNREACHABLE();
1101 }
1102 
1103 // Restores the register from the stack. Returns the size taken on the stack.
1104 size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
1105                                                  [[maybe_unused]] uint32_t reg_id) {
1106   TODO_VIXL32(FATAL);
1107   UNREACHABLE();
1108 }
1109 
1110 size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
1111                                                        [[maybe_unused]] uint32_t reg_id) {
1112   TODO_VIXL32(FATAL);
1113   UNREACHABLE();
1114 }
1115 
1116 size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
1117                                                           [[maybe_unused]] uint32_t reg_id) {
1118   TODO_VIXL32(FATAL);
1119   UNREACHABLE();
1120 }
1121 
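// Emits a single 32-bit data-processing instruction for the given HInstruction kind. A zero
// immediate operand degenerates into a plain move: AND with 0 produces 0, while ADD/ORR/SUB/EOR
// with 0 simply copy `first` into `out`.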
1122 static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
1123                                         vixl32::Register out,
1124                                         vixl32::Register first,
1125                                         const Operand& second,
1126                                         CodeGeneratorARMVIXL* codegen) {
1127   if (second.IsImmediate() && second.GetImmediate() == 0) {
1128     const Operand in = kind == HInstruction::kAnd
1129         ? Operand(0)
1130         : Operand(first);
1131 
1132     __ Mov(out, in);
1133   } else {
1134     switch (kind) {
1135       case HInstruction::kAdd:
1136         __ Add(out, first, second);
1137         break;
1138       case HInstruction::kAnd:
1139         __ And(out, first, second);
1140         break;
1141       case HInstruction::kOr:
1142         __ Orr(out, first, second);
1143         break;
1144       case HInstruction::kSub:
1145         __ Sub(out, first, second);
1146         break;
1147       case HInstruction::kXor:
1148         __ Eor(out, first, second);
1149         break;
1150       default:
1151         LOG(FATAL) << "Unexpected instruction kind: " << kind;
1152         UNREACHABLE();
1153     }
1154   }
1155 }
1156 
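// Applies a 64-bit data-processing operation to register pairs. Add and sub go through ADDS/ADC
// and SUBS/SBC so the carry/borrow propagates from the low word to the high word; the bitwise
// operations are applied to each half independently.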
1157 static void GenerateDataProc(HInstruction::InstructionKind kind,
1158                              const Location& out,
1159                              const Location& first,
1160                              const Operand& second_lo,
1161                              const Operand& second_hi,
1162                              CodeGeneratorARMVIXL* codegen) {
1163   const vixl32::Register first_hi = HighRegisterFrom(first);
1164   const vixl32::Register first_lo = LowRegisterFrom(first);
1165   const vixl32::Register out_hi = HighRegisterFrom(out);
1166   const vixl32::Register out_lo = LowRegisterFrom(out);
1167 
1168   if (kind == HInstruction::kAdd) {
1169     __ Adds(out_lo, first_lo, second_lo);
1170     __ Adc(out_hi, first_hi, second_hi);
1171   } else if (kind == HInstruction::kSub) {
1172     __ Subs(out_lo, first_lo, second_lo);
1173     __ Sbc(out_hi, first_hi, second_hi);
1174   } else {
1175     GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
1176     GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
1177   }
1178 }
1179 
1180 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
1181   return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
1182 }
1183 
1184 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
1185                                  CodeGeneratorARMVIXL* codegen) {
1186   DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
1187   DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
1188 
1189   const LocationSummary* const locations = instruction->GetLocations();
1190   const uint32_t shift_value = instruction->GetShiftAmount();
1191   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
1192   const Location first = locations->InAt(0);
1193   const Location second = locations->InAt(1);
1194   const Location out = locations->Out();
1195   const vixl32::Register first_hi = HighRegisterFrom(first);
1196   const vixl32::Register first_lo = LowRegisterFrom(first);
1197   const vixl32::Register out_hi = HighRegisterFrom(out);
1198   const vixl32::Register out_lo = LowRegisterFrom(out);
1199   const vixl32::Register second_hi = HighRegisterFrom(second);
1200   const vixl32::Register second_lo = LowRegisterFrom(second);
1201   const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
1202 
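       // For shift amounts of 32 or more the low word of `second` is shifted out entirely,
       // so the shifted operand simplifies to:
       //   LSL: hi = second_lo << (shift - 32), lo = 0
       //   ASR: lo = second_hi >> (shift - 32), hi = second_hi >> 31 (sign)
       //   LSR: lo = second_hi >> (shift - 32), hi = 0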
1203   if (shift_value >= 32) {
1204     if (shift == ShiftType::LSL) {
1205       GenerateDataProcInstruction(kind,
1206                                   out_hi,
1207                                   first_hi,
1208                                   Operand(second_lo, ShiftType::LSL, shift_value - 32),
1209                                   codegen);
1210       GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1211     } else if (shift == ShiftType::ASR) {
1212       GenerateDataProc(kind,
1213                        out,
1214                        first,
1215                        GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1216                        Operand(second_hi, ShiftType::ASR, 31),
1217                        codegen);
1218     } else {
1219       DCHECK_EQ(shift, ShiftType::LSR);
1220       GenerateDataProc(kind,
1221                        out,
1222                        first,
1223                        GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1224                        0,
1225                        codegen);
1226     }
1227   } else {
1228     DCHECK_GT(shift_value, 1U);
1229     DCHECK_LT(shift_value, 32U);
1230 
1231     UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1232 
1233     if (shift == ShiftType::LSL) {
1234       // We are not doing this for HInstruction::kAdd because the output will require
1235       // Location::kOutputOverlap; not applicable to other cases.
1236       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1237         GenerateDataProcInstruction(kind,
1238                                     out_hi,
1239                                     first_hi,
1240                                     Operand(second_hi, ShiftType::LSL, shift_value),
1241                                     codegen);
1242         GenerateDataProcInstruction(kind,
1243                                     out_hi,
1244                                     out_hi,
1245                                     Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1246                                     codegen);
1247         GenerateDataProcInstruction(kind,
1248                                     out_lo,
1249                                     first_lo,
1250                                     Operand(second_lo, ShiftType::LSL, shift_value),
1251                                     codegen);
1252       } else {
1253         const vixl32::Register temp = temps.Acquire();
1254 
1255         __ Lsl(temp, second_hi, shift_value);
1256         __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1257         GenerateDataProc(kind,
1258                          out,
1259                          first,
1260                          Operand(second_lo, ShiftType::LSL, shift_value),
1261                          temp,
1262                          codegen);
1263       }
1264     } else {
1265       DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1266 
1267       // We are not doing this for HInstruction::kAdd because the output will require
1268       // Location::kOutputOverlap; not applicable to other cases.
1269       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1270         GenerateDataProcInstruction(kind,
1271                                     out_lo,
1272                                     first_lo,
1273                                     Operand(second_lo, ShiftType::LSR, shift_value),
1274                                     codegen);
1275         GenerateDataProcInstruction(kind,
1276                                     out_lo,
1277                                     out_lo,
1278                                     Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1279                                     codegen);
1280         GenerateDataProcInstruction(kind,
1281                                     out_hi,
1282                                     first_hi,
1283                                     Operand(second_hi, shift, shift_value),
1284                                     codegen);
1285       } else {
1286         const vixl32::Register temp = temps.Acquire();
1287 
1288         __ Lsr(temp, second_lo, shift_value);
1289         __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1290         GenerateDataProc(kind,
1291                          out,
1292                          first,
1293                          temp,
1294                          Operand(second_hi, shift, shift_value),
1295                          codegen);
1296       }
1297     }
1298   }
1299 }
1300 
1301 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1302   const Location rhs_loc = instruction->GetLocations()->InAt(1);
1303   if (rhs_loc.IsConstant()) {
1304     // 0.0 is the only immediate that can be encoded directly in
1305     // a VCMP instruction.
1306     //
1307     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1308     // specify that in a floating-point comparison, positive zero
1309     // and negative zero are considered equal, so we can use the
1310     // literal 0.0 for both cases here.
1311     //
1312     // Note however that some methods (Float.equal, Float.compare,
1313     // Float.compareTo, Double.equal, Double.compare,
1314     // Double.compareTo, Math.max, Math.min, StrictMath.max,
1315     // StrictMath.min) consider 0.0 to be (strictly) greater than
1316     // -0.0. So if we ever translate calls to these methods into a
1317     // HCompare instruction, we must handle the -0.0 case with
1318     // care here.
1319     DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1320 
1321     const DataType::Type type = instruction->InputAt(0)->GetType();
1322 
1323     if (type == DataType::Type::kFloat32) {
1324       __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1325     } else {
1326       DCHECK_EQ(type, DataType::Type::kFloat64);
1327       __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1328     }
1329   } else {
1330     __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1331   }
1332 }
1333 
1334 static int64_t AdjustConstantForCondition(int64_t value,
1335                                           IfCondition* condition,
1336                                           IfCondition* opposite) {
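       // Rewrite comparisons against 1 and -1 as comparisons against 0:
       //   x <u 1  <=>  x == 0     x >=u 1  <=>  x != 0
       //   x > -1  <=>  x >= 0     x <= -1  <=>  x < 0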
1337   if (value == 1) {
1338     if (*condition == kCondB) {
1339       value = 0;
1340       *condition = kCondEQ;
1341       *opposite = kCondNE;
1342     } else if (*condition == kCondAE) {
1343       value = 0;
1344       *condition = kCondNE;
1345       *opposite = kCondEQ;
1346     }
1347   } else if (value == -1) {
1348     if (*condition == kCondGT) {
1349       value = 0;
1350       *condition = kCondGE;
1351       *opposite = kCondLT;
1352     } else if (*condition == kCondLE) {
1353       value = 0;
1354       *condition = kCondLT;
1355       *opposite = kCondGE;
1356     }
1357   }
1358 
1359   return value;
1360 }
1361 
1362 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1363     HCondition* condition,
1364     bool invert,
1365     CodeGeneratorARMVIXL* codegen) {
1366   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1367 
1368   const LocationSummary* const locations = condition->GetLocations();
1369   IfCondition cond = condition->GetCondition();
1370   IfCondition opposite = condition->GetOppositeCondition();
1371 
1372   if (invert) {
1373     std::swap(cond, opposite);
1374   }
1375 
1376   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1377   const Location left = locations->InAt(0);
1378   const Location right = locations->InAt(1);
1379 
1380   DCHECK(right.IsConstant());
1381 
1382   const vixl32::Register left_high = HighRegisterFrom(left);
1383   const vixl32::Register left_low = LowRegisterFrom(left);
1384   int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1385   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1386 
1387   // Comparisons against 0 are common enough to deserve special attention.
1388   if (value == 0) {
1389     switch (cond) {
1390       case kCondNE:
1391       // x > 0 iff x != 0 when the comparison is unsigned.
1392       case kCondA:
1393         ret = std::make_pair(ne, eq);
1394         FALLTHROUGH_INTENDED;
1395       case kCondEQ:
1396       // x <= 0 iff x == 0 when the comparison is unsigned.
1397       case kCondBE:
1398         __ Orrs(temps.Acquire(), left_low, left_high);
1399         return ret;
1400       case kCondLT:
1401       case kCondGE:
1402         __ Cmp(left_high, 0);
1403         return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1404       // Trivially true or false.
1405       case kCondB:
1406         ret = std::make_pair(ne, eq);
1407         FALLTHROUGH_INTENDED;
1408       case kCondAE:
1409         __ Cmp(left_low, left_low);
1410         return ret;
1411       default:
1412         break;
1413     }
1414   }
1415 
1416   switch (cond) {
1417     case kCondEQ:
1418     case kCondNE:
1419     case kCondB:
1420     case kCondBE:
1421     case kCondA:
1422     case kCondAE: {
1423       const uint32_t value_low = Low32Bits(value);
1424       Operand operand_low(value_low);
1425 
1426       __ Cmp(left_high, High32Bits(value));
1427 
1428       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1429       // we must ensure that the operands corresponding to the least significant
1430       // halves of the inputs fit into a 16-bit CMP encoding.
1431       if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1432         operand_low = Operand(temps.Acquire());
1433         __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1434       }
1435 
1436       // We use the scope because of the IT block that follows.
1437       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1438                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1439                                CodeBufferCheckScope::kExactSize);
1440 
1441       __ it(eq);
1442       __ cmp(eq, left_low, operand_low);
1443       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1444       break;
1445     }
1446     case kCondLE:
1447     case kCondGT:
1448       // Trivially true or false.
1449       if (value == std::numeric_limits<int64_t>::max()) {
1450         __ Cmp(left_low, left_low);
1451         ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1452         break;
1453       }
1454 
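           // Rewrite x <= c as x < c + 1 and x > c as x >= c + 1; the c == INT64_MAX case,
           // where c + 1 would overflow, was already handled above.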
1455       if (cond == kCondLE) {
1456         DCHECK_EQ(opposite, kCondGT);
1457         cond = kCondLT;
1458         opposite = kCondGE;
1459       } else {
1460         DCHECK_EQ(cond, kCondGT);
1461         DCHECK_EQ(opposite, kCondLE);
1462         cond = kCondGE;
1463         opposite = kCondLT;
1464       }
1465 
1466       value++;
1467       FALLTHROUGH_INTENDED;
1468     case kCondGE:
1469     case kCondLT: {
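           // Evaluate left - value as a 64-bit subtraction that only updates the flags (the
           // SBCS result goes to a scratch register); the signed LT/GE conditions then
           // follow from the N and V flags.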
1470       __ Cmp(left_low, Low32Bits(value));
1471       __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1472       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1473       break;
1474     }
1475     default:
1476       LOG(FATAL) << "Unreachable";
1477       UNREACHABLE();
1478   }
1479 
1480   return ret;
1481 }
1482 
1483 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1484     HCondition* condition,
1485     bool invert,
1486     CodeGeneratorARMVIXL* codegen) {
1487   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1488 
1489   const LocationSummary* const locations = condition->GetLocations();
1490   IfCondition cond = condition->GetCondition();
1491   IfCondition opposite = condition->GetOppositeCondition();
1492 
1493   if (invert) {
1494     std::swap(cond, opposite);
1495   }
1496 
1497   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1498   Location left = locations->InAt(0);
1499   Location right = locations->InAt(1);
1500 
1501   DCHECK(right.IsRegisterPair());
1502 
1503   switch (cond) {
1504     case kCondEQ:
1505     case kCondNE:
1506     case kCondB:
1507     case kCondBE:
1508     case kCondA:
1509     case kCondAE: {
1510       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1511 
1512       // We use the scope because of the IT block that follows.
1513       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1514                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1515                                CodeBufferCheckScope::kExactSize);
1516 
1517       __ it(eq);
1518       __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1519       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1520       break;
1521     }
1522     case kCondLE:
1523     case kCondGT:
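           // Rewrite x <= y as y >= x and x > y as y < x by swapping the operands, then
           // fall through to the CMP/SBCS sequence below.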
1524       if (cond == kCondLE) {
1525         DCHECK_EQ(opposite, kCondGT);
1526         cond = kCondGE;
1527         opposite = kCondLT;
1528       } else {
1529         DCHECK_EQ(cond, kCondGT);
1530         DCHECK_EQ(opposite, kCondLE);
1531         cond = kCondLT;
1532         opposite = kCondGE;
1533       }
1534 
1535       std::swap(left, right);
1536       FALLTHROUGH_INTENDED;
1537     case kCondGE:
1538     case kCondLT: {
1539       UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1540 
1541       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1542       __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1543       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1544       break;
1545     }
1546     default:
1547       LOG(FATAL) << "Unreachable";
1548       UNREACHABLE();
1549   }
1550 
1551   return ret;
1552 }
1553 
1554 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1555                                                                     bool invert,
1556                                                                     CodeGeneratorARMVIXL* codegen) {
1557   const DataType::Type type = condition->GetLeft()->GetType();
1558   IfCondition cond = condition->GetCondition();
1559   IfCondition opposite = condition->GetOppositeCondition();
1560   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1561 
1562   if (invert) {
1563     std::swap(cond, opposite);
1564   }
1565 
1566   if (type == DataType::Type::kInt64) {
1567     ret = condition->GetLocations()->InAt(1).IsConstant()
1568         ? GenerateLongTestConstant(condition, invert, codegen)
1569         : GenerateLongTest(condition, invert, codegen);
1570   } else if (DataType::IsFloatingPointType(type)) {
1571     GenerateVcmp(condition, codegen);
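         // Copy the FP comparison flags (FPSCR.NZCV) into the APSR so that ordinary
         // condition codes can be used on the result.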
1572     __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1573     ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1574                          ARMFPCondition(opposite, condition->IsGtBias()));
1575   } else {
1576     DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1577     __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1578     ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1579   }
1580 
1581   return ret;
1582 }
1583 
1584 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1585   const vixl32::Register out = OutputRegister(cond);
1586   const auto condition = GenerateTest(cond, false, codegen);
1587 
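       // Materialize the condition as 0/1: clear `out` without touching the flags, then
       // conditionally set it to 1, either via a 16-bit IT block when `out` is a low
       // register or via a branch around the MOV otherwise.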
1588   __ Mov(LeaveFlags, out, 0);
1589 
1590   if (out.IsLow()) {
1591     // We use the scope because of the IT block that follows.
1592     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1593                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1594                              CodeBufferCheckScope::kExactSize);
1595 
1596     __ it(condition.first);
1597     __ mov(condition.first, out, 1);
1598   } else {
1599     vixl32::Label done_label;
1600     vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1601 
1602     __ B(condition.second, final_label, /* is_far_target= */ false);
1603     __ Mov(out, 1);
1604 
1605     if (done_label.IsReferenced()) {
1606       __ Bind(&done_label);
1607     }
1608   }
1609 }
1610 
1611 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1612   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1613 
1614   const LocationSummary* const locations = cond->GetLocations();
1615   IfCondition condition = cond->GetCondition();
1616   const vixl32::Register out = OutputRegister(cond);
1617   const Location left = locations->InAt(0);
1618   const Location right = locations->InAt(1);
1619   vixl32::Register left_high = HighRegisterFrom(left);
1620   vixl32::Register left_low = LowRegisterFrom(left);
1621   vixl32::Register temp;
1622   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1623 
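       // Compute (left_low - right_low) and (left_high - right_high); the bitwise OR of
       // the two differences is zero if and only if the 64-bit operands are equal.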
1624   if (right.IsConstant()) {
1625     IfCondition opposite = cond->GetOppositeCondition();
1626     const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1627                                                      &condition,
1628                                                      &opposite);
1629     Operand right_high = High32Bits(value);
1630     Operand right_low = Low32Bits(value);
1631 
1632     // The output uses Location::kNoOutputOverlap.
1633     if (out.Is(left_high)) {
1634       std::swap(left_low, left_high);
1635       std::swap(right_low, right_high);
1636     }
1637 
1638     __ Sub(out, left_low, right_low);
1639     temp = temps.Acquire();
1640     __ Sub(temp, left_high, right_high);
1641   } else {
1642     DCHECK(right.IsRegisterPair());
1643     temp = temps.Acquire();
1644     __ Sub(temp, left_high, HighRegisterFrom(right));
1645     __ Sub(out, left_low, LowRegisterFrom(right));
1646   }
1647 
1648   // Need to check after calling AdjustConstantForCondition().
1649   DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1650 
1651   if (condition == kCondNE && out.IsLow()) {
1652     __ Orrs(out, out, temp);
1653 
1654     // We use the scope because of the IT block that follows.
1655     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1656                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1657                              CodeBufferCheckScope::kExactSize);
1658 
1659     __ it(ne);
1660     __ mov(ne, out, 1);
1661   } else {
1662     __ Orr(out, out, temp);
1663     codegen->GenerateConditionWithZero(condition, out, out, temp);
1664   }
1665 }
1666 
1667 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1668   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1669 
1670   const LocationSummary* const locations = cond->GetLocations();
1671   IfCondition condition = cond->GetCondition();
1672   const vixl32::Register out = OutputRegister(cond);
1673   const Location left = locations->InAt(0);
1674   const Location right = locations->InAt(1);
1675 
1676   if (right.IsConstant()) {
1677     IfCondition opposite = cond->GetOppositeCondition();
1678 
1679     // Comparisons against 0 are common enough to deserve special attention.
1680     if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1681       switch (condition) {
1682         case kCondNE:
1683         case kCondA:
1684           if (out.IsLow()) {
1685             // We only care if both input registers are 0 or not.
1686             __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1687 
1688             // We use the scope because of the IT block that follows.
1689             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1690                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1691                                      CodeBufferCheckScope::kExactSize);
1692 
1693             __ it(ne);
1694             __ mov(ne, out, 1);
1695             return;
1696           }
1697 
1698           FALLTHROUGH_INTENDED;
1699         case kCondEQ:
1700         case kCondBE:
1701           // We only care if both input registers are 0 or not.
1702           __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1703           codegen->GenerateConditionWithZero(condition, out, out);
1704           return;
1705         case kCondLT:
1706         case kCondGE:
1707           // We only care about the sign bit.
1708           FALLTHROUGH_INTENDED;
1709         case kCondAE:
1710         case kCondB:
1711           codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1712           return;
1713         case kCondLE:
1714         case kCondGT:
1715         default:
1716           break;
1717       }
1718     }
1719   }
1720 
1721   // If `out` is a low register, then the GenerateConditionGeneric()
1722   // function generates a shorter code sequence that is still branchless.
1723   if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1724     GenerateEqualLong(cond, codegen);
1725     return;
1726   }
1727 
1728   GenerateConditionGeneric(cond, codegen);
1729 }
1730 
1731 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1732                                                     CodeGeneratorARMVIXL* codegen) {
1733   const DataType::Type type = cond->GetLeft()->GetType();
1734 
1735   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1736 
1737   if (type == DataType::Type::kInt64) {
1738     GenerateConditionLong(cond, codegen);
1739     return;
1740   }
1741 
1742   IfCondition condition = cond->GetCondition();
1743   vixl32::Register in = InputRegisterAt(cond, 0);
1744   const vixl32::Register out = OutputRegister(cond);
1745   const Location right = cond->GetLocations()->InAt(1);
1746   int64_t value;
1747 
1748   if (right.IsConstant()) {
1749     IfCondition opposite = cond->GetOppositeCondition();
1750 
1751     value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1752 
1753     // Comparisons against 0 are common enough to deserve special attention.
1754     if (value == 0) {
1755       switch (condition) {
1756         case kCondNE:
1757         case kCondA:
1758           if (out.IsLow() && out.Is(in)) {
1759             __ Cmp(out, 0);
1760 
1761             // We use the scope because of the IT block that follows.
1762             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1763                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1764                                      CodeBufferCheckScope::kExactSize);
1765 
1766             __ it(ne);
1767             __ mov(ne, out, 1);
1768             return;
1769           }
1770 
1771           FALLTHROUGH_INTENDED;
1772         case kCondEQ:
1773         case kCondBE:
1774         case kCondLT:
1775         case kCondGE:
1776         case kCondAE:
1777         case kCondB:
1778           codegen->GenerateConditionWithZero(condition, out, in);
1779           return;
1780         case kCondLE:
1781         case kCondGT:
1782         default:
1783           break;
1784       }
1785     }
1786   }
1787 
1788   if (condition == kCondEQ || condition == kCondNE) {
1789     Operand operand(0);
1790 
1791     if (right.IsConstant()) {
1792       operand = Operand::From(value);
1793     } else if (out.Is(RegisterFrom(right))) {
1794       // Avoid 32-bit instructions if possible.
1795       operand = InputOperandAt(cond, 0);
1796       in = RegisterFrom(right);
1797     } else {
1798       operand = InputOperandAt(cond, 1);
1799     }
1800 
1801     if (condition == kCondNE && out.IsLow()) {
1802       __ Subs(out, in, operand);
1803 
1804       // We use the scope because of the IT block that follows.
1805       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1806                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1807                                CodeBufferCheckScope::kExactSize);
1808 
1809       __ it(ne);
1810       __ mov(ne, out, 1);
1811     } else {
1812       __ Sub(out, in, operand);
1813       codegen->GenerateConditionWithZero(condition, out, out);
1814     }
1815 
1816     return;
1817   }
1818 
1819   GenerateConditionGeneric(cond, codegen);
1820 }
1821 
1822 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1823   const DataType::Type type = constant->GetType();
1824   bool ret = false;
1825 
1826   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1827 
1828   if (type == DataType::Type::kInt64) {
1829     const uint64_t value = Uint64ConstantFrom(constant);
1830 
1831     ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1832   } else {
1833     ret = IsUint<8>(Int32ConstantFrom(constant));
1834   }
1835 
1836   return ret;
1837 }
1838 
1839 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1840   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1841 
1842   if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1843     return Location::ConstantLocation(constant);
1844   }
1845 
1846   return Location::RequiresRegister();
1847 }
1848 
1849 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1850   // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1851   // we check that we are not dealing with floating-point output (there is no
1852   // 16-bit VMOV encoding).
1853   if (!out.IsRegister() && !out.IsRegisterPair()) {
1854     return false;
1855   }
1856 
1857   // For constants, we also check that the output is in one or two low registers,
1858   // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1859   // MOV encoding can be used.
1860   if (src.IsConstant()) {
1861     if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1862       return false;
1863     }
1864 
1865     if (out.IsRegister()) {
1866       if (!RegisterFrom(out).IsLow()) {
1867         return false;
1868       }
1869     } else {
1870       DCHECK(out.IsRegisterPair());
1871 
1872       if (!HighRegisterFrom(out).IsLow()) {
1873         return false;
1874       }
1875     }
1876   }
1877 
1878   return true;
1879 }
1880 
1881 #undef __
1882 
1883 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1884                                                    vixl32::Label* final_label) {
1885   DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1886   DCHECK_IMPLIES(instruction->IsInvoke(), !instruction->GetLocations()->CanCall());
1887 
1888   const HBasicBlock* const block = instruction->GetBlock();
1889   const HLoopInformation* const info = block->GetLoopInformation();
1890   HInstruction* const next = instruction->GetNext();
1891 
1892   // Avoid a branch to a branch.
1893   if (next->IsGoto() && (info == nullptr ||
1894                          !info->IsBackEdge(*block) ||
1895                          !info->HasSuspendCheck())) {
1896     final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1897   }
1898 
1899   return final_label;
1900 }
1901 
1902 namespace detail {
1903 
1904 // Mark which intrinsics we don't have handcrafted code for.
1905 template <Intrinsics T>
1906 struct IsUnimplemented {
1907   bool is_unimplemented = false;
1908 };
1909 
1910 #define TRUE_OVERRIDE(Name)                     \
1911   template <>                                   \
1912   struct IsUnimplemented<Intrinsics::k##Name> { \
1913     bool is_unimplemented = true;               \
1914   };
1915 UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
1916 #undef TRUE_OVERRIDE
1917 
1918 static constexpr bool kIsIntrinsicUnimplemented[] = {
1919     false,  // kNone
1920 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1921     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1922     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1923 #undef IS_UNIMPLEMENTED
1924 };
1925 
1926 }  // namespace detail
1927 
1928 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1929                                            const CompilerOptions& compiler_options,
1930                                            OptimizingCompilerStats* stats)
1931     : CodeGenerator(graph,
1932                     kNumberOfCoreRegisters,
1933                     kNumberOfSRegisters,
1934                     kNumberOfRegisterPairs,
1935                     kCoreCalleeSaves.GetList(),
1936                     ComputeSRegisterListMask(kFpuCalleeSaves),
1937                     compiler_options,
1938                     stats,
1939                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1940       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1941       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1942       location_builder_(graph, this),
1943       instruction_visitor_(graph, this),
1944       move_resolver_(graph->GetAllocator(), this),
1945       assembler_(graph->GetAllocator()),
1946       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1947       app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1948       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1949       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1950       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1951       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1952       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1953       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1954       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1955       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1956       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1957       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1958       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1959       uint32_literals_(std::less<uint32_t>(),
1960                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1961       jit_string_patches_(StringReferenceValueComparator(),
1962                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1963       jit_class_patches_(TypeReferenceValueComparator(),
1964                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1965       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1966                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1967   // Always save the LR register to mimic Quick.
1968   AddAllocatedRegister(Location::RegisterLocation(LR));
1969   // Give D30 and D31 as scratch register to VIXL. The register allocator only works on
1970   // S0-S31, which alias to D0-D15.
1971   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1972   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1973 }
1974 
1975 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1976   uint32_t num_entries = switch_instr_->GetNumEntries();
1977   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1978 
1979   // We are about to use the assembler to place literals directly. Make sure we have enough
1980   // underlying code buffer and we have generated a jump table of the right size, using
1981   // codegen->GetVIXLAssembler()->GetBuffer().Align();
1982   ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1983                          num_entries * sizeof(int32_t),
1984                          CodeBufferCheckScope::kMaximumSize);
1985   // TODO(VIXL): Check that using lower case bind is fine here.
1986   codegen->GetVIXLAssembler()->bind(&table_start_);
1987   for (uint32_t i = 0; i < num_entries; i++) {
1988     codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1989   }
1990 }
1991 
1992 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1993   uint32_t num_entries = switch_instr_->GetNumEntries();
1994   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1995 
1996   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1997   for (uint32_t i = 0; i < num_entries; i++) {
1998     vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1999     DCHECK(target_label->IsBound());
2000     int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
2001     // When doing a BX to this address, the lowest bit must be set to 1 to stay in T32 (Thumb) state.
2002     if (codegen->GetVIXLAssembler()->IsUsingT32()) {
2003       jump_offset++;
2004     }
2005     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
2006     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
2007 
2008     bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
2009   }
2010 }
2011 
2012 void CodeGeneratorARMVIXL::FixJumpTables() {
2013   for (auto&& jump_table : jump_tables_) {
2014     jump_table->FixTable(this);
2015   }
2016 }
2017 
2018 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
2019 
2020 void CodeGeneratorARMVIXL::Finalize() {
2021   FixJumpTables();
2022 
2023   // Emit JIT baker read barrier slow paths.
2024   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
2025   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
2026     uint32_t encoded_data = entry.first;
2027     vixl::aarch32::Label* slow_path_entry = &entry.second.label;
2028     __ Bind(slow_path_entry);
2029     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
2030   }
2031 
2032   GetAssembler()->FinalizeCode();
2033   CodeGenerator::Finalize();
2034 
2035   // Verify Baker read barrier linker patches.
2036   if (kIsDebugBuild) {
2037     ArrayRef<const uint8_t> code(GetCode());
2038     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
2039       DCHECK(info.label.IsBound());
2040       uint32_t literal_offset = info.label.GetLocation();
2041       DCHECK_ALIGNED(literal_offset, 2u);
2042 
2043       auto GetInsn16 = [&code](uint32_t offset) {
2044         DCHECK_ALIGNED(offset, 2u);
2045         return (static_cast<uint32_t>(code[offset + 0]) << 0) +
2046                (static_cast<uint32_t>(code[offset + 1]) << 8);
2047       };
2048       auto GetInsn32 = [=](uint32_t offset) {
2049         return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
2050       };
2051 
2052       uint32_t encoded_data = info.custom_data;
2053       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
2054       // Check that the next instruction matches the expected LDR.
2055       switch (kind) {
2056         case BakerReadBarrierKind::kField: {
2057           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2058           if (width == BakerReadBarrierWidth::kWide) {
2059             DCHECK_GE(code.size() - literal_offset, 8u);
2060             uint32_t next_insn = GetInsn32(literal_offset + 4u);
2061             // LDR (immediate), encoding T3, with correct base_reg.
2062             CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2063             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2064             CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
2065           } else {
2066             DCHECK_GE(code.size() - literal_offset, 6u);
2067             uint32_t next_insn = GetInsn16(literal_offset + 4u);
2068             // LDR (immediate), encoding T1, with correct base_reg.
2069             CheckValidReg(next_insn & 0x7u);  // Check destination register.
2070             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2071             CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2072           }
2073           break;
2074         }
2075         case BakerReadBarrierKind::kArray: {
2076           DCHECK_GE(code.size() - literal_offset, 8u);
2077           uint32_t next_insn = GetInsn32(literal_offset + 4u);
2078           // LDR (register), encoding T2, with correct base_reg and an LSL #2 shift (LDR Rt, [Rn, Rm, LSL #2]).
2079           CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2080           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2081           CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2082           CheckValidReg(next_insn & 0xf);  // Check index register
2083           break;
2084         }
2085         case BakerReadBarrierKind::kGcRoot: {
2086           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2087           if (width == BakerReadBarrierWidth::kWide) {
2088             DCHECK_GE(literal_offset, 4u);
2089             uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2090             // LDR (immediate), encoding T3, with correct root_reg.
2091             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2092             CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2093           } else {
2094             DCHECK_GE(literal_offset, 2u);
2095             uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2096             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2097             // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2098             // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2099             if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2100               CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2101             }
2102           }
2103           break;
2104         }
2105         case BakerReadBarrierKind::kIntrinsicCas: {
2106           DCHECK_GE(literal_offset, 4u);
2107           uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2108           // MOV (register), encoding T3, with correct root_reg.
2109           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2110           DCHECK_GE(root_reg, 8u);  // Used only for high registers.
2111           CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2112           break;
2113         }
2114         default:
2115           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2116           UNREACHABLE();
2117       }
2118     }
2119   }
2120 }
2121 
2122 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2123   // Stack register, LR and PC are always reserved.
2124   blocked_core_registers_[SP] = true;
2125   blocked_core_registers_[LR] = true;
2126   blocked_core_registers_[PC] = true;
2127 
2128   // TODO: We don't need to reserve marking-register for userfaultfd GC. But
2129   // that would require some work in the assembler code as the right GC is
2130   // chosen at load-time and not compile time.
2131   if (kReserveMarkingRegister) {
2132     // Reserve marking register.
2133     blocked_core_registers_[MR] = true;
2134   }
2135 
2136   // Reserve thread register.
2137   blocked_core_registers_[TR] = true;
2138 
2139   // Reserve temp register.
2140   blocked_core_registers_[IP] = true;
2141 
2142   if (GetGraph()->IsDebuggable()) {
2143     // Stubs do not save callee-save floating point registers. If the graph
2144     // is debuggable, we need to deal with these registers differently. For
2145     // now, just block them.
2146     for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2147          i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2148          ++i) {
2149       blocked_fpu_registers_[i] = true;
2150     }
2151   }
2152 }
2153 
2154 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2155                                                                  CodeGeneratorARMVIXL* codegen)
2156       : InstructionCodeGenerator(graph, codegen),
2157         assembler_(codegen->GetAssembler()),
2158         codegen_(codegen) {}
2159 
2160 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2161   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2162   DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2163       << "At least the return address register must be saved";
2164   // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2165   DCHECK(GetVIXLAssembler()->IsUsingT32());
2166   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2167   // We use vpush and vpop for saving and restoring floating point registers, which take
2168   // a SRegister and the number of registers to save/restore after that SRegister. We
2169   // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
2170   // but in the range.
2171   if (fpu_spill_mask_ != 0) {
2172     uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2173     uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2174     for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2175       fpu_spill_mask_ |= (1 << i);
2176     }
2177   }
2178 }
2179 
2180 void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
2181   LocationSummary* locations = new (GetGraph()->GetAllocator())
2182       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2183   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
2184   // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2185   // compute the address to store the timestamp counter.
2186   locations->AddRegisterTemps(3);
2187 }
2188 
2189 void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
2190   LocationSummary* locations = instruction->GetLocations();
2191   vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
2192   vixl32::Register value = RegisterFrom(locations->GetTemp(1));
2193   vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
2194 
2195   SlowPathCodeARMVIXL* slow_path =
2196       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
2197   codegen_->AddSlowPath(slow_path);
2198 
2199   if (instruction->IsMethodExitHook()) {
2200     // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
2201     // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check
2202     // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
2203     // disabled in debuggable runtime. The other bit is used when this method itself requires a
2204     // deoptimization due to redefinition. So it is safe to just check for non-zero value here.
2205     GetAssembler()->LoadFromOffset(
2206         kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2207     __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
2208   }
2209 
2210   MemberOffset  offset = instruction->IsMethodExitHook() ?
2211       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
2212       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
2213   uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
2214   __ Mov(addr, address + offset.Int32Value());
2215   __ Ldrb(value, MemOperand(addr, 0));
2216   __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
2217   // Check if there are any trace method entry / exit listeners. If no, continue.
2218   __ B(lt, slow_path->GetExitLabel());
2219   // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
2220   // If yes, just take the slow path.
2221   __ B(gt, slow_path->GetEntryLabel());
2222 
2223   // Check if there is space in the buffer to store a new entry; if not, take the slow path.
2224   uint32_t trace_buffer_curr_entry_offset =
2225       Thread::TraceBufferCurrPtrOffset<kArmPointerSize>().Int32Value();
2226   vixl32::Register curr_entry = value;
2227   vixl32::Register init_entry = addr;
2228   __ Ldr(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2229   __ Subs(curr_entry, curr_entry, static_cast<uint32_t>(kNumEntriesForWallClock * sizeof(void*)));
2230   __ Ldr(init_entry, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
2231   __ Cmp(curr_entry, init_entry);
2232   __ B(lt, slow_path->GetEntryLabel());
2233 
2234   // Update the index in the `Thread`.
2235   __ Str(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2236 
2237   // Record method pointer and trace action.
2238   __ Ldr(tmp, MemOperand(sp, 0));
2239   // Use last two bits to encode trace method action. For MethodEntry it is 0
2240   // so no need to set the bits since they are 0 already.
2241   if (instruction->IsMethodExitHook()) {
2242     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
2243     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
2244     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
2245     __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
2246   }
2247   __ Str(tmp, MemOperand(curr_entry, kMethodOffsetInBytes));
2248 
2249   vixl32::Register tmp1 = init_entry;
2250   // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
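       // This MRRC (coproc 15, opc1 1, CRm c14) is expected to read the 64-bit virtual
       // counter (CNTVCT), which serves as the timestamp for the trace entry.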
2251   __ Mrrc(/* lower 32-bit */ tmp,
2252           /* higher 32-bit */ tmp1,
2253           /* coproc= */ 15,
2254           /* opc1= */ 1,
2255           /* crm= */ 14);
2256   static_assert(kHighTimestampOffsetInBytes ==
2257                 kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
2258   __ Strd(tmp, tmp1, MemOperand(curr_entry, kTimestampOffsetInBytes));
2259   __ Bind(slow_path->GetExitLabel());
2260 }
2261 
2262 void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
2263   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2264   DCHECK(codegen_->RequiresCurrentMethod());
2265   GenerateMethodEntryExitHook(instruction);
2266 }
2267 
2268 void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
2269   LocationSummary* locations = new (GetGraph()->GetAllocator())
2270       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2271   // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2272   // compute the address to store the timestamp counter.
2273   locations->AddRegisterTemps(3);
2274 }
2275 
2276 void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
2277   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2278   DCHECK(codegen_->RequiresCurrentMethod());
2279   GenerateMethodEntryExitHook(instruction);
2280 }
2281 
2282 void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
2283                                                  bool is_frame_entry) {
2284   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2285     UseScratchRegisterScope temps(GetVIXLAssembler());
2286     vixl32::Register temp = temps.Acquire();
2287     static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2288     if (!is_frame_entry) {
2289       __ Push(vixl32::Register(kMethodRegister));
2290       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2291       GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2292     }
2293     // Load with zero extend to clear the high bits for integer overflow check.
2294     __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2295     vixl::aarch32::Label done;
2296     DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
2297     __ CompareAndBranchIfZero(temp, &done, /* is_far_target= */ false);
2298     __ Add(temp, temp, -1);
2299     __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2300     __ Bind(&done);
2301     if (!is_frame_entry) {
2302       __ Pop(vixl32::Register(kMethodRegister));
2303       GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2304     }
2305   }
2306 
2307   if (GetGraph()->IsCompilingBaseline() &&
2308       GetGraph()->IsUsefulOptimizing() &&
2309       !Runtime::Current()->IsAotCompiler()) {
2310     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2311     DCHECK(info != nullptr);
2312     DCHECK(!HasEmptyFrame());
2313     uint32_t address = reinterpret_cast32<uint32_t>(info);
2314     UseScratchRegisterScope temps(GetVIXLAssembler());
2315     vixl32::Register tmp = temps.Acquire();
2316     SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(
2317         suspend_check, /* profiling_info= */ lr);
2318     AddSlowPath(slow_path);
2319     __ Mov(lr, address);
2320     __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
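         // ADDS with -1 leaves the carry flag clear only when the counter is already zero;
         // in that case branch to the CompileOptimized slow path instead of storing the
         // decremented value.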
2321     __ Adds(tmp, tmp, -1);
2322     __ B(cc, slow_path->GetEntryLabel());
2323     __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2324     __ Bind(slow_path->GetExitLabel());
2325   }
2326 }
2327 
2328 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2329   bool skip_overflow_check =
2330       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2331   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2332 
2333   // Check if we need to generate the clinit check. We will jump to the
2334   // resolution stub if the class is not initialized and the executing thread is
2335   // not the thread initializing it.
2336   // We do this before constructing the frame to get the correct stack trace if
2337   // an exception is thrown.
2338   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
2339     UseScratchRegisterScope temps(GetVIXLAssembler());
2340     vixl32::Label resolution;
2341     vixl32::Label memory_barrier;
2342 
2343     // Check if we're visibly initialized.
2344 
2345     vixl32::Register temp1 = temps.Acquire();
2346     // Use r4 as other temporary register.
2347     DCHECK(!blocked_core_registers_[R4]);
2348     DCHECK(!kCoreCalleeSaves.Includes(r4));
2349     vixl32::Register temp2 = r4;
2350     for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2351       DCHECK(!reg.Is(r4));
2352     }
2353 
2354     // We don't emit a read barrier here to save on code size. We rely on the
2355     // resolution trampoline to do a suspend check before re-entering this code.
2356     __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
2357     __ Ldrb(temp2, MemOperand(temp1, kClassStatusByteOffset));
2358     __ Cmp(temp2, kShiftedVisiblyInitializedValue);
2359     __ B(cs, &frame_entry_label_);
2360 
2361     // Check if we're initialized and jump to code that does a memory barrier if
2362     // so.
2363     __ Cmp(temp2, kShiftedInitializedValue);
2364     __ B(cs, &memory_barrier);
2365 
2366     // Check if we're initializing and the thread initializing is the one
2367     // executing the code.
2368     __ Cmp(temp2, kShiftedInitializingValue);
2369     __ B(lo, &resolution);
2370 
2371     __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
2372     __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
2373     __ Cmp(temp1, temp2);
2374     __ B(eq, &frame_entry_label_);
2375     __ Bind(&resolution);
2376 
2377     // Jump to the resolution stub.
2378     ThreadOffset32 entrypoint_offset =
2379         GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
2380     __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
2381     __ Bx(temp1);
2382 
2383     __ Bind(&memory_barrier);
2384     GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2385   }
2386 
2387   __ Bind(&frame_entry_label_);
2388 
2389   if (HasEmptyFrame()) {
2390     // Ensure that the CFI opcode list is not empty.
2391     GetAssembler()->cfi().Nop();
2392     MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2393     return;
2394   }
2395 
2396   // Make sure the frame size isn't unreasonably large.
2397   DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
2398 
2399   if (!skip_overflow_check) {
2400     // Using r4 instead of IP saves 2 bytes.
2401     UseScratchRegisterScope temps(GetVIXLAssembler());
2402     vixl32::Register temp;
2403     // TODO: Remove this check when R4 is made a callee-save register
2404     // in ART compiled code (b/72801708). Currently we need to make
2405     // sure r4 is not blocked, e.g. in special purpose
2406     // TestCodeGeneratorARMVIXL; also asserting that r4 is available
2407     // here.
2408     if (!blocked_core_registers_[R4]) {
2409       for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2410         DCHECK(!reg.Is(r4));
2411       }
2412       DCHECK(!kCoreCalleeSaves.Includes(r4));
2413       temp = r4;
2414     } else {
2415       temp = temps.Acquire();
2416     }
2417     __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2418     // The load must immediately precede RecordPcInfo.
2419     ExactAssemblyScope aas(GetVIXLAssembler(),
2420                            vixl32::kMaxInstructionSizeInBytes,
2421                            CodeBufferCheckScope::kMaximumSize);
2422     __ ldr(temp, MemOperand(temp));
2423     RecordPcInfo(nullptr, 0);
2424   }
2425 
2426   uint32_t frame_size = GetFrameSize();
2427   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2428   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2429   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2430       core_spills_offset <= 3u * kArmWordSize) {
2431     // Do a single PUSH for core registers including the method and up to two
2432     // filler registers. Then store the single FP spill if any.
2433     // (The worst case is when the method is not required and we actually
2434     // store 3 extra registers but they are stored in the same properly
2435     // aligned 16-byte chunk where we're already writing anyway.)
2436     DCHECK_EQ(kMethodRegister.GetCode(), 0u);
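         // MaxInt<uint32_t>(n) is assumed to produce a mask of the n least significant bits,
         // i.e. the fillers are the lowest core registers below the first spilled register,
         // as the DCHECK below verifies.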
2437     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2438     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2439     __ Push(RegisterList(core_spill_mask_ | extra_regs));
2440     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2441     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2442                                            core_spills_offset,
2443                                            core_spill_mask_,
2444                                            kArmWordSize);
2445     if (fpu_spill_mask_ != 0u) {
2446       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2447       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2448       GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2449       GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2450     }
2451   } else {
2452     __ Push(RegisterList(core_spill_mask_));
2453     GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2454     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2455                                            /*offset=*/ 0,
2456                                            core_spill_mask_,
2457                                            kArmWordSize);
2458     if (fpu_spill_mask_ != 0) {
2459       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2460 
2461       // Check that list is contiguous.
2462       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2463 
2464       __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2465       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2466       GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2467                                              /*offset=*/ 0,
2468                                              fpu_spill_mask_,
2469                                              kArmWordSize);
2470     }
2471 
2472     // Adjust SP and save the current method if we need it. Note that we do
2473     // not save the method in HCurrentMethod, as the instruction might have
2474     // been removed in the SSA graph.
2475     if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2476       DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2477       __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2478       GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2479     } else {
2480       IncreaseFrame(fp_spills_offset);
2481       if (RequiresCurrentMethod()) {
2482         GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2483       }
2484     }
2485   }
2486 
2487   if (GetGraph()->HasShouldDeoptimizeFlag()) {
2488     UseScratchRegisterScope temps(GetVIXLAssembler());
2489     vixl32::Register temp = temps.Acquire();
2490     // Initialize should_deoptimize flag to 0.
2491     __ Mov(temp, 0);
2492     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2493   }
2494 
2495   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2496   MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2497 }
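
// Illustrative sketch of the frame laid out by GenerateFrameEntry() above
// (offsets are symbolic; the exact layout depends on the spill masks and the
// frame size computed for the method):
//
//     sp + frame_size          -> caller's frame
//     sp + core_spills_offset  .. callee-save core registers (core_spill_mask_)
//     sp + fp_spills_offset    .. callee-save FP registers (fpu_spill_mask_), if any
//     sp + 4 ..                   spill slots / outgoing arguments / fillers
//     sp + 0                      ArtMethod* of the current method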
2498 
GenerateFrameExit()2499 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2500   if (HasEmptyFrame()) {
2501     __ Bx(lr);
2502     return;
2503   }
2504 
2505   // Pop LR into PC to return.
2506   DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2507   uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
2508 
2509   uint32_t frame_size = GetFrameSize();
2510   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2511   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2512   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2513       // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2514       core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2515     // Load the FP spill if any and then do a single POP including the method
2516     // and up to two filler registers. If we have no FP spills, this also has
2517     // the advantage that we do not need to emit CFI directives.
2518     if (fpu_spill_mask_ != 0u) {
2519       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2520       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2521       GetAssembler()->cfi().RememberState();
2522       GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2523       GetAssembler()->cfi().Restore(DWARFReg(sreg));
2524     }
2525     // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2526     // never hold the return value.
2527     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2528     DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2529     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2530     __ Pop(RegisterList(pop_mask | extra_regs));
2531     if (fpu_spill_mask_ != 0u) {
2532       GetAssembler()->cfi().RestoreState();
2533     }
2534   } else {
2535     GetAssembler()->cfi().RememberState();
2536     DecreaseFrame(fp_spills_offset);
2537     if (fpu_spill_mask_ != 0) {
2538       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2539 
2540       // Check that list is contiguous.
2541       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2542 
2543       __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2544       GetAssembler()->cfi().AdjustCFAOffset(
2545           -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2546       GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2547     }
2548     __ Pop(RegisterList(pop_mask));
2549     GetAssembler()->cfi().RestoreState();
2550     GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2551   }
2552 }
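
// Illustrative example of the two exit sequences generated above, assuming
// core_spill_mask_ = {r5-r11, lr} and a single FP spill in s16:
//
//   Fast path (small core_spills_offset, at most a few filler words):
//       vldr  s16, [sp, #fp_spills_offset]
//       pop   {r2, r3, r5-r11, pc}     ; r2/r3 absorb the filler slots
//
//   General path:
//       add   sp, sp, #fp_spills_offset
//       vpop  {s16}
//       pop   {r5-r11, pc}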
2553 
Bind(HBasicBlock * block)2554 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2555   __ Bind(GetLabelOf(block));
2556 }
2557 
GetNextLocation(DataType::Type type)2558 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2559   switch (type) {
2560     case DataType::Type::kReference:
2561     case DataType::Type::kBool:
2562     case DataType::Type::kUint8:
2563     case DataType::Type::kInt8:
2564     case DataType::Type::kUint16:
2565     case DataType::Type::kInt16:
2566     case DataType::Type::kInt32: {
2567       uint32_t index = gp_index_++;
2568       uint32_t stack_index = stack_index_++;
2569       if (index < calling_convention.GetNumberOfRegisters()) {
2570         return LocationFrom(calling_convention.GetRegisterAt(index));
2571       } else {
2572         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2573       }
2574     }
2575 
2576     case DataType::Type::kInt64: {
2577       uint32_t index = gp_index_;
2578       uint32_t stack_index = stack_index_;
2579       gp_index_ += 2;
2580       stack_index_ += 2;
2581       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2582         if (calling_convention.GetRegisterAt(index).Is(r1)) {
2583           // Skip R1, and use R2_R3 instead.
2584           gp_index_++;
2585           index++;
2586         }
2587       }
2588       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2589         DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2590                   calling_convention.GetRegisterAt(index + 1).GetCode());
2591 
2592         return LocationFrom(calling_convention.GetRegisterAt(index),
2593                             calling_convention.GetRegisterAt(index + 1));
2594       } else {
2595         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2596       }
2597     }
2598 
2599     case DataType::Type::kFloat32: {
2600       uint32_t stack_index = stack_index_++;
2601       if (float_index_ % 2 == 0) {
2602         float_index_ = std::max(double_index_, float_index_);
2603       }
2604       if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2605         return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2606       } else {
2607         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2608       }
2609     }
2610 
2611     case DataType::Type::kFloat64: {
2612       double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2613       uint32_t stack_index = stack_index_;
2614       stack_index_ += 2;
2615       if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2616         uint32_t index = double_index_;
2617         double_index_ += 2;
2618         Location result = LocationFrom(
2619           calling_convention.GetFpuRegisterAt(index),
2620           calling_convention.GetFpuRegisterAt(index + 1));
2621         DCHECK(ExpectedPairLayout(result));
2622         return result;
2623       } else {
2624         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2625       }
2626     }
2627 
2628     case DataType::Type::kUint32:
2629     case DataType::Type::kUint64:
2630     case DataType::Type::kVoid:
2631       LOG(FATAL) << "Unexpected parameter type " << type;
2632       UNREACHABLE();
2633   }
2634   return Location::NoLocation();
2635 }
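
// Worked example for the argument assignment above (illustrative, assuming the
// usual ARM managed convention: ArtMethod* in r0, core argument registers
// r1-r3, FP argument registers s0-s15):
//
//   static void m(int a, long b, float c, double d, int e)
//     a -> r1           first free core register
//     b -> r2_r3        aligned core pair (r1 is skipped when it would be next)
//     c -> s0
//     d -> s2_s3        doubles take an even-aligned S-register pair
//     e -> stack slot   core registers are exhausted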
2636 
GetReturnLocation(DataType::Type type) const2637 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2638   switch (type) {
2639     case DataType::Type::kReference:
2640     case DataType::Type::kBool:
2641     case DataType::Type::kUint8:
2642     case DataType::Type::kInt8:
2643     case DataType::Type::kUint16:
2644     case DataType::Type::kInt16:
2645     case DataType::Type::kUint32:
2646     case DataType::Type::kInt32: {
2647       return LocationFrom(r0);
2648     }
2649 
2650     case DataType::Type::kFloat32: {
2651       return LocationFrom(s0);
2652     }
2653 
2654     case DataType::Type::kUint64:
2655     case DataType::Type::kInt64: {
2656       return LocationFrom(r0, r1);
2657     }
2658 
2659     case DataType::Type::kFloat64: {
2660       return LocationFrom(s0, s1);
2661     }
2662 
2663     case DataType::Type::kVoid:
2664       return Location::NoLocation();
2665   }
2666 
2667   UNREACHABLE();
2668 }
2669 
GetMethodLocation() const2670 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2671   return LocationFrom(kMethodRegister);
2672 }
2673 
GetNextLocation(DataType::Type type)2674 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2675   DCHECK_NE(type, DataType::Type::kReference);
2676 
2677   // Native ABI uses the same registers as managed, except that the method register r0
2678   // is a normal argument.
2679   Location location = Location::NoLocation();
2680   if (DataType::Is64BitType(type)) {
2681     gpr_index_ = RoundUp(gpr_index_, 2u);
2682     stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2683     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2684       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2685                               kParameterCoreRegistersVIXL[gpr_index_]);
2686       gpr_index_ += 2u;
2687     }
2688   } else {
2689     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2690       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2691       ++gpr_index_;
2692     }
2693   }
2694   if (location.IsInvalid()) {
2695     if (DataType::Is64BitType(type)) {
2696       location = Location::DoubleStackSlot(stack_offset_);
2697       stack_offset_ += 2 * kFramePointerSize;
2698     } else {
2699       location = Location::StackSlot(stack_offset_);
2700       stack_offset_ += kFramePointerSize;
2701     }
2702 
2703     if (for_register_allocation_) {
2704       location = Location::Any();
2705     }
2706   }
2707   return location;
2708 }
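
// Worked example for the @CriticalNative assignment above (illustrative,
// assuming core argument registers r0-r3 and 4-byte stack slots):
//
//   long f(int a, long b, int c)
//     a -> r0                      gpr_index 0
//     b -> r2_r3                   gpr_index rounded up to an even pair
//     c -> outgoing stack slot 0   core registers are exhausted
//
// During register allocation (for_register_allocation_ == true) stack
// arguments are reported as Location::Any() instead.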
2709 
GetReturnLocation(DataType::Type type) const2710 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2711     const {
2712   // We perform conversion to the managed ABI return register after the call if needed.
2713   InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2714   return dex_calling_convention.GetReturnLocation(type);
2715 }
2716 
GetMethodLocation() const2717 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2718   // Pass the method in the hidden argument R4.
2719   return Location::RegisterLocation(R4);
2720 }
2721 
Move32(Location destination,Location source)2722 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2723   if (source.Equals(destination)) {
2724     return;
2725   }
2726   if (destination.IsRegister()) {
2727     if (source.IsRegister()) {
2728       __ Mov(RegisterFrom(destination), RegisterFrom(source));
2729     } else if (source.IsFpuRegister()) {
2730       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2731     } else {
2732       GetAssembler()->LoadFromOffset(kLoadWord,
2733                                      RegisterFrom(destination),
2734                                      sp,
2735                                      source.GetStackIndex());
2736     }
2737   } else if (destination.IsFpuRegister()) {
2738     if (source.IsRegister()) {
2739       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2740     } else if (source.IsFpuRegister()) {
2741       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2742     } else {
2743       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2744     }
2745   } else {
2746     DCHECK(destination.IsStackSlot()) << destination;
2747     if (source.IsRegister()) {
2748       GetAssembler()->StoreToOffset(kStoreWord,
2749                                     RegisterFrom(source),
2750                                     sp,
2751                                     destination.GetStackIndex());
2752     } else if (source.IsFpuRegister()) {
2753       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2754     } else {
2755       DCHECK(source.IsStackSlot()) << source;
2756       UseScratchRegisterScope temps(GetVIXLAssembler());
2757       vixl32::Register temp = temps.Acquire();
2758       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2759       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2760     }
2761   }
2762 }
2763 
MoveConstant(Location location,int32_t value)2764 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2765   DCHECK(location.IsRegister());
2766   __ Mov(RegisterFrom(location), value);
2767 }
2768 
MoveLocation(Location dst,Location src,DataType::Type dst_type)2769 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2770   // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2771   // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2772   HParallelMove move(GetGraph()->GetAllocator());
2773   move.AddMove(src, dst, dst_type, nullptr);
2774   GetMoveResolver()->EmitNativeCode(&move);
2775 }
2776 
AddLocationAsTemp(Location location,LocationSummary * locations)2777 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2778   if (location.IsRegister()) {
2779     locations->AddTemp(location);
2780   } else if (location.IsRegisterPair()) {
2781     locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2782     locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2783   } else {
2784     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2785   }
2786 }
2787 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)2788 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2789                                          HInstruction* instruction,
2790                                          uint32_t dex_pc,
2791                                          SlowPathCode* slow_path) {
2792   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2793 
2794   ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2795   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2796   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2797   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2798   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2799     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2800     // Ensure the pc position is recorded immediately after the `blx` instruction.
2801     // In T32, `blx` (register) has only a 16-bit encoding, which is why a stricter scope size is used.
2802     ExactAssemblyScope aas(GetVIXLAssembler(),
2803                            vixl32::k16BitT32InstructionSizeInBytes,
2804                            CodeBufferCheckScope::kExactSize);
2805     __ blx(lr);
2806     if (EntrypointRequiresStackMap(entrypoint)) {
2807       RecordPcInfo(instruction, dex_pc, slow_path);
2808     }
2809   } else {
2810     // Ensure the pc position is recorded immediately after the `bl` instruction.
2811     ExactAssemblyScope aas(GetVIXLAssembler(),
2812                            vixl32::k32BitT32InstructionSizeInBytes,
2813                            CodeBufferCheckScope::kExactSize);
2814     EmitEntrypointThunkCall(entrypoint_offset);
2815     if (EntrypointRequiresStackMap(entrypoint)) {
2816       RecordPcInfo(instruction, dex_pc, slow_path);
2817     }
2818   }
2819 }
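
// Roughly, the two paths above emit either (illustrative):
//
//     ldr  lr, [tr, #entrypoint_offset]   ; load the entrypoint from the Thread
//     blx  lr
//
// or, for AOT slow-path calls, a single
//
//     bl   <shared entrypoint thunk>      ; the thunk is shared across the oat file
//
// and in both cases record a stack map afterwards if the entrypoint requires one.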
2820 
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)2821 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2822                                                                HInstruction* instruction,
2823                                                                SlowPathCode* slow_path) {
2824   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2825   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2826   __ Blx(lr);
2827 }
2828 
HandleGoto(HInstruction * got,HBasicBlock * successor)2829 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2830   if (successor->IsExitBlock()) {
2831     DCHECK(got->GetPrevious()->AlwaysThrows());
2832     return;  // no code needed
2833   }
2834 
2835   HBasicBlock* block = got->GetBlock();
2836   HInstruction* previous = got->GetPrevious();
2837   HLoopInformation* info = block->GetLoopInformation();
2838 
2839   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2840     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2841     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2842     return;
2843   }
2844   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2845     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2846     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2847   }
2848   if (!codegen_->GoesToNextBlock(block, successor)) {
2849     __ B(codegen_->GetLabelOf(successor));
2850   }
2851 }
2852 
VisitGoto(HGoto * got)2853 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2854   got->SetLocations(nullptr);
2855 }
2856 
VisitGoto(HGoto * got)2857 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2858   HandleGoto(got, got->GetSuccessor());
2859 }
2860 
VisitTryBoundary(HTryBoundary * try_boundary)2861 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2862   try_boundary->SetLocations(nullptr);
2863 }
2864 
VisitTryBoundary(HTryBoundary * try_boundary)2865 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2866   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2867   if (!successor->IsExitBlock()) {
2868     HandleGoto(try_boundary, successor);
2869   }
2870 }
2871 
VisitExit(HExit * exit)2872 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2873   exit->SetLocations(nullptr);
2874 }
2875 
VisitExit(HExit * exit)2876 void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
2877 
GenerateCompareTestAndBranch(HCondition * condition,vixl32::Label * true_target,vixl32::Label * false_target,bool is_far_target)2878 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2879                                                                    vixl32::Label* true_target,
2880                                                                    vixl32::Label* false_target,
2881                                                                    bool is_far_target) {
2882   if (true_target == false_target) {
2883     DCHECK(true_target != nullptr);
2884     __ B(true_target);
2885     return;
2886   }
2887 
2888   vixl32::Label* non_fallthrough_target;
2889   bool invert;
2890   bool emit_both_branches;
2891 
2892   if (true_target == nullptr) {
2893     // The true target is fallthrough.
2894     DCHECK(false_target != nullptr);
2895     non_fallthrough_target = false_target;
2896     invert = true;
2897     emit_both_branches = false;
2898   } else {
2899     non_fallthrough_target = true_target;
2900     invert = false;
2901     // Either the false target is fallthrough, or there is no fallthrough
2902     // and both branches must be emitted.
2903     emit_both_branches = (false_target != nullptr);
2904   }
2905 
2906   const auto cond = GenerateTest(condition, invert, codegen_);
2907 
2908   __ B(cond.first, non_fallthrough_target, is_far_target);
2909 
2910   if (emit_both_branches) {
2911     // No target falls through, we need to branch.
2912     __ B(false_target);
2913   }
2914 }
2915 
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl32::Label * true_target,vixl32::Label * false_target,bool far_target)2916 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2917                                                             size_t condition_input_index,
2918                                                             vixl32::Label* true_target,
2919                                                             vixl32::Label* false_target,
2920                                                             bool far_target) {
2921   HInstruction* cond = instruction->InputAt(condition_input_index);
2922 
2923   if (true_target == nullptr && false_target == nullptr) {
2924     // Nothing to do. The code always falls through.
2925     return;
2926   } else if (cond->IsIntConstant()) {
2927     // Constant condition, statically compared against "true" (integer value 1).
2928     if (cond->AsIntConstant()->IsTrue()) {
2929       if (true_target != nullptr) {
2930         __ B(true_target);
2931       }
2932     } else {
2933       DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2934       if (false_target != nullptr) {
2935         __ B(false_target);
2936       }
2937     }
2938     return;
2939   }
2940 
2941   // The following code generates these patterns:
2942   //  (1) true_target == nullptr && false_target != nullptr
2943   //        - opposite condition true => branch to false_target
2944   //  (2) true_target != nullptr && false_target == nullptr
2945   //        - condition true => branch to true_target
2946   //  (3) true_target != nullptr && false_target != nullptr
2947   //        - condition true => branch to true_target
2948   //        - branch to false_target
2949   if (IsBooleanValueOrMaterializedCondition(cond)) {
2950     // Condition has been materialized, compare the output to 0.
2951     if (kIsDebugBuild) {
2952       Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2953       DCHECK(cond_val.IsRegister());
2954     }
2955     if (true_target == nullptr) {
2956       __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2957                                 false_target,
2958                                 far_target);
2959     } else {
2960       __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2961                                    true_target,
2962                                    far_target);
2963     }
2964   } else {
2965     // Condition has not been materialized. Use its inputs as the comparison and
2966     // its condition as the branch condition.
2967     HCondition* condition = cond->AsCondition();
2968 
2969     // If this is a long or FP comparison that has been folded into
2970     // the HCondition, generate the comparison directly.
2971     DataType::Type type = condition->InputAt(0)->GetType();
2972     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2973       GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2974       return;
2975     }
2976 
2977     vixl32::Label* non_fallthrough_target;
2978     vixl32::Condition arm_cond = vixl32::Condition::None();
2979     const vixl32::Register left = InputRegisterAt(cond, 0);
2980     const Operand right = InputOperandAt(cond, 1);
2981 
2982     if (true_target == nullptr) {
2983       arm_cond = ARMCondition(condition->GetOppositeCondition());
2984       non_fallthrough_target = false_target;
2985     } else {
2986       arm_cond = ARMCondition(condition->GetCondition());
2987       non_fallthrough_target = true_target;
2988     }
2989 
2990     if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2991       if (arm_cond.Is(eq)) {
2992         __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2993       } else {
2994         DCHECK(arm_cond.Is(ne));
2995         __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2996       }
2997     } else {
2998       __ Cmp(left, right);
2999       __ B(arm_cond, non_fallthrough_target, far_target);
3000     }
3001   }
3002 
3003   // If neither branch falls through (case 3), the conditional branch to `true_target`
3004   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3005   if (true_target != nullptr && false_target != nullptr) {
3006     __ B(false_target);
3007   }
3008 }
3009 
VisitIf(HIf * if_instr)3010 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
3011   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3012   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3013     locations->SetInAt(0, Location::RequiresRegister());
3014     if (GetGraph()->IsCompilingBaseline() &&
3015         codegen_->GetCompilerOptions().ProfileBranches() &&
3016         !Runtime::Current()->IsAotCompiler()) {
3017       locations->AddTemp(Location::RequiresRegister());
3018     }
3019   }
3020 }
3021 
VisitIf(HIf * if_instr)3022 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
3023   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3024   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3025   vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
3026       nullptr : codegen_->GetLabelOf(true_successor);
3027   vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
3028       nullptr : codegen_->GetLabelOf(false_successor);
3029   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3030     if (GetGraph()->IsCompilingBaseline() &&
3031         codegen_->GetCompilerOptions().ProfileBranches() &&
3032         !Runtime::Current()->IsAotCompiler()) {
3033       DCHECK(if_instr->InputAt(0)->IsCondition());
3034       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3035       DCHECK(info != nullptr);
3036       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3037       // Currently, not all If branches are profiled.
3038       if (cache != nullptr) {
3039         uint32_t address =
3040             reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
3041         static_assert(
3042             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3043             "Unexpected offsets for BranchCache");
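        // The BranchCache holds two 16-bit counters: the counter for a false
        // condition at offset 0 and for a true condition at offset 2.
        // `condition` (0 or 1) scaled by 2 selects the counter; the
        // incremented value is stored back only if it did not wrap to zero,
        // so each counter saturates at 0xFFFF.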
3044         vixl32::Label done;
3045         UseScratchRegisterScope temps(GetVIXLAssembler());
3046         vixl32::Register temp = temps.Acquire();
3047         vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
3048         vixl32::Register condition = InputRegisterAt(if_instr, 0);
3049         __ Mov(temp, address);
3050         __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3051         __ Adds(counter, counter, 1);
3052         __ Uxth(counter, counter);
3053         __ CompareAndBranchIfZero(counter, &done);
3054         __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3055         __ Bind(&done);
3056       }
3057     }
3058   }
3059   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3060 }
3061 
VisitDeoptimize(HDeoptimize * deoptimize)3062 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3063   LocationSummary* locations = new (GetGraph()->GetAllocator())
3064       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3065   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3066   RegisterSet caller_saves = RegisterSet::Empty();
3067   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
3068   locations->SetCustomSlowPathCallerSaves(caller_saves);
3069   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3070     locations->SetInAt(0, Location::RequiresRegister());
3071   }
3072 }
3073 
VisitDeoptimize(HDeoptimize * deoptimize)3074 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3075   SlowPathCodeARMVIXL* slow_path =
3076       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
3077   GenerateTestAndBranch(deoptimize,
3078                         /* condition_input_index= */ 0,
3079                         slow_path->GetEntryLabel(),
3080                         /* false_target= */ nullptr);
3081 }
3082 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3083 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3084   LocationSummary* locations = new (GetGraph()->GetAllocator())
3085       LocationSummary(flag, LocationSummary::kNoCall);
3086   locations->SetOut(Location::RequiresRegister());
3087 }
3088 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3089 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3090   GetAssembler()->LoadFromOffset(kLoadWord,
3091                                  OutputRegister(flag),
3092                                  sp,
3093                                  codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
3094 }
3095 
VisitSelect(HSelect * select)3096 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
3097   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3098   const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
3099 
3100   if (is_floating_point) {
3101     locations->SetInAt(0, Location::RequiresFpuRegister());
3102     locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
3103   } else {
3104     locations->SetInAt(0, Location::RequiresRegister());
3105     locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
3106   }
3107 
3108   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3109     locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
3110     // The code generator handles overlap with the values, but not with the condition.
3111     locations->SetOut(Location::SameAsFirstInput());
3112   } else if (is_floating_point) {
3113     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3114   } else {
3115     if (!locations->InAt(1).IsConstant()) {
3116       locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
3117     }
3118 
3119     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3120   }
3121 }
3122 
VisitSelect(HSelect * select)3123 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
3124   HInstruction* const condition = select->GetCondition();
3125   const LocationSummary* const locations = select->GetLocations();
3126   const DataType::Type type = select->GetType();
3127   const Location first = locations->InAt(0);
3128   const Location out = locations->Out();
3129   const Location second = locations->InAt(1);
3130 
3131   // In the unlucky case where the output of this instruction overlaps
3132   // with an input of an "emitted-at-use-site" condition, and the output
3133   // is not one of this instruction's own inputs, we need to fall back
3134   // to branches instead of conditional ARM instructions.
3135   bool output_overlaps_with_condition_inputs =
3136       !IsBooleanValueOrMaterializedCondition(condition) &&
3137       !out.Equals(first) &&
3138       !out.Equals(second) &&
3139       (condition->GetLocations()->InAt(0).Equals(out) ||
3140        condition->GetLocations()->InAt(1).Equals(out));
3141   DCHECK_IMPLIES(output_overlaps_with_condition_inputs, condition->IsCondition());
3142   Location src;
3143 
3144   if (condition->IsIntConstant()) {
3145     if (condition->AsIntConstant()->IsFalse()) {
3146       src = first;
3147     } else {
3148       src = second;
3149     }
3150 
3151     codegen_->MoveLocation(out, src, type);
3152     return;
3153   }
3154 
3155   if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
3156     bool invert = false;
3157 
3158     if (out.Equals(second)) {
3159       src = first;
3160       invert = true;
3161     } else if (out.Equals(first)) {
3162       src = second;
3163     } else if (second.IsConstant()) {
3164       DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
3165       src = second;
3166     } else if (first.IsConstant()) {
3167       DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
3168       src = first;
3169       invert = true;
3170     } else {
3171       src = second;
3172     }
3173 
3174     if (CanGenerateConditionalMove(out, src)) {
3175       if (!out.Equals(first) && !out.Equals(second)) {
3176         codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
3177       }
3178 
3179       std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
3180 
3181       if (IsBooleanValueOrMaterializedCondition(condition)) {
3182         __ Cmp(InputRegisterAt(select, 2), 0);
3183         cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
3184       } else {
3185         cond = GenerateTest(condition->AsCondition(), invert, codegen_);
3186       }
3187 
3188       const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
3189       // We use the scope because of the IT block that follows.
3190       ExactAssemblyScope guard(GetVIXLAssembler(),
3191                                instr_count * vixl32::k16BitT32InstructionSizeInBytes,
3192                                CodeBufferCheckScope::kExactSize);
3193 
3194       if (out.IsRegister()) {
3195         __ it(cond.first);
3196         __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
3197       } else {
3198         DCHECK(out.IsRegisterPair());
3199 
3200         Operand operand_high(0);
3201         Operand operand_low(0);
3202 
3203         if (src.IsConstant()) {
3204           const int64_t value = Int64ConstantFrom(src);
3205 
3206           operand_high = High32Bits(value);
3207           operand_low = Low32Bits(value);
3208         } else {
3209           DCHECK(src.IsRegisterPair());
3210           operand_high = HighRegisterFrom(src);
3211           operand_low = LowRegisterFrom(src);
3212         }
3213 
3214         __ it(cond.first);
3215         __ mov(cond.first, LowRegisterFrom(out), operand_low);
3216         __ it(cond.first);
3217         __ mov(cond.first, HighRegisterFrom(out), operand_high);
3218       }
3219 
3220       return;
3221     }
3222   }
3223 
3224   vixl32::Label* false_target = nullptr;
3225   vixl32::Label* true_target = nullptr;
3226   vixl32::Label select_end;
3227   vixl32::Label other_case;
3228   vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
3229 
3230   if (out.Equals(second)) {
3231     true_target = target;
3232     src = first;
3233   } else {
3234     false_target = target;
3235     src = second;
3236 
3237     if (!out.Equals(first)) {
3238       if (output_overlaps_with_condition_inputs) {
3239         false_target = &other_case;
3240       } else {
3241         codegen_->MoveLocation(out, first, type);
3242       }
3243     }
3244   }
3245 
3246   GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3247   codegen_->MoveLocation(out, src, type);
3248   if (output_overlaps_with_condition_inputs) {
3249     __ B(target);
3250     __ Bind(&other_case);
3251     codegen_->MoveLocation(out, first, type);
3252   }
3253 
3254   if (select_end.IsReferenced()) {
3255     __ Bind(&select_end);
3256   }
3257 }
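
// Illustrative sketch of the conditional-move fast path above for an integer
// select whose output already holds the false value, with a materialized
// condition (register names are hypothetical):
//
//     cmp   r3, #0            ; r3 holds the materialized condition
//     it    ne
//     movne r0, r1            ; out (r0) := true value (r1) when the condition holds
//
// The branch-based code at the end of the method is the fallback for FP types
// and for the output/condition-input overlap case described above.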
3258 
VisitNop(HNop * nop)3259 void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
3260   new (GetGraph()->GetAllocator()) LocationSummary(nop);
3261 }
3262 
VisitNop(HNop *)3263 void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
3264   // The environment recording already happened in CodeGenerator::Compile.
3265 }
3266 
IncreaseFrame(size_t adjustment)3267 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3268   __ Claim(adjustment);
3269   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3270 }
3271 
DecreaseFrame(size_t adjustment)3272 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3273   __ Drop(adjustment);
3274   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3275 }
3276 
GenerateNop()3277 void CodeGeneratorARMVIXL::GenerateNop() {
3278   __ Nop();
3279 }
3280 
3281 // `temp` is an extra temporary register that is used for some conditions;
3282 // callers may not specify it, in which case the method will use a scratch
3283 // register instead.
GenerateConditionWithZero(IfCondition condition,vixl32::Register out,vixl32::Register in,vixl32::Register temp)3284 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3285                                                      vixl32::Register out,
3286                                                      vixl32::Register in,
3287                                                      vixl32::Register temp) {
3288   switch (condition) {
3289     case kCondEQ:
3290     // x <= 0 iff x == 0 when the comparison is unsigned.
3291     case kCondBE:
3292       if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3293         temp = out;
3294       }
3295 
3296       // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3297       // different as well.
3298       if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3299         // temp = - in; only 0 sets the carry flag.
3300         __ Rsbs(temp, in, 0);
3301 
3302         if (out.Is(in)) {
3303           std::swap(in, temp);
3304         }
3305 
3306         // out = - in + in + carry = carry
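        // E.g. (illustrative): for in = 0, rsbs yields 0 with the carry set,
        // so adc computes 0 + 0 + 1 = 1; for in = 5, rsbs yields -5 with the
        // carry clear, so adc computes -5 + 5 + 0 = 0.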
3307         __ Adc(out, temp, in);
3308       } else {
3309         // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
3310         __ Clz(out, in);
3311         // Any number less than 32 logically shifted right by 5 bits results in 0;
3312         // the same operation on 32 yields 1.
3313         __ Lsr(out, out, 5);
3314       }
3315 
3316       break;
3317     case kCondNE:
3318     // x > 0 iff x != 0 when the comparison is unsigned.
3319     case kCondA: {
3320       UseScratchRegisterScope temps(GetVIXLAssembler());
3321 
3322       if (out.Is(in)) {
3323         if (!temp.IsValid() || in.Is(temp)) {
3324           temp = temps.Acquire();
3325         }
3326       } else if (!temp.IsValid() || !temp.IsLow()) {
3327         temp = out;
3328       }
3329 
3330       // temp = in - 1; only 0 does not set the carry flag.
3331       __ Subs(temp, in, 1);
3332       // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3333       __ Sbc(out, in, temp);
3334       break;
3335     }
3336     case kCondGE:
3337       __ Mvn(out, in);
3338       in = out;
3339       FALLTHROUGH_INTENDED;
3340     case kCondLT:
3341       // We only care about the sign bit.
3342       __ Lsr(out, in, 31);
3343       break;
3344     case kCondAE:
3345       // Trivially true.
3346       __ Mov(out, 1);
3347       break;
3348     case kCondB:
3349       // Trivially false.
3350       __ Mov(out, 0);
3351       break;
3352     default:
3353       LOG(FATAL) << "Unexpected condition " << condition;
3354       UNREACHABLE();
3355   }
3356 }
3357 
HandleCondition(HCondition * cond)3358 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3359   LocationSummary* locations =
3360       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3361   const DataType::Type type = cond->InputAt(0)->GetType();
3362   if (DataType::IsFloatingPointType(type)) {
3363     locations->SetInAt(0, Location::RequiresFpuRegister());
3364     locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3365   } else {
3366     locations->SetInAt(0, Location::RequiresRegister());
3367     locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3368   }
3369   if (!cond->IsEmittedAtUseSite()) {
3370     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3371   }
3372 }
3373 
HandleCondition(HCondition * cond)3374 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3375   if (cond->IsEmittedAtUseSite()) {
3376     return;
3377   }
3378 
3379   const DataType::Type type = cond->GetLeft()->GetType();
3380 
3381   if (DataType::IsFloatingPointType(type)) {
3382     GenerateConditionGeneric(cond, codegen_);
3383     return;
3384   }
3385 
3386   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3387 
3388   const IfCondition condition = cond->GetCondition();
3389 
3390   // A condition with only one boolean input, or with two boolean inputs but a comparison
3391   // other than equality or inequality, results from transformations done by the instruction
3392   // simplifier and is handled as a regular condition with integral inputs.
3393   if (type == DataType::Type::kBool &&
3394       cond->GetRight()->GetType() == DataType::Type::kBool &&
3395       (condition == kCondEQ || condition == kCondNE)) {
3396     vixl32::Register left = InputRegisterAt(cond, 0);
3397     const vixl32::Register out = OutputRegister(cond);
3398     const Location right_loc = cond->GetLocations()->InAt(1);
3399 
3400     // The constant case is handled by the instruction simplifier.
3401     DCHECK(!right_loc.IsConstant());
3402 
3403     vixl32::Register right = RegisterFrom(right_loc);
3404 
3405     // Avoid 32-bit instructions if possible.
3406     if (out.Is(right)) {
3407       std::swap(left, right);
3408     }
3409 
3410     __ Eor(out, left, right);
3411 
3412     if (condition == kCondEQ) {
3413       __ Eor(out, out, 1);
3414     }
3415 
3416     return;
3417   }
3418 
3419   GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3420 }
3421 
VisitEqual(HEqual * comp)3422 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3423   HandleCondition(comp);
3424 }
3425 
VisitEqual(HEqual * comp)3426 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3427   HandleCondition(comp);
3428 }
3429 
VisitNotEqual(HNotEqual * comp)3430 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3431   HandleCondition(comp);
3432 }
3433 
VisitNotEqual(HNotEqual * comp)3434 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3435   HandleCondition(comp);
3436 }
3437 
VisitLessThan(HLessThan * comp)3438 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3439   HandleCondition(comp);
3440 }
3441 
VisitLessThan(HLessThan * comp)3442 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3443   HandleCondition(comp);
3444 }
3445 
VisitLessThanOrEqual(HLessThanOrEqual * comp)3446 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3447   HandleCondition(comp);
3448 }
3449 
VisitLessThanOrEqual(HLessThanOrEqual * comp)3450 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3451   HandleCondition(comp);
3452 }
3453 
VisitGreaterThan(HGreaterThan * comp)3454 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3455   HandleCondition(comp);
3456 }
3457 
VisitGreaterThan(HGreaterThan * comp)3458 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3459   HandleCondition(comp);
3460 }
3461 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3462 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3463   HandleCondition(comp);
3464 }
3465 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3466 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3467   HandleCondition(comp);
3468 }
3469 
VisitBelow(HBelow * comp)3470 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3471   HandleCondition(comp);
3472 }
3473 
VisitBelow(HBelow * comp)3474 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3475   HandleCondition(comp);
3476 }
3477 
VisitBelowOrEqual(HBelowOrEqual * comp)3478 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3479   HandleCondition(comp);
3480 }
3481 
VisitBelowOrEqual(HBelowOrEqual * comp)3482 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3483   HandleCondition(comp);
3484 }
3485 
VisitAbove(HAbove * comp)3486 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3487   HandleCondition(comp);
3488 }
3489 
VisitAbove(HAbove * comp)3490 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3491   HandleCondition(comp);
3492 }
3493 
VisitAboveOrEqual(HAboveOrEqual * comp)3494 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3495   HandleCondition(comp);
3496 }
3497 
VisitAboveOrEqual(HAboveOrEqual * comp)3498 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3499   HandleCondition(comp);
3500 }
3501 
VisitIntConstant(HIntConstant * constant)3502 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3503   LocationSummary* locations =
3504       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3505   locations->SetOut(Location::ConstantLocation(constant));
3506 }
3507 
VisitIntConstant(HIntConstant * constant)3508 void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
3509   // Will be generated at use site.
3510 }
3511 
VisitNullConstant(HNullConstant * constant)3512 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3513   LocationSummary* locations =
3514       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3515   locations->SetOut(Location::ConstantLocation(constant));
3516 }
3517 
VisitNullConstant(HNullConstant * constant)3518 void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
3519   // Will be generated at use site.
3520 }
3521 
VisitLongConstant(HLongConstant * constant)3522 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3523   LocationSummary* locations =
3524       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3525   locations->SetOut(Location::ConstantLocation(constant));
3526 }
3527 
VisitLongConstant(HLongConstant * constant)3528 void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
3529   // Will be generated at use site.
3530 }
3531 
VisitFloatConstant(HFloatConstant * constant)3532 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3533   LocationSummary* locations =
3534       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3535   locations->SetOut(Location::ConstantLocation(constant));
3536 }
3537 
VisitFloatConstant(HFloatConstant * constant)3538 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3539     [[maybe_unused]] HFloatConstant* constant) {
3540   // Will be generated at use site.
3541 }
3542 
VisitDoubleConstant(HDoubleConstant * constant)3543 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3544   LocationSummary* locations =
3545       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3546   locations->SetOut(Location::ConstantLocation(constant));
3547 }
3548 
VisitDoubleConstant(HDoubleConstant * constant)3549 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3550     [[maybe_unused]] HDoubleConstant* constant) {
3551   // Will be generated at use site.
3552 }
3553 
VisitConstructorFence(HConstructorFence * constructor_fence)3554 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3555   constructor_fence->SetLocations(nullptr);
3556 }
3557 
VisitConstructorFence(HConstructorFence * constructor_fence)3558 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3559     [[maybe_unused]] HConstructorFence* constructor_fence) {
3560   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3561 }
3562 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3563 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3564   memory_barrier->SetLocations(nullptr);
3565 }
3566 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3567 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3568   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3569 }
3570 
VisitReturnVoid(HReturnVoid * ret)3571 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3572   ret->SetLocations(nullptr);
3573 }
3574 
VisitReturnVoid(HReturnVoid * ret)3575 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
3576   codegen_->GenerateFrameExit();
3577 }
3578 
VisitReturn(HReturn * ret)3579 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3580   LocationSummary* locations =
3581       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3582   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3583 }
3584 
VisitReturn(HReturn * ret)3585 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3586   if (GetGraph()->IsCompilingOsr()) {
3587     // To simplify callers of an OSR method, we put the return value in both
3588     // floating point and core registers.
3589     switch (ret->InputAt(0)->GetType()) {
3590       case DataType::Type::kFloat32:
3591         __ Vmov(r0, s0);
3592         break;
3593       case DataType::Type::kFloat64:
3594         __ Vmov(r0, r1, d0);
3595         break;
3596       default:
3597         break;
3598     }
3599   }
3600   codegen_->GenerateFrameExit();
3601 }
3602 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3603 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3604   // The trampoline uses the same calling convention as the dex calling convention,
3605   // except that instead of being loaded with the target Method*, arg0/r0 contains
3606   // the method_idx.
3607   HandleInvoke(invoke);
3608 }
3609 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3610 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3611   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3612   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3613 }
3614 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3615 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3616   // Explicit clinit checks triggered by static invokes must have been pruned by
3617   // art::PrepareForRegisterAllocation.
3618   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3619 
3620   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3621   if (intrinsic.TryDispatch(invoke)) {
3622     return;
3623   }
3624 
3625   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3626     CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3627         /*for_register_allocation=*/ true);
3628     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3629   } else {
3630     HandleInvoke(invoke);
3631   }
3632 }
3633 
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3634 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3635   if (invoke->GetLocations()->Intrinsified()) {
3636     IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3637     intrinsic.Dispatch(invoke);
3638     return true;
3639   }
3640   return false;
3641 }
3642 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3643 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3644   // Explicit clinit checks triggered by static invokes must have been pruned by
3645   // art::PrepareForRegisterAllocation.
3646   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3647 
3648   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3649     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3650     return;
3651   }
3652 
3653   LocationSummary* locations = invoke->GetLocations();
3654   codegen_->GenerateStaticOrDirectCall(
3655       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3656 
3657   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3658 }
3659 
HandleInvoke(HInvoke * invoke)3660 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3661   InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3662   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3663 }
3664 
VisitInvokeVirtual(HInvokeVirtual * invoke)3665 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3666   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3667   if (intrinsic.TryDispatch(invoke)) {
3668     return;
3669   }
3670 
3671   HandleInvoke(invoke);
3672 }
3673 
VisitInvokeVirtual(HInvokeVirtual * invoke)3674 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3675   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3676     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3677     return;
3678   }
3679 
3680   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3681   DCHECK(!codegen_->IsLeafMethod());
3682 
3683   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3684 }
3685 
VisitInvokeInterface(HInvokeInterface * invoke)3686 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3687   HandleInvoke(invoke);
3688   // Add the hidden argument.
3689   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3690     // We cannot request r12 as it's blocked by the register allocator.
3691     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3692   }
3693 }
3694 
MaybeGenerateInlineCacheCheck(HInstruction * instruction,vixl32::Register klass)3695 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3696                                                          vixl32::Register klass) {
3697   DCHECK_EQ(r0.GetCode(), klass.GetCode());
3698   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3699     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3700     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3701         info, GetCompilerOptions(), instruction->AsInvoke());
3702     if (cache != nullptr) {
3703       uint32_t address = reinterpret_cast32<uint32_t>(cache);
3704       vixl32::Label done;
3705       UseScratchRegisterScope temps(GetVIXLAssembler());
3706       temps.Exclude(ip);
3707       __ Mov(r4, address);
3708       __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3709       // Fast path for a monomorphic cache.
3710       __ Cmp(klass, ip);
3711       __ B(eq, &done, /* is_far_target= */ false);
3712       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
3713       __ Bind(&done);
3714     } else {
3715       // This is unexpected, but we don't guarantee stable compilation across
3716       // JIT runs so just warn about it.
3717       // JIT runs, so just warn about it.
3718       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3719     }
3720   }
3721 }
3722 
3723 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3724   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3725   LocationSummary* locations = invoke->GetLocations();
3726   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3727   Location receiver = locations->InAt(0);
3728   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3729 
3730   DCHECK(!receiver.IsStackSlot());
3731 
3732   // Ensure the pc position is recorded immediately after the `ldr` instruction.
3733   {
3734     ExactAssemblyScope aas(GetVIXLAssembler(),
3735                            vixl32::kMaxInstructionSizeInBytes,
3736                            CodeBufferCheckScope::kMaximumSize);
3737     // /* HeapReference<Class> */ temp = receiver->klass_
3738     __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3739     codegen_->MaybeRecordImplicitNullCheck(invoke);
3740   }
3741   // Instead of simply (possibly) unpoisoning `temp` here, we should
3742   // emit a read barrier for the previous class reference load.
3743   // However this is not required in practice, as this is an
3744   // intermediate/temporary reference and because the current
3745   // concurrent copying collector keeps the from-space memory
3746   // intact/accessible until the end of the marking phase (though
3747   // a future concurrent copying collector may not).
3748   GetAssembler()->MaybeUnpoisonHeapReference(temp);
3749 
3750   // If we're compiling baseline, update the inline cache.
3751   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3752 
3753   GetAssembler()->LoadFromOffset(kLoadWord,
3754                                  temp,
3755                                  temp,
3756                                  mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3757 
3758   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3759       invoke->GetImtIndex(), kArmPointerSize));
3760   // temp = temp->GetImtEntryAt(method_offset);
3761   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3762   uint32_t entry_point =
3763       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3764   // LR = temp->GetEntryPoint();
3765   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3766 
3767   {
3768     // Set the hidden argument (in r12). It is done here, right before the BLX, to prevent other
3769     // instructions from clobbering it, as they might use r12 as a scratch register.
3770     Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3771     // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3772     // so it checks if the application is using them (by passing them to the macro assembler
3773     // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3774     // what is available, and is the opposite of the standard usage: Instead of requesting a
3775     // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3776     // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3777     // (to materialize the constant), since the destination register becomes available for such use
3778     // internally for the duration of the macro instruction.
3779     UseScratchRegisterScope temps(GetVIXLAssembler());
3780     temps.Exclude(RegisterFrom(hidden_reg));
3781     if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3782       Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3783       if (current_method.IsStackSlot()) {
3784         GetAssembler()->LoadFromOffset(
3785             kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3786       } else {
3787         __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3788       }
3789     } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3790       // We pass the method from the IMT in case of a conflict. This will ensure
3791       // we go into the runtime to resolve the actual method.
3792       CHECK_NE(temp.GetCode(), lr.GetCode());
3793       __ Mov(RegisterFrom(hidden_reg), temp);
3794     } else {
3795       codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3796     }
3797   }
3798   {
3799     // Ensure the pc position is recorded immediately after the `blx` instruction.
3800     // blx in T32 has only a 16-bit encoding, which is why a stricter size check is used for the scope.
3801     ExactAssemblyScope aas(GetVIXLAssembler(),
3802                            vixl32::k16BitT32InstructionSizeInBytes,
3803                            CodeBufferCheckScope::kExactSize);
3804     // LR();
3805     __ blx(lr);
3806     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3807     DCHECK(!codegen_->IsLeafMethod());
3808   }
3809 
3810   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3811 }
3812 
3813 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3814   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3815   if (intrinsic.TryDispatch(invoke)) {
3816     return;
3817   }
3818   HandleInvoke(invoke);
3819 }
3820 
3821 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3822   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3823     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3824     return;
3825   }
3826   codegen_->GenerateInvokePolymorphicCall(invoke);
3827   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3828 }
3829 
3830 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3831   HandleInvoke(invoke);
3832 }
3833 
3834 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3835   codegen_->GenerateInvokeCustomCall(invoke);
3836   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3837 }
3838 
3839 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3840   LocationSummary* locations =
3841       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3842   switch (neg->GetResultType()) {
3843     case DataType::Type::kInt32: {
3844       locations->SetInAt(0, Location::RequiresRegister());
3845       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3846       break;
3847     }
3848     case DataType::Type::kInt64: {
3849       locations->SetInAt(0, Location::RequiresRegister());
3850       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3851       break;
3852     }
3853 
3854     case DataType::Type::kFloat32:
3855     case DataType::Type::kFloat64:
3856       locations->SetInAt(0, Location::RequiresFpuRegister());
3857       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3858       break;
3859 
3860     default:
3861       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3862   }
3863 }
3864 
3865 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3866   LocationSummary* locations = neg->GetLocations();
3867   Location out = locations->Out();
3868   Location in = locations->InAt(0);
3869   switch (neg->GetResultType()) {
3870     case DataType::Type::kInt32:
3871       __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3872       break;
3873 
3874     case DataType::Type::kInt64:
3875       // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3876       __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3877       // We cannot emit an RSC (Reverse Subtract with Carry)
3878       // instruction here, as it does not exist in the Thumb-2
3879       // instruction set. Instead, we use the following
3880       // SBC and SUB sequence.
3881       //
3882       // out.hi = -C
3883       __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3884       // out.hi = out.hi - in.hi
3885       __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
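      // A worked example of the sequence above (illustrative only, not from the original source),
      // for in = 1 (in.lo = 0x00000001, in.hi = 0x00000000):
      //   Rsbs: out.lo = 0 - 1 = 0xFFFFFFFF, a borrow occurred, so C == 0.
      //   Sbc:  out.hi = out.hi - out.hi - !C = -1 = 0xFFFFFFFF.
      //   Sub:  out.hi = 0xFFFFFFFF - in.hi(0) = 0xFFFFFFFF.
      // Result: out = 0xFFFFFFFF_FFFFFFFF = -1, as expected for -(1).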
3886       break;
3887 
3888     case DataType::Type::kFloat32:
3889     case DataType::Type::kFloat64:
3890       __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3891       break;
3892 
3893     default:
3894       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3895   }
3896 }
3897 
3898 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3899   DataType::Type result_type = conversion->GetResultType();
3900   DataType::Type input_type = conversion->GetInputType();
3901   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3902       << input_type << " -> " << result_type;
3903 
3904   // The float-to-long, double-to-long and long-to-float type conversions
3905   // rely on a call to the runtime.
3906   LocationSummary::CallKind call_kind =
3907       (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3908         && result_type == DataType::Type::kInt64)
3909        || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3910       ? LocationSummary::kCallOnMainOnly
3911       : LocationSummary::kNoCall;
3912   LocationSummary* locations =
3913       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3914 
3915   switch (result_type) {
3916     case DataType::Type::kUint8:
3917     case DataType::Type::kInt8:
3918     case DataType::Type::kUint16:
3919     case DataType::Type::kInt16:
3920       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3921       locations->SetInAt(0, Location::RequiresRegister());
3922       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3923       break;
3924 
3925     case DataType::Type::kInt32:
3926       switch (input_type) {
3927         case DataType::Type::kInt64:
3928           locations->SetInAt(0, Location::Any());
3929           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3930           break;
3931 
3932         case DataType::Type::kFloat32:
3933           locations->SetInAt(0, Location::RequiresFpuRegister());
3934           locations->SetOut(Location::RequiresRegister());
3935           locations->AddTemp(Location::RequiresFpuRegister());
3936           break;
3937 
3938         case DataType::Type::kFloat64:
3939           locations->SetInAt(0, Location::RequiresFpuRegister());
3940           locations->SetOut(Location::RequiresRegister());
3941           locations->AddTemp(Location::RequiresFpuRegister());
3942           break;
3943 
3944         default:
3945           LOG(FATAL) << "Unexpected type conversion from " << input_type
3946                      << " to " << result_type;
3947       }
3948       break;
3949 
3950     case DataType::Type::kInt64:
3951       switch (input_type) {
3952         case DataType::Type::kBool:
3953         case DataType::Type::kUint8:
3954         case DataType::Type::kInt8:
3955         case DataType::Type::kUint16:
3956         case DataType::Type::kInt16:
3957         case DataType::Type::kInt32:
3958           locations->SetInAt(0, Location::RequiresRegister());
3959           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3960           break;
3961 
3962         case DataType::Type::kFloat32: {
3963           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3964           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3965           locations->SetOut(LocationFrom(r0, r1));
3966           break;
3967         }
3968 
3969         case DataType::Type::kFloat64: {
3970           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3971           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3972                                              calling_convention.GetFpuRegisterAt(1)));
3973           locations->SetOut(LocationFrom(r0, r1));
3974           break;
3975         }
3976 
3977         default:
3978           LOG(FATAL) << "Unexpected type conversion from " << input_type
3979                      << " to " << result_type;
3980       }
3981       break;
3982 
3983     case DataType::Type::kFloat32:
3984       switch (input_type) {
3985         case DataType::Type::kBool:
3986         case DataType::Type::kUint8:
3987         case DataType::Type::kInt8:
3988         case DataType::Type::kUint16:
3989         case DataType::Type::kInt16:
3990         case DataType::Type::kInt32:
3991           locations->SetInAt(0, Location::RequiresRegister());
3992           locations->SetOut(Location::RequiresFpuRegister());
3993           break;
3994 
3995         case DataType::Type::kInt64: {
3996           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3997           locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3998                                              calling_convention.GetRegisterAt(1)));
3999           locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4000           break;
4001         }
4002 
4003         case DataType::Type::kFloat64:
4004           locations->SetInAt(0, Location::RequiresFpuRegister());
4005           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4006           break;
4007 
4008         default:
4009           LOG(FATAL) << "Unexpected type conversion from " << input_type
4010                      << " to " << result_type;
4011       }
4012       break;
4013 
4014     case DataType::Type::kFloat64:
4015       switch (input_type) {
4016         case DataType::Type::kBool:
4017         case DataType::Type::kUint8:
4018         case DataType::Type::kInt8:
4019         case DataType::Type::kUint16:
4020         case DataType::Type::kInt16:
4021         case DataType::Type::kInt32:
4022           locations->SetInAt(0, Location::RequiresRegister());
4023           locations->SetOut(Location::RequiresFpuRegister());
4024           break;
4025 
4026         case DataType::Type::kInt64:
4027           locations->SetInAt(0, Location::RequiresRegister());
4028           locations->SetOut(Location::RequiresFpuRegister());
4029           locations->AddTemp(Location::RequiresFpuRegister());
4030           locations->AddTemp(Location::RequiresFpuRegister());
4031           break;
4032 
4033         case DataType::Type::kFloat32:
4034           locations->SetInAt(0, Location::RequiresFpuRegister());
4035           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4036           break;
4037 
4038         default:
4039           LOG(FATAL) << "Unexpected type conversion from " << input_type
4040                      << " to " << result_type;
4041       }
4042       break;
4043 
4044     default:
4045       LOG(FATAL) << "Unexpected type conversion from " << input_type
4046                  << " to " << result_type;
4047   }
4048 }
4049 
4050 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
4051   LocationSummary* locations = conversion->GetLocations();
4052   Location out = locations->Out();
4053   Location in = locations->InAt(0);
4054   DataType::Type result_type = conversion->GetResultType();
4055   DataType::Type input_type = conversion->GetInputType();
4056   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
4057       << input_type << " -> " << result_type;
4058   switch (result_type) {
4059     case DataType::Type::kUint8:
4060       switch (input_type) {
4061         case DataType::Type::kInt8:
4062         case DataType::Type::kUint16:
4063         case DataType::Type::kInt16:
4064         case DataType::Type::kInt32:
4065           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4066           break;
4067         case DataType::Type::kInt64:
4068           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4069           break;
4070 
4071         default:
4072           LOG(FATAL) << "Unexpected type conversion from " << input_type
4073                      << " to " << result_type;
4074       }
4075       break;
4076 
4077     case DataType::Type::kInt8:
4078       switch (input_type) {
4079         case DataType::Type::kUint8:
4080         case DataType::Type::kUint16:
4081         case DataType::Type::kInt16:
4082         case DataType::Type::kInt32:
4083           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4084           break;
4085         case DataType::Type::kInt64:
4086           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4087           break;
4088 
4089         default:
4090           LOG(FATAL) << "Unexpected type conversion from " << input_type
4091                      << " to " << result_type;
4092       }
4093       break;
4094 
4095     case DataType::Type::kUint16:
4096       switch (input_type) {
4097         case DataType::Type::kInt8:
4098         case DataType::Type::kInt16:
4099         case DataType::Type::kInt32:
4100           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4101           break;
4102         case DataType::Type::kInt64:
4103           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4104           break;
4105 
4106         default:
4107           LOG(FATAL) << "Unexpected type conversion from " << input_type
4108                      << " to " << result_type;
4109       }
4110       break;
4111 
4112     case DataType::Type::kInt16:
4113       switch (input_type) {
4114         case DataType::Type::kUint16:
4115         case DataType::Type::kInt32:
4116           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4117           break;
4118         case DataType::Type::kInt64:
4119           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4120           break;
4121 
4122         default:
4123           LOG(FATAL) << "Unexpected type conversion from " << input_type
4124                      << " to " << result_type;
4125       }
4126       break;
4127 
4128     case DataType::Type::kInt32:
4129       switch (input_type) {
4130         case DataType::Type::kInt64:
4131           DCHECK(out.IsRegister());
4132           if (in.IsRegisterPair()) {
4133             __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
4134           } else if (in.IsDoubleStackSlot()) {
4135             GetAssembler()->LoadFromOffset(kLoadWord,
4136                                            OutputRegister(conversion),
4137                                            sp,
4138                                            in.GetStackIndex());
4139           } else {
4140             DCHECK(in.IsConstant());
4141             DCHECK(in.GetConstant()->IsLongConstant());
4142             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
4143             __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
4144           }
4145           break;
4146 
4147         case DataType::Type::kFloat32: {
4148           vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
4149           __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
4150           __ Vmov(OutputRegister(conversion), temp);
4151           break;
4152         }
4153 
4154         case DataType::Type::kFloat64: {
4155           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4156           __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
4157           __ Vmov(OutputRegister(conversion), temp_s);
4158           break;
4159         }
4160 
4161         default:
4162           LOG(FATAL) << "Unexpected type conversion from " << input_type
4163                      << " to " << result_type;
4164       }
4165       break;
4166 
4167     case DataType::Type::kInt64:
4168       switch (input_type) {
4169         case DataType::Type::kBool:
4170         case DataType::Type::kUint8:
4171         case DataType::Type::kInt8:
4172         case DataType::Type::kUint16:
4173         case DataType::Type::kInt16:
4174         case DataType::Type::kInt32:
4175           DCHECK(out.IsRegisterPair());
4176           DCHECK(in.IsRegister());
4177           __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
4178           // Sign extension.
4179           __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
4180           break;
4181 
4182         case DataType::Type::kFloat32:
4183           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
4184           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
4185           break;
4186 
4187         case DataType::Type::kFloat64:
4188           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
4189           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
4190           break;
4191 
4192         default:
4193           LOG(FATAL) << "Unexpected type conversion from " << input_type
4194                      << " to " << result_type;
4195       }
4196       break;
4197 
4198     case DataType::Type::kFloat32:
4199       switch (input_type) {
4200         case DataType::Type::kBool:
4201         case DataType::Type::kUint8:
4202         case DataType::Type::kInt8:
4203         case DataType::Type::kUint16:
4204         case DataType::Type::kInt16:
4205         case DataType::Type::kInt32:
4206           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
4207           __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
4208           break;
4209 
4210         case DataType::Type::kInt64:
4211           codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
4212           CheckEntrypointTypes<kQuickL2f, float, int64_t>();
4213           break;
4214 
4215         case DataType::Type::kFloat64:
4216           __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
4217           break;
4218 
4219         default:
4220           LOG(FATAL) << "Unexpected type conversion from " << input_type
4221                      << " to " << result_type;
4222       }
4223       break;
4224 
4225     case DataType::Type::kFloat64:
4226       switch (input_type) {
4227         case DataType::Type::kBool:
4228         case DataType::Type::kUint8:
4229         case DataType::Type::kInt8:
4230         case DataType::Type::kUint16:
4231         case DataType::Type::kInt16:
4232         case DataType::Type::kInt32:
4233           __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4234           __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4235           break;
4236 
4237         case DataType::Type::kInt64: {
4238           vixl32::Register low = LowRegisterFrom(in);
4239           vixl32::Register high = HighRegisterFrom(in);
4240           vixl32::SRegister out_s = LowSRegisterFrom(out);
4241           vixl32::DRegister out_d = DRegisterFrom(out);
4242           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4243           vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4244           vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4245 
4246           // temp_d = int-to-double(high)
4247           __ Vmov(temp_s, high);
4248           __ Vcvt(F64, S32, temp_d, temp_s);
4249           // constant_d = k2Pow32EncodingForDouble
4250           __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4251           // out_d = unsigned-to-double(low)
4252           __ Vmov(out_s, low);
4253           __ Vcvt(F64, U32, out_d, out_s);
4254           // out_d += temp_d * constant_d
4255           __ Vmla(F64, out_d, temp_d, constant_d);
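          // A worked example (illustrative only), for in = 0x00000001_80000000 = 6442450944:
          //   temp_d = double(high) = 1.0
          //   out_d  = double(unsigned(low)) = 2147483648.0
          //   out_d += temp_d * 2^32 = 2147483648.0 + 4294967296.0 = 6442450944.0
          // which matches the expected double value of the 64-bit input.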
4256           break;
4257         }
4258 
4259         case DataType::Type::kFloat32:
4260           __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4261           break;
4262 
4263         default:
4264           LOG(FATAL) << "Unexpected type conversion from " << input_type
4265                      << " to " << result_type;
4266       }
4267       break;
4268 
4269     default:
4270       LOG(FATAL) << "Unexpected type conversion from " << input_type
4271                  << " to " << result_type;
4272   }
4273 }
4274 
4275 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4276   LocationSummary* locations =
4277       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4278   switch (add->GetResultType()) {
4279     case DataType::Type::kInt32: {
4280       locations->SetInAt(0, Location::RequiresRegister());
4281       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4282       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4283       break;
4284     }
4285 
4286     case DataType::Type::kInt64: {
4287       locations->SetInAt(0, Location::RequiresRegister());
4288       locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4289       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4290       break;
4291     }
4292 
4293     case DataType::Type::kFloat32:
4294     case DataType::Type::kFloat64: {
4295       locations->SetInAt(0, Location::RequiresFpuRegister());
4296       locations->SetInAt(1, Location::RequiresFpuRegister());
4297       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4298       break;
4299     }
4300 
4301     default:
4302       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4303   }
4304 }
4305 
4306 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4307   LocationSummary* locations = add->GetLocations();
4308   Location out = locations->Out();
4309   Location first = locations->InAt(0);
4310   Location second = locations->InAt(1);
4311 
4312   switch (add->GetResultType()) {
4313     case DataType::Type::kInt32: {
4314       __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4315       break;
4316     }
4317 
4318     case DataType::Type::kInt64: {
4319       if (second.IsConstant()) {
4320         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4321         GenerateAddLongConst(out, first, value);
4322       } else {
4323         DCHECK(second.IsRegisterPair());
4324         __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4325         __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4326       }
4327       break;
4328     }
4329 
4330     case DataType::Type::kFloat32:
4331     case DataType::Type::kFloat64:
4332       __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4333       break;
4334 
4335     default:
4336       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4337   }
4338 }
4339 
4340 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4341   LocationSummary* locations =
4342       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4343   switch (sub->GetResultType()) {
4344     case DataType::Type::kInt32: {
4345       locations->SetInAt(0, Location::RequiresRegister());
4346       locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4347       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4348       break;
4349     }
4350 
4351     case DataType::Type::kInt64: {
4352       locations->SetInAt(0, Location::RequiresRegister());
4353       locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4354       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4355       break;
4356     }
4357     case DataType::Type::kFloat32:
4358     case DataType::Type::kFloat64: {
4359       locations->SetInAt(0, Location::RequiresFpuRegister());
4360       locations->SetInAt(1, Location::RequiresFpuRegister());
4361       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4362       break;
4363     }
4364     default:
4365       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4366   }
4367 }
4368 
4369 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4370   LocationSummary* locations = sub->GetLocations();
4371   Location out = locations->Out();
4372   Location first = locations->InAt(0);
4373   Location second = locations->InAt(1);
4374   switch (sub->GetResultType()) {
4375     case DataType::Type::kInt32: {
4376       __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4377       break;
4378     }
4379 
4380     case DataType::Type::kInt64: {
4381       if (second.IsConstant()) {
4382         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4383         GenerateAddLongConst(out, first, -value);
4384       } else {
4385         DCHECK(second.IsRegisterPair());
4386         __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4387         __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4388       }
4389       break;
4390     }
4391 
4392     case DataType::Type::kFloat32:
4393     case DataType::Type::kFloat64:
4394       __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4395       break;
4396 
4397     default:
4398       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4399   }
4400 }
4401 
4402 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4403   LocationSummary* locations =
4404       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4405   switch (mul->GetResultType()) {
4406     case DataType::Type::kInt32:
4407     case DataType::Type::kInt64:  {
4408       locations->SetInAt(0, Location::RequiresRegister());
4409       locations->SetInAt(1, Location::RequiresRegister());
4410       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4411       break;
4412     }
4413 
4414     case DataType::Type::kFloat32:
4415     case DataType::Type::kFloat64: {
4416       locations->SetInAt(0, Location::RequiresFpuRegister());
4417       locations->SetInAt(1, Location::RequiresFpuRegister());
4418       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4419       break;
4420     }
4421 
4422     default:
4423       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4424   }
4425 }
4426 
4427 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4428   LocationSummary* locations = mul->GetLocations();
4429   Location out = locations->Out();
4430   Location first = locations->InAt(0);
4431   Location second = locations->InAt(1);
4432   switch (mul->GetResultType()) {
4433     case DataType::Type::kInt32: {
4434       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4435       break;
4436     }
4437     case DataType::Type::kInt64: {
4438       vixl32::Register out_hi = HighRegisterFrom(out);
4439       vixl32::Register out_lo = LowRegisterFrom(out);
4440       vixl32::Register in1_hi = HighRegisterFrom(first);
4441       vixl32::Register in1_lo = LowRegisterFrom(first);
4442       vixl32::Register in2_hi = HighRegisterFrom(second);
4443       vixl32::Register in2_lo = LowRegisterFrom(second);
4444 
4445       // Extra checks to protect against corner cases caused by the existence of the R1_R2 pair.
4446       // The algorithm is wrong if out.hi is either in1.lo or in2.lo
4447       // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4448       DCHECK(!out_hi.Is(in1_lo));
4449       DCHECK(!out_hi.Is(in2_lo));
4450 
4451       // input: in1 - 64 bits, in2 - 64 bits
4452       // output: out
4453       // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4454       // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4455       // parts: out.lo = (in1.lo * in2.lo)[31:0]
4456 
4457       UseScratchRegisterScope temps(GetVIXLAssembler());
4458       vixl32::Register temp = temps.Acquire();
4459       // temp <- in1.lo * in2.hi
4460       __ Mul(temp, in1_lo, in2_hi);
4461       // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4462       __ Mla(out_hi, in1_hi, in2_lo, temp);
4463       // out.lo <- (in1.lo * in2.lo)[31:0];
4464       __ Umull(out_lo, temp, in1_lo, in2_lo);
4465       // out.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4466       __ Add(out_hi, out_hi, temp);
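      // A worked example (illustrative only), for in1 = 0x00000002_00000003 and in2 = 0x00000000_00000004:
      //   temp   = in1.lo * in2.hi = 3 * 0 = 0
      //   out.hi = in1.hi * in2.lo + temp = 2 * 4 + 0 = 8
      //   umull  => out.lo = 12, temp = 0 (high half of 3 * 4)
      //   out.hi = 8 + 0 = 8
      // Result: out = 0x00000008_0000000C, which equals in1 * in2 truncated to 64 bits.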
4467       break;
4468     }
4469 
4470     case DataType::Type::kFloat32:
4471     case DataType::Type::kFloat64:
4472       __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4473       break;
4474 
4475     default:
4476       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4477   }
4478 }
4479 
4480 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4481   DCHECK(instruction->IsDiv() || instruction->IsRem());
4482   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4483 
4484   Location second = instruction->GetLocations()->InAt(1);
4485   DCHECK(second.IsConstant());
4486 
4487   vixl32::Register out = OutputRegister(instruction);
4488   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4489   int32_t imm = Int32ConstantFrom(second);
4490   DCHECK(imm == 1 || imm == -1);
4491 
4492   if (instruction->IsRem()) {
4493     __ Mov(out, 0);
4494   } else {
4495     if (imm == 1) {
4496       __ Mov(out, dividend);
4497     } else {
4498       __ Rsb(out, dividend, 0);
4499     }
4500   }
4501 }
4502 
4503 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4504   DCHECK(instruction->IsDiv() || instruction->IsRem());
4505   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4506 
4507   LocationSummary* locations = instruction->GetLocations();
4508   Location second = locations->InAt(1);
4509   DCHECK(second.IsConstant());
4510 
4511   vixl32::Register out = OutputRegister(instruction);
4512   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4513   int32_t imm = Int32ConstantFrom(second);
4514   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4515   int ctz_imm = CTZ(abs_imm);
4516 
4517   auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4518     __ Asr(out, in, ctz_imm);
4519     if (imm < 0) {
4520       __ Rsb(out, out, 0);
4521     }
4522   };
4523 
4524   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4525     // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4526     // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4527     //   imm == 2
4528     //     HDiv
4529     //      add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4530     //      asr out, out(0x80000001), #1 => out = 0xc0000000
4531     //      This is the same as 'asr out, dividend(0x80000000), #1'
4532     //
4533     //   imm > 2
4534     //     HDiv
4535     //      asr out, dividend(0x80000000), #31 => out = -1
4536     //      add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4537     //          where the number of the rightmost 1s is ctz_imm.
4538     //      asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4539     //          leftmost 1s is ctz_imm + 1.
4540     //      This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4541     //
4542     //   imm == INT32_MIN
4543     //     HDiv
4544     //      asr out, dividend(0x80000000), #31 => out = -1
4545     //      add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4546     //      asr out, out(0xc0000000), #31 => out = -1
4547     //      rsb out, out(-1), #0 => out = 1
4548     //      This is the same as
4549     //        asr out, dividend(0x80000000), #31
4550     //        rsb out, out, #0
4551     //
4552     //
4553     //   INT_MIN % imm must be 0 for any imm of power 2. 'and' and 'ubfx' work only with bits
4554     //   INT_MIN % imm must be 0 for any power-of-2 imm. 'and' and 'ubfx' work only with bits
4555     //   produce zero.
4556     if (instruction->IsDiv()) {
4557       generate_div_code(out, dividend);
4558     } else {
4559       if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4560         __ And(out, dividend, abs_imm - 1);
4561       } else {
4562         __ Ubfx(out, dividend, 0, ctz_imm);
4563       }
4564       return;
4565     }
4566   } else {
4567     vixl32::Register add_right_input = dividend;
4568     if (ctz_imm > 1) {
4569       __ Asr(out, dividend, 31);
4570       add_right_input = out;
4571     }
4572     __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4573 
4574     if (instruction->IsDiv()) {
4575       generate_div_code(out, out);
4576     } else {
4577       __ Bfc(out, 0, ctz_imm);
4578       __ Sub(out, dividend, out);
4579     }
4580   }
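  // A worked example of the path above (illustrative only), for dividend = -7 and imm = 4
  // (ctz_imm = 2):
  //   Asr: out = -7 >> 31 = -1 (all ones)
  //   Add: out = -7 + (0xFFFFFFFF logically shifted right by 30) = -7 + 3 = -4
  //   Div: Asr by 2 gives -1, and -7 / 4 == -1 with truncation toward zero.
  //   Rem: Bfc clears bits [1:0] of -4 (no change), then -7 - (-4) = -3 == -7 % 4.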
4581 }
4582 
4583 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4584   DCHECK(instruction->IsDiv() || instruction->IsRem());
4585   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4586 
4587   LocationSummary* locations = instruction->GetLocations();
4588   Location second = locations->InAt(1);
4589   DCHECK(second.IsConstant());
4590 
4591   vixl32::Register out = OutputRegister(instruction);
4592   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4593   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4594   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4595   int32_t imm = Int32ConstantFrom(second);
4596 
4597   int64_t magic;
4598   int shift;
4599   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4600 
4601   auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4602                                                         vixl32::Register dividend,
4603                                                         vixl32::Register temp1,
4604                                                         vixl32::Register temp2) {
4605     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4606     __ Mov(temp1, static_cast<int32_t>(magic));
4607     if (magic > 0 && shift == 0) {
4608       __ Smull(temp2, out, dividend, temp1);
4609     } else {
4610       __ Smull(temp2, temp1, dividend, temp1);
4611       if (magic < 0) {
4612         // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4613         // than INT32_MAX. In such a case shift is never 0.
4614         // Proof:
4615         //   m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4616         //
4617         //   If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4618         //   = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4619         //   = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4620         //
4621         //   1 + (2^32 / d) is decreasing when d is increasing.
4622         //   The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4623         //   the minimum is 3, when d = 2^31 -1.
4624         //   The minimum is 3, when d == 2^31 - 1.
4625         //   So for all values of d in [3, INT32_MAX], m computed with p == 32 is in [3, INT32_MAX)
4626         //   and is never less than 0.
4627       }
4628       DCHECK_NE(shift, 0);
4629       __ Lsr(out, temp1, shift);
4630     }
4631   };
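  // A worked example of the lambda above (illustrative only). Assuming the classic
  // Hacker's Delight constants for imm == 7, i.e. magic == 0x92492493 (negative as int32)
  // and shift == 2 (the values CalculateMagicAndShiftForDivRem is expected to produce),
  // for dividend == 21:
  //   Smull: temp1 = high32(21 * 0x92492493) = -9
  //   magic < 0, so temp1 = -9 + 21 = 12
  //   Lsr:   out = 12 >> 2 = 3, and indeed 21 / 7 == 3.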
4632 
4633   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4634     // No need to adjust the result for a non-negative dividend and a positive divisor.
4635     if (instruction->IsDiv()) {
4636       generate_unsigned_div_code(out, dividend, temp1, temp2);
4637     } else {
4638       generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4639       __ Mov(temp2, imm);
4640       __ Mls(out, temp1, temp2, dividend);
4641     }
4642   } else {
4643     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4644     __ Mov(temp1, static_cast<int32_t>(magic));
4645     __ Smull(temp2, temp1, dividend, temp1);
4646 
4647     if (imm > 0 && magic < 0) {
4648       __ Add(temp1, temp1, dividend);
4649     } else if (imm < 0 && magic > 0) {
4650       __ Sub(temp1, temp1, dividend);
4651     }
4652 
4653     if (shift != 0) {
4654       __ Asr(temp1, temp1, shift);
4655     }
4656 
4657     if (instruction->IsDiv()) {
4658       __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4659     } else {
4660       __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4661       // TODO: Strength reduction for mls.
4662       __ Mov(temp2, imm);
4663       __ Mls(out, temp1, temp2, dividend);
4664     }
4665   }
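  // A worked example of the signed path above (illustrative only), again assuming
  // magic == 0x92492493 and shift == 2 for imm == 7, with dividend == -21:
  //   Smull: temp1 = high32(-21 * 0x92492493) = 8
  //   imm > 0 && magic < 0, so temp1 = 8 + (-21) = -13
  //   Asr:   temp1 = -13 >> 2 = -4
  //   Div:   out = -4 - (-4 >> 31) = -4 - (-1) = -3, and indeed -21 / 7 == -3.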
4666 }
4667 
4668 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4669     HBinaryOperation* instruction) {
4670   DCHECK(instruction->IsDiv() || instruction->IsRem());
4671   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4672 
4673   Location second = instruction->GetLocations()->InAt(1);
4674   DCHECK(second.IsConstant());
4675 
4676   int32_t imm = Int32ConstantFrom(second);
4677   if (imm == 0) {
4678     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4679   } else if (imm == 1 || imm == -1) {
4680     DivRemOneOrMinusOne(instruction);
4681   } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4682     DivRemByPowerOfTwo(instruction);
4683   } else {
4684     DCHECK(imm <= -2 || imm >= 2);
4685     GenerateDivRemWithAnyConstant(instruction);
4686   }
4687 }
4688 
4689 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4690   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4691   if (div->GetResultType() == DataType::Type::kInt64) {
4692     // pLdiv runtime call.
4693     call_kind = LocationSummary::kCallOnMainOnly;
4694   } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4695     // sdiv will be replaced by another instruction sequence.
4696   } else if (div->GetResultType() == DataType::Type::kInt32 &&
4697              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4698     // pIdivmod runtime call.
4699     call_kind = LocationSummary::kCallOnMainOnly;
4700   }
4701 
4702   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4703 
4704   switch (div->GetResultType()) {
4705     case DataType::Type::kInt32: {
4706       HInstruction* divisor = div->InputAt(1);
4707       if (divisor->IsConstant()) {
4708         locations->SetInAt(0, Location::RequiresRegister());
4709         locations->SetInAt(1, Location::ConstantLocation(divisor));
4710         int32_t value = Int32ConstantFrom(divisor);
4711         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4712         if (value == 1 || value == 0 || value == -1) {
4713           // No temp register required.
4714         } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4715                    value != 2 &&
4716                    value != -2 &&
4717                    !HasNonNegativeOrMinIntInputAt(div, 0)) {
4718           // The "out" register is used as a temporary, so it overlaps with the inputs.
4719           out_overlaps = Location::kOutputOverlap;
4720         } else {
4721           locations->AddRegisterTemps(2);
4722         }
4723         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4724       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4725         locations->SetInAt(0, Location::RequiresRegister());
4726         locations->SetInAt(1, Location::RequiresRegister());
4727         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4728       } else {
4729         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4730         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4731         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4732         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4733         //       we only need the former.
4734         locations->SetOut(LocationFrom(r0));
4735       }
4736       break;
4737     }
4738     case DataType::Type::kInt64: {
4739       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4740       locations->SetInAt(0, LocationFrom(
4741           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4742       locations->SetInAt(1, LocationFrom(
4743           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4744       locations->SetOut(LocationFrom(r0, r1));
4745       break;
4746     }
4747     case DataType::Type::kFloat32:
4748     case DataType::Type::kFloat64: {
4749       locations->SetInAt(0, Location::RequiresFpuRegister());
4750       locations->SetInAt(1, Location::RequiresFpuRegister());
4751       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4752       break;
4753     }
4754 
4755     default:
4756       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4757   }
4758 }
4759 
4760 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4761   Location lhs = div->GetLocations()->InAt(0);
4762   Location rhs = div->GetLocations()->InAt(1);
4763 
4764   switch (div->GetResultType()) {
4765     case DataType::Type::kInt32: {
4766       if (rhs.IsConstant()) {
4767         GenerateDivRemConstantIntegral(div);
4768       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4769         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4770       } else {
4771         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4772         DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4773         DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4774         DCHECK(r0.Is(OutputRegister(div)));
4775 
4776         codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
4777         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4778       }
4779       break;
4780     }
4781 
4782     case DataType::Type::kInt64: {
4783       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4784       DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4785       DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4786       DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4787       DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4788       DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4789       DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4790 
4791       codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
4792       CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4793       break;
4794     }
4795 
4796     case DataType::Type::kFloat32:
4797     case DataType::Type::kFloat64:
4798       __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4799       break;
4800 
4801     default:
4802       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4803   }
4804 }
4805 
4806 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4807   DataType::Type type = rem->GetResultType();
4808 
4809   // Most remainders are implemented in the runtime.
4810   LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4811   if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4812     // sdiv will be replaced by another instruction sequence.
4813     call_kind = LocationSummary::kNoCall;
4814   } else if ((rem->GetResultType() == DataType::Type::kInt32)
4815              && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4816     // We have a hardware divide instruction for int; compute the remainder with three instructions.
4817     call_kind = LocationSummary::kNoCall;
4818   }
4819 
4820   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4821 
4822   switch (type) {
4823     case DataType::Type::kInt32: {
4824       HInstruction* divisor = rem->InputAt(1);
4825       if (divisor->IsConstant()) {
4826         locations->SetInAt(0, Location::RequiresRegister());
4827         locations->SetInAt(1, Location::ConstantLocation(divisor));
4828         int32_t value = Int32ConstantFrom(divisor);
4829         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4830         if (value == 1 || value == 0 || value == -1) {
4831           // No temp register required.
4832         } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4833           // The "out" register is used as a temporary, so it overlaps with the inputs.
4834           out_overlaps = Location::kOutputOverlap;
4835         } else {
4836           locations->AddRegisterTemps(2);
4837         }
4838         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4839       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4840         locations->SetInAt(0, Location::RequiresRegister());
4841         locations->SetInAt(1, Location::RequiresRegister());
4842         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4843         locations->AddTemp(Location::RequiresRegister());
4844       } else {
4845         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4846         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4847         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4848         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4849         //       we only need the latter.
4850         locations->SetOut(LocationFrom(r1));
4851       }
4852       break;
4853     }
4854     case DataType::Type::kInt64: {
4855       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4856       locations->SetInAt(0, LocationFrom(
4857           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4858       locations->SetInAt(1, LocationFrom(
4859           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4860       // The runtime helper puts the output in R2,R3.
4861       locations->SetOut(LocationFrom(r2, r3));
4862       break;
4863     }
4864     case DataType::Type::kFloat32: {
4865       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4866       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4867       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4868       locations->SetOut(LocationFrom(s0));
4869       break;
4870     }
4871 
4872     case DataType::Type::kFloat64: {
4873       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4874       locations->SetInAt(0, LocationFrom(
4875           calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4876       locations->SetInAt(1, LocationFrom(
4877           calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4878       locations->SetOut(LocationFrom(s0, s1));
4879       break;
4880     }
4881 
4882     default:
4883       LOG(FATAL) << "Unexpected rem type " << type;
4884   }
4885 }
4886 
4887 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4888   LocationSummary* locations = rem->GetLocations();
4889   Location second = locations->InAt(1);
4890 
4891   DataType::Type type = rem->GetResultType();
4892   switch (type) {
4893     case DataType::Type::kInt32: {
4894       vixl32::Register reg1 = InputRegisterAt(rem, 0);
4895       vixl32::Register out_reg = OutputRegister(rem);
4896       if (second.IsConstant()) {
4897         GenerateDivRemConstantIntegral(rem);
4898       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4899         vixl32::Register reg2 = RegisterFrom(second);
4900         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4901 
4902         // temp = reg1 / reg2  (integer division)
4903         // dest = reg1 - temp * reg2
4904         __ Sdiv(temp, reg1, reg2);
4905         __ Mls(out_reg, temp, reg2, reg1);
4906       } else {
4907         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4908         DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4909         DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4910         DCHECK(out_reg.Is(r1));
4911 
4912         codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
4913         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4914       }
4915       break;
4916     }
4917 
4918     case DataType::Type::kInt64: {
4919       codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
4920       CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4921       break;
4922     }
4923 
4924     case DataType::Type::kFloat32: {
4925       codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
4926       CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4927       break;
4928     }
4929 
4930     case DataType::Type::kFloat64: {
4931       codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
4932       CheckEntrypointTypes<kQuickFmod, double, double, double>();
4933       break;
4934     }
4935 
4936     default:
4937       LOG(FATAL) << "Unexpected rem type " << type;
4938   }
4939 }
4940 
4941 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4942   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4943   switch (minmax->GetResultType()) {
4944     case DataType::Type::kInt32:
4945       locations->SetInAt(0, Location::RequiresRegister());
4946       locations->SetInAt(1, Location::RequiresRegister());
4947       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4948       break;
4949     case DataType::Type::kInt64:
4950       locations->SetInAt(0, Location::RequiresRegister());
4951       locations->SetInAt(1, Location::RequiresRegister());
4952       locations->SetOut(Location::SameAsFirstInput());
4953       break;
4954     case DataType::Type::kFloat32:
4955       locations->SetInAt(0, Location::RequiresFpuRegister());
4956       locations->SetInAt(1, Location::RequiresFpuRegister());
4957       locations->SetOut(Location::SameAsFirstInput());
4958       locations->AddTemp(Location::RequiresRegister());
4959       break;
4960     case DataType::Type::kFloat64:
4961       locations->SetInAt(0, Location::RequiresFpuRegister());
4962       locations->SetInAt(1, Location::RequiresFpuRegister());
4963       locations->SetOut(Location::SameAsFirstInput());
4964       break;
4965     default:
4966       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4967   }
4968 }
4969 
GenerateMinMaxInt(LocationSummary * locations,bool is_min)4970 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4971   Location op1_loc = locations->InAt(0);
4972   Location op2_loc = locations->InAt(1);
4973   Location out_loc = locations->Out();
4974 
4975   vixl32::Register op1 = RegisterFrom(op1_loc);
4976   vixl32::Register op2 = RegisterFrom(op2_loc);
4977   vixl32::Register out = RegisterFrom(out_loc);
4978 
4979   __ Cmp(op1, op2);
4980 
4981   {
4982     ExactAssemblyScope aas(GetVIXLAssembler(),
4983                            3 * kMaxInstructionSizeInBytes,
4984                            CodeBufferCheckScope::kMaximumSize);
4985 
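         // After the Cmp above, for is_min this assembles "ite lt; movlt out, op1;
         // movge out, op2", i.e. out = (op1 < op2) ? op1 : op2; for max the conditions
         // are gt/le.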
4986     __ ite(is_min ? lt : gt);
4987     __ mov(is_min ? lt : gt, out, op1);
4988     __ mov(is_min ? ge : le, out, op2);
4989   }
4990 }
4991 
GenerateMinMaxLong(LocationSummary * locations,bool is_min)4992 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4993   Location op1_loc = locations->InAt(0);
4994   Location op2_loc = locations->InAt(1);
4995   Location out_loc = locations->Out();
4996 
4997   // Optimization: don't generate any code if inputs are the same.
4998   if (op1_loc.Equals(op2_loc)) {
4999     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5000     return;
5001   }
5002 
5003   vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
5004   vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
5005   vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
5006   vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
5007   vixl32::Register out_lo = LowRegisterFrom(out_loc);
5008   vixl32::Register out_hi = HighRegisterFrom(out_loc);
5009   UseScratchRegisterScope temps(GetVIXLAssembler());
5010   const vixl32::Register temp = temps.Acquire();
5011 
5012   DCHECK(op1_lo.Is(out_lo));
5013   DCHECK(op1_hi.Is(out_hi));
5014 
5015   // Compare op1 >= op2, or op1 < op2.
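       // The Cmp sets the borrow consumed by the Sbcs; the subtraction result in `temp`
       // is discarded, only the resulting flags are used.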
5016   __ Cmp(out_lo, op2_lo);
5017   __ Sbcs(temp, out_hi, op2_hi);
5018 
5019   // Now GE/LT condition code is correct for the long comparison.
5020   {
5021     vixl32::ConditionType cond = is_min ? ge : lt;
5022     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5023                                 3 * kMaxInstructionSizeInBytes,
5024                                 CodeBufferCheckScope::kMaximumSize);
5025     __ itt(cond);
5026     __ mov(cond, out_lo, op2_lo);
5027     __ mov(cond, out_hi, op2_hi);
5028   }
5029 }
5030 
GenerateMinMaxFloat(HInstruction * minmax,bool is_min)5031 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
5032   LocationSummary* locations = minmax->GetLocations();
5033   Location op1_loc = locations->InAt(0);
5034   Location op2_loc = locations->InAt(1);
5035   Location out_loc = locations->Out();
5036 
5037   // Optimization: don't generate any code if inputs are the same.
5038   if (op1_loc.Equals(op2_loc)) {
5039     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5040     return;
5041   }
5042 
5043   vixl32::SRegister op1 = SRegisterFrom(op1_loc);
5044   vixl32::SRegister op2 = SRegisterFrom(op2_loc);
5045   vixl32::SRegister out = SRegisterFrom(out_loc);
5046 
5047   UseScratchRegisterScope temps(GetVIXLAssembler());
5048   const vixl32::Register temp1 = temps.Acquire();
5049   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
5050   vixl32::Label nan, done;
5051   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5052 
5053   DCHECK(op1.Is(out));
5054 
5055   __ Vcmp(op1, op2);
5056   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5057   __ B(vs, &nan, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
5058 
5059   // op1 <> op2
5060   vixl32::ConditionType cond = is_min ? gt : lt;
5061   {
5062     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5063                                 2 * kMaxInstructionSizeInBytes,
5064                                 CodeBufferCheckScope::kMaximumSize);
5065     __ it(cond);
5066     __ vmov(cond, F32, out, op2);
5067   }
5068   // For <> (not equal), the min/max calculation is already done.
5069   __ B(ne, final_label, /* is_far_target= */ false);
5070 
5071   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
5072   __ Vmov(temp1, op1);
5073   __ Vmov(temp2, op2);
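       // Bit patterns: +0.0f is 0x00000000 and -0.0f is 0x80000000, so Orr keeps the
       // sign bit if either input is -0.0 (min(+0.0, -0.0) == -0.0), while And clears
       // it unless both inputs are -0.0 (max(+0.0, -0.0) == +0.0).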
5074   if (is_min) {
5075     __ Orr(temp1, temp1, temp2);
5076   } else {
5077     __ And(temp1, temp1, temp2);
5078   }
5079   __ Vmov(out, temp1);
5080   __ B(final_label);
5081 
5082   // handle NaN input.
5083   __ Bind(&nan);
5084   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
5085   __ Vmov(out, temp1);
5086 
5087   if (done.IsReferenced()) {
5088     __ Bind(&done);
5089   }
5090 }
5091 
GenerateMinMaxDouble(HInstruction * minmax,bool is_min)5092 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
5093   LocationSummary* locations = minmax->GetLocations();
5094   Location op1_loc = locations->InAt(0);
5095   Location op2_loc = locations->InAt(1);
5096   Location out_loc = locations->Out();
5097 
5098   // Optimization: don't generate any code if inputs are the same.
5099   if (op1_loc.Equals(op2_loc)) {
5100     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5101     return;
5102   }
5103 
5104   vixl32::DRegister op1 = DRegisterFrom(op1_loc);
5105   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
5106   vixl32::DRegister out = DRegisterFrom(out_loc);
5107   vixl32::Label handle_nan_eq, done;
5108   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5109 
5110   DCHECK(op1.Is(out));
5111 
5112   __ Vcmp(op1, op2);
5113   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5114   __ B(vs, &handle_nan_eq, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
5115 
5116   // op1 <> op2
5117   vixl32::ConditionType cond = is_min ? gt : lt;
5118   {
5119     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5120                                 2 * kMaxInstructionSizeInBytes,
5121                                 CodeBufferCheckScope::kMaximumSize);
5122     __ it(cond);
5123     __ vmov(cond, F64, out, op2);
5124   }
5125   // For <> (not equal), the min/max calculation is already done.
5126   __ B(ne, final_label, /* is_far_target= */ false);
5127 
5128   // handle op1 == op2, max(+0.0,-0.0).
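       // Same sign-bit trick as the float case, applied directly to the D registers:
       // Vand clears the sign bit unless both inputs are -0.0, while Vorr (below) keeps
       // it if either input is -0.0.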
5129   if (!is_min) {
5130     __ Vand(F64, out, op1, op2);
5131     __ B(final_label);
5132   }
5133 
5134   // handle op1 == op2, min(+0.0,-0.0), NaN input.
5135   __ Bind(&handle_nan_eq);
5136   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
5137 
5138   if (done.IsReferenced()) {
5139     __ Bind(&done);
5140   }
5141 }
5142 
GenerateMinMax(HBinaryOperation * minmax,bool is_min)5143 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
5144   DataType::Type type = minmax->GetResultType();
5145   switch (type) {
5146     case DataType::Type::kInt32:
5147       GenerateMinMaxInt(minmax->GetLocations(), is_min);
5148       break;
5149     case DataType::Type::kInt64:
5150       GenerateMinMaxLong(minmax->GetLocations(), is_min);
5151       break;
5152     case DataType::Type::kFloat32:
5153       GenerateMinMaxFloat(minmax, is_min);
5154       break;
5155     case DataType::Type::kFloat64:
5156       GenerateMinMaxDouble(minmax, is_min);
5157       break;
5158     default:
5159       LOG(FATAL) << "Unexpected type for HMinMax " << type;
5160   }
5161 }
5162 
VisitMin(HMin * min)5163 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
5164   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
5165 }
5166 
VisitMin(HMin * min)5167 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
5168   GenerateMinMax(min, /*is_min*/ true);
5169 }
5170 
VisitMax(HMax * max)5171 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
5172   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
5173 }
5174 
VisitMax(HMax * max)5175 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
5176   GenerateMinMax(max, /*is_min*/ false);
5177 }
5178 
VisitAbs(HAbs * abs)5179 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
5180   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5181   switch (abs->GetResultType()) {
5182     case DataType::Type::kInt32:
5183     case DataType::Type::kInt64:
5184       locations->SetInAt(0, Location::RequiresRegister());
5185       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5186       locations->AddTemp(Location::RequiresRegister());
5187       break;
5188     case DataType::Type::kFloat32:
5189     case DataType::Type::kFloat64:
5190       locations->SetInAt(0, Location::RequiresFpuRegister());
5191       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5192       break;
5193     default:
5194       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5195   }
5196 }
5197 
VisitAbs(HAbs * abs)5198 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
5199   LocationSummary* locations = abs->GetLocations();
5200   switch (abs->GetResultType()) {
5201     case DataType::Type::kInt32: {
5202       vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
5203       vixl32::Register out_reg = RegisterFrom(locations->Out());
5204       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
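           // Branchless abs: mask is 0 for non-negative inputs and -1 for negative ones,
           // so (x + mask) ^ mask leaves x unchanged or computes -x. E.g. x = -5:
           // -5 + -1 = -6, and -6 ^ -1 = 5.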
5205       __ Asr(mask, in_reg, 31);
5206       __ Add(out_reg, in_reg, mask);
5207       __ Eor(out_reg, out_reg, mask);
5208       break;
5209     }
5210     case DataType::Type::kInt64: {
5211       Location in = locations->InAt(0);
5212       vixl32::Register in_reg_lo = LowRegisterFrom(in);
5213       vixl32::Register in_reg_hi = HighRegisterFrom(in);
5214       Location output = locations->Out();
5215       vixl32::Register out_reg_lo = LowRegisterFrom(output);
5216       vixl32::Register out_reg_hi = HighRegisterFrom(output);
5217       DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
5218       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
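           // Same trick as the 32-bit case: the sign mask comes from the high word and
           // the add carries from the low word into the high word (Adds/Adc).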
5219       __ Asr(mask, in_reg_hi, 31);
5220       __ Adds(out_reg_lo, in_reg_lo, mask);
5221       __ Adc(out_reg_hi, in_reg_hi, mask);
5222       __ Eor(out_reg_lo, out_reg_lo, mask);
5223       __ Eor(out_reg_hi, out_reg_hi, mask);
5224       break;
5225     }
5226     case DataType::Type::kFloat32:
5227     case DataType::Type::kFloat64:
5228       __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
5229       break;
5230     default:
5231       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5232   }
5233 }
5234 
VisitDivZeroCheck(HDivZeroCheck * instruction)5235 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5236   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5237   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5238 }
5239 
VisitDivZeroCheck(HDivZeroCheck * instruction)5240 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5241   DivZeroCheckSlowPathARMVIXL* slow_path =
5242       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5243   codegen_->AddSlowPath(slow_path);
5244 
5245   LocationSummary* locations = instruction->GetLocations();
5246   Location value = locations->InAt(0);
5247 
5248   switch (instruction->GetType()) {
5249     case DataType::Type::kBool:
5250     case DataType::Type::kUint8:
5251     case DataType::Type::kInt8:
5252     case DataType::Type::kUint16:
5253     case DataType::Type::kInt16:
5254     case DataType::Type::kInt32: {
5255       if (value.IsRegister()) {
5256         __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5257       } else {
5258         DCHECK(value.IsConstant()) << value;
5259         if (Int32ConstantFrom(value) == 0) {
5260           __ B(slow_path->GetEntryLabel());
5261         }
5262       }
5263       break;
5264     }
5265     case DataType::Type::kInt64: {
5266       if (value.IsRegisterPair()) {
5267         UseScratchRegisterScope temps(GetVIXLAssembler());
5268         vixl32::Register temp = temps.Acquire();
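             // Orr-ing the two halves sets the Z flag only if the full 64-bit value is
             // zero.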
5269         __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5270         __ B(eq, slow_path->GetEntryLabel());
5271       } else {
5272         DCHECK(value.IsConstant()) << value;
5273         if (Int64ConstantFrom(value) == 0) {
5274           __ B(slow_path->GetEntryLabel());
5275         }
5276       }
5277       break;
5278     }
5279     default:
5280       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5281   }
5282 }
5283 
HandleIntegerRotate(HBinaryOperation * rotate)5284 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HBinaryOperation* rotate) {
5285   LocationSummary* locations = rotate->GetLocations();
5286   vixl32::Register in = InputRegisterAt(rotate, 0);
5287   Location rhs = locations->InAt(1);
5288   vixl32::Register out = OutputRegister(rotate);
5289 
5290   if (rhs.IsConstant()) {
5291     // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5292     // so map all rotations to a positive equivalent in that range.
5293     // (e.g. a rotation left *or* right by -2 bits == 30 bits in the same direction.)
5294     uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
5295     if (rotate->IsRol()) {
5296       rot = -rot;
5297     }
5298     rot &= 0x1f;
5299 
5300     if (rot) {
5301       // Rotate, mapping left rotations to right equivalents if necessary.
5302       // (e.g. left by 2 bits == right by 30.)
5303       __ Ror(out, in, rot);
5304     } else if (!out.Is(in)) {
5305       __ Mov(out, in);
5306     }
5307   } else {
5308     if (rotate->IsRol()) {
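           // There is no rotate-left instruction, so rotate right by the negated amount:
           // rol(x, n) == ror(x, -n), with the rotation taken modulo 32.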
5309       UseScratchRegisterScope temps(GetVIXLAssembler());
5310 
5311       vixl32::Register negated = temps.Acquire();
5312       __ Rsb(negated, RegisterFrom(rhs), 0);
5313       __ Ror(out, in, negated);
5314     } else {
5315       DCHECK(rotate->IsRor());
5316       __ Ror(out, in, RegisterFrom(rhs));
5317     }
5318   }
5319 }
5320 
5321 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5322 // rotates by swapping input regs (effectively rotating by the first 32-bits of
5323 // a larger rotation) or flipping direction (thus treating larger right/left
5324 // rotations as sub-word sized rotations in the other direction) as appropriate.
HandleLongRotate(HBinaryOperation * rotate)5325 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HBinaryOperation* rotate) {
5326   LocationSummary* locations = rotate->GetLocations();
5327   vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5328   vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5329   Location rhs = locations->InAt(1);
5330   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5331   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5332 
5333   if (rhs.IsConstant()) {
5334     uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5335 
5336     if (rotate->IsRol()) {
5337       rot = -rot;
5338     }
5339 
5340     // Map all rotations to positive equivalents on the interval [0,63].
5341     rot &= kMaxLongShiftDistance;
5342     // For rotations of a word or more, 'pre-rotate' by 32 bits by swapping the input
5343     // registers, which keeps the rotate logic below to a simple pair of binary orr.
5344     // (e.g. a 34-bit rotation == register swap + 2-bit rotation right.)
5345     if (rot >= kArmBitsPerWord) {
5346       rot -= kArmBitsPerWord;
5347       std::swap(in_reg_hi, in_reg_lo);
5348     }
5349     // Rotate, or mov to out for zero or word size rotations.
5350     if (rot != 0u) {
5351       __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5352       __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5353       __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5354       __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5355     } else {
5356       __ Mov(out_reg_lo, in_reg_lo);
5357       __ Mov(out_reg_hi, in_reg_hi);
5358     }
5359   } else {
5360     vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5361     vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5362     vixl32::Label end;
5363     vixl32::Label shift_by_32_plus_shift_right;
5364     vixl32::Label* final_label = codegen_->GetFinalLabel(rotate, &end);
5365 
5366     // Negate rhs (same approach as VisitNeg).
5367     if (rotate->IsRol()) {
5368       Location negated = locations->GetTemp(2);
5369       Location in = rhs;
5370 
5371       __ Rsb(RegisterFrom(negated), RegisterFrom(in), 0);
5372 
5373       rhs = negated;
5374     }
5375 
5376     __ And(shift_right, RegisterFrom(rhs), 0x1F);
5377     __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5378     __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5379     __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5380 
5381     // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5382     // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
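         // The two shifted halves occupy disjoint bit positions (shift_left is
         // 32 - shift_right), so the Add instructions below behave exactly like Orr.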
5383     __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5384     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5385     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5386     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5387     __ Lsr(shift_left, in_reg_hi, shift_right);
5388     __ Add(out_reg_lo, out_reg_lo, shift_left);
5389     __ B(final_label);
5390 
5391     __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
5392     // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5393     // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5394     __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5395     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5396     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5397     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5398     __ Lsl(shift_right, in_reg_hi, shift_left);
5399     __ Add(out_reg_lo, out_reg_lo, shift_right);
5400 
5401     if (end.IsReferenced()) {
5402       __ Bind(&end);
5403     }
5404   }
5405 }
5406 
HandleRotate(HBinaryOperation * rotate)5407 void LocationsBuilderARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5408   LocationSummary* locations =
5409       new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5410   HInstruction* shift = rotate->InputAt(1);
5411   switch (rotate->GetResultType()) {
5412     case DataType::Type::kInt32: {
5413       locations->SetInAt(0, Location::RequiresRegister());
5414       locations->SetInAt(1, Location::RegisterOrConstant(shift));
5415       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5416       break;
5417     }
5418     case DataType::Type::kInt64: {
5419       locations->SetInAt(0, Location::RequiresRegister());
5420       if (shift->IsConstant()) {
5421         locations->SetInAt(1, Location::ConstantLocation(shift));
5422       } else {
5423         locations->SetInAt(1, Location::RequiresRegister());
5424 
5425         if (rotate->IsRor()) {
5426           locations->AddRegisterTemps(2);
5427         } else {
5428           DCHECK(rotate->IsRol());
5429           locations->AddRegisterTemps(3);
5430         }
5431       }
5432       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5433       break;
5434     }
5435     default:
5436       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5437   }
5438 }
5439 
VisitRol(HRol * rol)5440 void LocationsBuilderARMVIXL::VisitRol(HRol* rol) {
5441   HandleRotate(rol);
5442 }
5443 
VisitRor(HRor * ror)5444 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5445   HandleRotate(ror);
5446 }
5447 
HandleRotate(HBinaryOperation * rotate)5448 void InstructionCodeGeneratorARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5449   DataType::Type type = rotate->GetResultType();
5450   switch (type) {
5451     case DataType::Type::kInt32: {
5452       HandleIntegerRotate(rotate);
5453       break;
5454     }
5455     case DataType::Type::kInt64: {
5456       HandleLongRotate(rotate);
5457       break;
5458     }
5459     default:
5460       LOG(FATAL) << "Unexpected operation type " << type;
5461       UNREACHABLE();
5462   }
5463 }
5464 
VisitRol(HRol * rol)5465 void InstructionCodeGeneratorARMVIXL::VisitRol(HRol* rol) {
5466   HandleRotate(rol);
5467 }
5468 
VisitRor(HRor * ror)5469 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5470   HandleRotate(ror);
5471 }
5472 
HandleShift(HBinaryOperation * op)5473 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5474   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5475 
5476   LocationSummary* locations =
5477       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5478 
5479   HInstruction* shift = op->InputAt(1);
5480   switch (op->GetResultType()) {
5481     case DataType::Type::kInt32: {
5482       locations->SetInAt(0, Location::RequiresRegister());
5483       if (shift->IsConstant()) {
5484         locations->SetInAt(1, Location::ConstantLocation(shift));
5485         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5486       } else {
5487         locations->SetInAt(1, Location::RequiresRegister());
5488         // Make the output overlap, as it will be used to hold the masked
5489         // second input.
5490         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5491       }
5492       break;
5493     }
5494     case DataType::Type::kInt64: {
5495       locations->SetInAt(0, Location::RequiresRegister());
5496       if (shift->IsConstant()) {
5497         locations->SetInAt(1, Location::ConstantLocation(shift));
5498         // For simplicity, use kOutputOverlap even though we only require that low registers
5499         // don't clash with high registers, which the register allocator currently guarantees.
5500         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5501       } else {
5502         locations->SetInAt(1, Location::RequiresRegister());
5503         locations->AddTemp(Location::RequiresRegister());
5504         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5505       }
5506       break;
5507     }
5508     default:
5509       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5510   }
5511 }
5512 
HandleShift(HBinaryOperation * op)5513 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5514   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5515 
5516   LocationSummary* locations = op->GetLocations();
5517   Location out = locations->Out();
5518   Location first = locations->InAt(0);
5519   Location second = locations->InAt(1);
5520 
5521   DataType::Type type = op->GetResultType();
5522   switch (type) {
5523     case DataType::Type::kInt32: {
5524       vixl32::Register out_reg = OutputRegister(op);
5525       vixl32::Register first_reg = InputRegisterAt(op, 0);
5526       if (second.IsRegister()) {
5527         vixl32::Register second_reg = RegisterFrom(second);
5528         // ARM doesn't mask the shift count so we need to do it ourselves.
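             // Java uses only the low five bits of an int shift count (e.g. x << 33 is
             // x << 1), hence the And with kMaxIntShiftDistance (0x1f) below.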
5529         __ And(out_reg, second_reg, kMaxIntShiftDistance);
5530         if (op->IsShl()) {
5531           __ Lsl(out_reg, first_reg, out_reg);
5532         } else if (op->IsShr()) {
5533           __ Asr(out_reg, first_reg, out_reg);
5534         } else {
5535           __ Lsr(out_reg, first_reg, out_reg);
5536         }
5537       } else {
5538         int32_t cst = Int32ConstantFrom(second);
5539         uint32_t shift_value = cst & kMaxIntShiftDistance;
5540         if (shift_value == 0) {  // ARM does not support shifting with 0 immediate.
5541           __ Mov(out_reg, first_reg);
5542         } else if (op->IsShl()) {
5543           __ Lsl(out_reg, first_reg, shift_value);
5544         } else if (op->IsShr()) {
5545           __ Asr(out_reg, first_reg, shift_value);
5546         } else {
5547           __ Lsr(out_reg, first_reg, shift_value);
5548         }
5549       }
5550       break;
5551     }
5552     case DataType::Type::kInt64: {
5553       vixl32::Register o_h = HighRegisterFrom(out);
5554       vixl32::Register o_l = LowRegisterFrom(out);
5555 
5556       vixl32::Register high = HighRegisterFrom(first);
5557       vixl32::Register low = LowRegisterFrom(first);
5558 
5559       if (second.IsRegister()) {
5560         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5561 
5562         vixl32::Register second_reg = RegisterFrom(second);
5563 
5564         if (op->IsShl()) {
5565           __ And(o_l, second_reg, kMaxLongShiftDistance);
5566           // Shift the high part
5567           __ Lsl(o_h, high, o_l);
5568           // Shift the low part and `or` what overflowed onto the high part
5569           __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5570           __ Lsr(temp, low, temp);
5571           __ Orr(o_h, o_h, temp);
5572           // If the shift is > 32 bits, override the high part
5573           __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
5574           {
5575             ExactAssemblyScope guard(GetVIXLAssembler(),
5576                                      2 * vixl32::kMaxInstructionSizeInBytes,
5577                                      CodeBufferCheckScope::kMaximumSize);
5578             __ it(pl);
5579             __ lsl(pl, o_h, low, temp);
5580           }
5581           // Shift the low part
5582           __ Lsl(o_l, low, o_l);
5583         } else if (op->IsShr()) {
5584           __ And(o_h, second_reg, kMaxLongShiftDistance);
5585           // Shift the low part
5586           __ Lsr(o_l, low, o_h);
5587           // Shift the high part and `or` what underflowed onto the low part
5588           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5589           __ Lsl(temp, high, temp);
5590           __ Orr(o_l, o_l, temp);
5591           // If the shift is > 32 bits, override the low part
5592           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5593           {
5594             ExactAssemblyScope guard(GetVIXLAssembler(),
5595                                      2 * vixl32::kMaxInstructionSizeInBytes,
5596                                      CodeBufferCheckScope::kMaximumSize);
5597             __ it(pl);
5598             __ asr(pl, o_l, high, temp);
5599           }
5600           // Shift the high part
5601           __ Asr(o_h, high, o_h);
5602         } else {
5603           __ And(o_h, second_reg, kMaxLongShiftDistance);
5604           // Same as Shr, except we use `Lsr`s and not `Asr`s
5605           __ Lsr(o_l, low, o_h);
5606           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5607           __ Lsl(temp, high, temp);
5608           __ Orr(o_l, o_l, temp);
5609           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5610           {
5611             ExactAssemblyScope guard(GetVIXLAssembler(),
5612                                      2 * vixl32::kMaxInstructionSizeInBytes,
5613                                      CodeBufferCheckScope::kMaximumSize);
5614             __ it(pl);
5615             __ lsr(pl, o_l, high, temp);
5616           }
5617           __ Lsr(o_h, high, o_h);
5618         }
5619       } else {
5620         // Register allocator doesn't create partial overlap.
5621         DCHECK(!o_l.Is(high));
5622         DCHECK(!o_h.Is(low));
5623         int32_t cst = Int32ConstantFrom(second);
5624         uint32_t shift_value = cst & kMaxLongShiftDistance;
5625         if (shift_value > 32) {
5626           if (op->IsShl()) {
5627             __ Lsl(o_h, low, shift_value - 32);
5628             __ Mov(o_l, 0);
5629           } else if (op->IsShr()) {
5630             __ Asr(o_l, high, shift_value - 32);
5631             __ Asr(o_h, high, 31);
5632           } else {
5633             __ Lsr(o_l, high, shift_value - 32);
5634             __ Mov(o_h, 0);
5635           }
5636         } else if (shift_value == 32) {
5637           if (op->IsShl()) {
5638             __ Mov(o_h, low);
5639             __ Mov(o_l, 0);
5640           } else if (op->IsShr()) {
5641             __ Mov(o_l, high);
5642             __ Asr(o_h, high, 31);
5643           } else {
5644             __ Mov(o_l, high);
5645             __ Mov(o_h, 0);
5646           }
5647         } else if (shift_value == 1) {
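               // A one-bit shift goes through the carry flag: Lsls/Asrs/Lsrs set C to
               // the bit shifted out, Adc computes (high << 1) + C, and Rrx shifts right
               // by one inserting C into the top bit.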
5648           if (op->IsShl()) {
5649             __ Lsls(o_l, low, 1);
5650             __ Adc(o_h, high, high);
5651           } else if (op->IsShr()) {
5652             __ Asrs(o_h, high, 1);
5653             __ Rrx(o_l, low);
5654           } else {
5655             __ Lsrs(o_h, high, 1);
5656             __ Rrx(o_l, low);
5657           }
5658         } else if (shift_value == 0) {
5659           __ Mov(o_l, low);
5660           __ Mov(o_h, high);
5661         } else {
5662           DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5663           if (op->IsShl()) {
5664             __ Lsl(o_h, high, shift_value);
5665             __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5666             __ Lsl(o_l, low, shift_value);
5667           } else if (op->IsShr()) {
5668             __ Lsr(o_l, low, shift_value);
5669             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5670             __ Asr(o_h, high, shift_value);
5671           } else {
5672             __ Lsr(o_l, low, shift_value);
5673             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5674             __ Lsr(o_h, high, shift_value);
5675           }
5676         }
5677       }
5678       break;
5679     }
5680     default:
5681       LOG(FATAL) << "Unexpected operation type " << type;
5682       UNREACHABLE();
5683   }
5684 }
5685 
VisitShl(HShl * shl)5686 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5687   HandleShift(shl);
5688 }
5689 
VisitShl(HShl * shl)5690 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5691   HandleShift(shl);
5692 }
5693 
VisitShr(HShr * shr)5694 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5695   HandleShift(shr);
5696 }
5697 
VisitShr(HShr * shr)5698 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5699   HandleShift(shr);
5700 }
5701 
VisitUShr(HUShr * ushr)5702 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5703   HandleShift(ushr);
5704 }
5705 
VisitUShr(HUShr * ushr)5706 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5707   HandleShift(ushr);
5708 }
5709 
VisitNewInstance(HNewInstance * instruction)5710 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5711   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5712       instruction, LocationSummary::kCallOnMainOnly);
5713   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5714   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5715   locations->SetOut(LocationFrom(r0));
5716 }
5717 
VisitNewInstance(HNewInstance * instruction)5718 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5719   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5720   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5721   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5722 }
5723 
VisitNewArray(HNewArray * instruction)5724 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5725   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5726       instruction, LocationSummary::kCallOnMainOnly);
5727   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5728   locations->SetOut(LocationFrom(r0));
5729   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5730   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5731 }
5732 
VisitNewArray(HNewArray * instruction)5733 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5734   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5735   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5736   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5737   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5738   DCHECK(!codegen_->IsLeafMethod());
5739   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5740 }
5741 
VisitParameterValue(HParameterValue * instruction)5742 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5743   LocationSummary* locations =
5744       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5745   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5746   if (location.IsStackSlot()) {
5747     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5748   } else if (location.IsDoubleStackSlot()) {
5749     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5750   }
5751   locations->SetOut(location);
5752 }
5753 
VisitParameterValue(HParameterValue * instruction)5754 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5755     [[maybe_unused]] HParameterValue* instruction) {
5756   // Nothing to do, the parameter is already at its location.
5757 }
5758 
VisitCurrentMethod(HCurrentMethod * instruction)5759 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5760   LocationSummary* locations =
5761       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5762   locations->SetOut(LocationFrom(kMethodRegister));
5763 }
5764 
VisitCurrentMethod(HCurrentMethod * instruction)5765 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5766     [[maybe_unused]] HCurrentMethod* instruction) {
5767   // Nothing to do, the method is already at its location.
5768 }
5769 
VisitNot(HNot * not_)5770 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5771   LocationSummary* locations =
5772       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5773   locations->SetInAt(0, Location::RequiresRegister());
5774   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5775 }
5776 
VisitNot(HNot * not_)5777 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5778   LocationSummary* locations = not_->GetLocations();
5779   Location out = locations->Out();
5780   Location in = locations->InAt(0);
5781   switch (not_->GetResultType()) {
5782     case DataType::Type::kInt32:
5783       __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5784       break;
5785 
5786     case DataType::Type::kInt64:
5787       __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5788       __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5789       break;
5790 
5791     default:
5792       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5793   }
5794 }
5795 
VisitBooleanNot(HBooleanNot * bool_not)5796 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5797   LocationSummary* locations =
5798       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5799   locations->SetInAt(0, Location::RequiresRegister());
5800   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5801 }
5802 
VisitBooleanNot(HBooleanNot * bool_not)5803 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5804   __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5805 }
5806 
VisitCompare(HCompare * compare)5807 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5808   LocationSummary* locations =
5809       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5810   switch (compare->GetComparisonType()) {
5811     case DataType::Type::kBool:
5812     case DataType::Type::kUint8:
5813     case DataType::Type::kInt8:
5814     case DataType::Type::kUint16:
5815     case DataType::Type::kInt16:
5816     case DataType::Type::kInt32:
5817     case DataType::Type::kUint32:
5818     case DataType::Type::kInt64:
5819     case DataType::Type::kUint64: {
5820       locations->SetInAt(0, Location::RequiresRegister());
5821       locations->SetInAt(1, Location::RequiresRegister());
5822       // Output overlaps because it is written before doing the low comparison.
5823       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5824       break;
5825     }
5826     case DataType::Type::kFloat32:
5827     case DataType::Type::kFloat64: {
5828       locations->SetInAt(0, Location::RequiresFpuRegister());
5829       locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5830       locations->SetOut(Location::RequiresRegister());
5831       break;
5832     }
5833     default:
5834       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5835   }
5836 }
5837 
VisitCompare(HCompare * compare)5838 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5839   LocationSummary* locations = compare->GetLocations();
5840   vixl32::Register out = OutputRegister(compare);
5841   Location left = locations->InAt(0);
5842   Location right = locations->InAt(1);
5843 
5844   vixl32::Label less, greater, done;
5845   vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5846   DataType::Type type = compare->GetComparisonType();
5847   vixl32::Condition less_cond = vixl32::ConditionType::lt;
5848   vixl32::Condition greater_cond = vixl32::ConditionType::gt;
5849   switch (type) {
5850     case DataType::Type::kUint32:
5851       less_cond = vixl32::ConditionType::lo;
5852       // greater_cond is not needed below.
5853       FALLTHROUGH_INTENDED;
5854     case DataType::Type::kBool:
5855     case DataType::Type::kUint8:
5856     case DataType::Type::kInt8:
5857     case DataType::Type::kUint16:
5858     case DataType::Type::kInt16:
5859     case DataType::Type::kInt32: {
5860       // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5861       __ Mov(out, 0);
5862       __ Cmp(RegisterFrom(left), RegisterFrom(right));
5863       break;
5864     }
5865     case DataType::Type::kUint64:
5866       less_cond = vixl32::ConditionType::lo;
5867       greater_cond = vixl32::ConditionType::hi;
5868       FALLTHROUGH_INTENDED;
5869     case DataType::Type::kInt64: {
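           // The high words decide the result unless they are equal, in which case the
           // low words are compared as unsigned values (hence `lo` below).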
5870       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));  // High part compare.
5871       __ B(less_cond, &less, /* is_far_target= */ false);
5872       __ B(greater_cond, &greater, /* is_far_target= */ false);
5873       // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5874       __ Mov(out, 0);
5875       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));  // Unsigned compare.
5876       less_cond = vixl32::ConditionType::lo;
5877       // greater_cond is not needed below.
5878       break;
5879     }
5880     case DataType::Type::kFloat32:
5881     case DataType::Type::kFloat64: {
5882       __ Mov(out, 0);
5883       GenerateVcmp(compare, codegen_);
5884       // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5885       __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5886       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5887       break;
5888     }
5889     default:
5890       LOG(FATAL) << "Unexpected compare type " << type;
5891       UNREACHABLE();
5892   }
5893 
5894   __ B(eq, final_label, /* is_far_target= */ false);
5895   __ B(less_cond, &less, /* is_far_target= */ false);
5896 
5897   __ Bind(&greater);
5898   __ Mov(out, 1);
5899   __ B(final_label);
5900 
5901   __ Bind(&less);
5902   __ Mov(out, -1);
5903 
5904   if (done.IsReferenced()) {
5905     __ Bind(&done);
5906   }
5907 }
5908 
VisitPhi(HPhi * instruction)5909 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5910   LocationSummary* locations =
5911       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5912   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5913     locations->SetInAt(i, Location::Any());
5914   }
5915   locations->SetOut(Location::Any());
5916 }
5917 
VisitPhi(HPhi * instruction)5918 void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
5919   LOG(FATAL) << "Unreachable";
5920 }
5921 
GenerateMemoryBarrier(MemBarrierKind kind)5922 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5923   // TODO (ported from quick): revisit ARM barrier kinds.
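       // kAnyStore, kLoadAny and kAnyAny conservatively use a full DMB ISH, while
       // kStoreStore only needs to order stores against stores, so DMB ISHST suffices.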
5924   DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
5925   switch (kind) {
5926     case MemBarrierKind::kAnyStore:
5927     case MemBarrierKind::kLoadAny:
5928     case MemBarrierKind::kAnyAny: {
5929       flavor = DmbOptions::ISH;
5930       break;
5931     }
5932     case MemBarrierKind::kStoreStore: {
5933       flavor = DmbOptions::ISHST;
5934       break;
5935     }
5936     default:
5937       LOG(FATAL) << "Unexpected memory barrier " << kind;
5938   }
5939   __ Dmb(flavor);
5940 }
5941 
GenerateWideAtomicLoad(vixl32::Register addr,uint32_t offset,vixl32::Register out_lo,vixl32::Register out_hi)5942 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5943                                                              uint32_t offset,
5944                                                              vixl32::Register out_lo,
5945                                                              vixl32::Register out_hi) {
5946   UseScratchRegisterScope temps(GetVIXLAssembler());
5947   if (offset != 0) {
5948     vixl32::Register temp = temps.Acquire();
5949     __ Add(temp, addr, offset);
5950     addr = temp;
5951   }
5952   __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5953 }
5954 
GenerateWideAtomicStore(vixl32::Register addr,uint32_t offset,vixl32::Register value_lo,vixl32::Register value_hi,vixl32::Register temp1,vixl32::Register temp2,HInstruction * instruction)5955 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5956                                                               uint32_t offset,
5957                                                               vixl32::Register value_lo,
5958                                                               vixl32::Register value_hi,
5959                                                               vixl32::Register temp1,
5960                                                               vixl32::Register temp2,
5961                                                               HInstruction* instruction) {
5962   UseScratchRegisterScope temps(GetVIXLAssembler());
5963   vixl32::Label fail;
5964   if (offset != 0) {
5965     vixl32::Register temp = temps.Acquire();
5966     __ Add(temp, addr, offset);
5967     addr = temp;
5968   }
5969   __ Bind(&fail);
5970   {
5971     // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5972     ExactAssemblyScope aas(GetVIXLAssembler(),
5973                            vixl32::kMaxInstructionSizeInBytes,
5974                            CodeBufferCheckScope::kMaximumSize);
5975     // We need a load followed by a store. (The address used in a STREX instruction
5976     // must be the same as the address in the most recently executed LDREX instruction.)
5977     __ ldrexd(temp1, temp2, MemOperand(addr));
5978     codegen_->MaybeRecordImplicitNullCheck(instruction);
5979   }
5980   __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
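       // Strexd writes 0 to temp1 on success and 1 if the exclusive monitor was lost,
       // in which case the ldrexd/strexd sequence is retried from `fail`.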
5981   __ CompareAndBranchIfNonZero(temp1, &fail);
5982 }
5983 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,WriteBarrierKind write_barrier_kind)5984 void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
5985                                              const FieldInfo& field_info,
5986                                              WriteBarrierKind write_barrier_kind) {
5987   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5988 
5989   LocationSummary* locations =
5990       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5991   locations->SetInAt(0, Location::RequiresRegister());
5992 
5993   DataType::Type field_type = field_info.GetFieldType();
5994   if (DataType::IsFloatingPointType(field_type)) {
5995     locations->SetInAt(1, Location::RequiresFpuRegister());
5996   } else {
5997     locations->SetInAt(1, Location::RequiresRegister());
5998   }
5999 
6000   bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
6001   bool generate_volatile = field_info.IsVolatile()
6002       && is_wide
6003       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
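       // Note: when wide ldrd/strd are not single-copy atomic on the target, volatile
       // 64-bit stores fall back to the ldrexd/strexd loop in GenerateWideAtomicStore(),
       // which needs the extra temporaries reserved below.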
6004   bool needs_write_barrier =
6005       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6006   bool check_gc_card =
6007       codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
6008 
6009   // Temporary registers for the write barrier.
6010   // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
6011   if (needs_write_barrier || check_gc_card) {
6012     locations->AddRegisterTemps(2);
6013   } else if (generate_volatile) {
6014     // The ARM encoding has some additional constraints for ldrexd/strexd:
6015     // - registers need to be consecutive
6016     // - the first register should be even but not R14.
6017     // We don't test for ARM yet, and the assertion makes sure that we
6018     // revisit this if we ever enable ARM encoding.
6019     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6020     locations->AddRegisterTemps(2);
6021     if (field_type == DataType::Type::kFloat64) {
6022       // For doubles we need two more registers to copy the value.
6023       locations->AddTemp(LocationFrom(r2));
6024       locations->AddTemp(LocationFrom(r3));
6025     }
6026   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6027     locations->AddTemp(Location::RequiresRegister());
6028   }
6029 }
6030 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null,WriteBarrierKind write_barrier_kind)6031 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
6032                                                      const FieldInfo& field_info,
6033                                                      bool value_can_be_null,
6034                                                      WriteBarrierKind write_barrier_kind) {
6035   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6036 
6037   LocationSummary* locations = instruction->GetLocations();
6038   vixl32::Register base = InputRegisterAt(instruction, 0);
6039   Location value = locations->InAt(1);
6040 
6041   bool is_volatile = field_info.IsVolatile();
6042   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6043   DataType::Type field_type = field_info.GetFieldType();
6044   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6045   bool needs_write_barrier =
6046       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6047 
6048   if (is_volatile) {
6049     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6050   }
6051 
6052   switch (field_type) {
6053     case DataType::Type::kBool:
6054     case DataType::Type::kUint8:
6055     case DataType::Type::kInt8:
6056     case DataType::Type::kUint16:
6057     case DataType::Type::kInt16:
6058     case DataType::Type::kInt32: {
6059       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6060       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6061       StoreOperandType operand_type = GetStoreOperandType(field_type);
6062       GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
6063       codegen_->MaybeRecordImplicitNullCheck(instruction);
6064       break;
6065     }
6066 
6067     case DataType::Type::kReference: {
6068       vixl32::Register value_reg = RegisterFrom(value);
6069       if (kPoisonHeapReferences) {
6070         DCHECK_EQ(field_type, DataType::Type::kReference);
6071         value_reg = RegisterFrom(locations->GetTemp(0));
6072         __ Mov(value_reg, RegisterFrom(value));
6073         GetAssembler()->PoisonHeapReference(value_reg);
6074       }
6075       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6076       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6077       GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
6078       codegen_->MaybeRecordImplicitNullCheck(instruction);
6079       break;
6080     }
6081 
6082     case DataType::Type::kInt64: {
6083       if (is_volatile && !atomic_ldrd_strd) {
6084         GenerateWideAtomicStore(base,
6085                                 offset,
6086                                 LowRegisterFrom(value),
6087                                 HighRegisterFrom(value),
6088                                 RegisterFrom(locations->GetTemp(0)),
6089                                 RegisterFrom(locations->GetTemp(1)),
6090                                 instruction);
6091       } else {
6092         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6093         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6094         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
6095         codegen_->MaybeRecordImplicitNullCheck(instruction);
6096       }
6097       break;
6098     }
6099 
6100     case DataType::Type::kFloat32: {
6101       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6102       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6103       GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
6104       codegen_->MaybeRecordImplicitNullCheck(instruction);
6105       break;
6106     }
6107 
6108     case DataType::Type::kFloat64: {
6109       vixl32::DRegister value_reg = DRegisterFrom(value);
6110       if (is_volatile && !atomic_ldrd_strd) {
6111         vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
6112         vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
6113 
6114         __ Vmov(value_reg_lo, value_reg_hi, value_reg);
6115 
6116         GenerateWideAtomicStore(base,
6117                                 offset,
6118                                 value_reg_lo,
6119                                 value_reg_hi,
6120                                 RegisterFrom(locations->GetTemp(2)),
6121                                 RegisterFrom(locations->GetTemp(3)),
6122                                 instruction);
6123       } else {
6124         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6125         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6126         GetAssembler()->StoreDToOffset(value_reg, base, offset);
6127         codegen_->MaybeRecordImplicitNullCheck(instruction);
6128       }
6129       break;
6130     }
6131 
6132     case DataType::Type::kUint32:
6133     case DataType::Type::kUint64:
6134     case DataType::Type::kVoid:
6135       LOG(FATAL) << "Unreachable type " << field_type;
6136       UNREACHABLE();
6137   }
6138 
6139   if (needs_write_barrier) {
6140     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6141     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6142     codegen_->MaybeMarkGCCard(
6143         temp,
6144         card,
6145         base,
6146         RegisterFrom(value),
6147         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6148   } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6149     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6150     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6151     codegen_->CheckGCCardIsValid(temp, card, base);
6152   }
6153 
6154   if (is_volatile) {
6155     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6156   }
6157 }
6158 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)6159 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
6160                                              const FieldInfo& field_info) {
6161   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6162 
6163   bool object_field_get_with_read_barrier =
6164       (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6165   LocationSummary* locations =
6166       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6167                                                        object_field_get_with_read_barrier
6168                                                            ? LocationSummary::kCallOnSlowPath
6169                                                            : LocationSummary::kNoCall);
6170   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6171     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6172   }
6173   // Input for object receiver.
6174   locations->SetInAt(0, Location::RequiresRegister());
6175 
6176   bool volatile_for_double = field_info.IsVolatile()
6177       && (field_info.GetFieldType() == DataType::Type::kFloat64)
6178       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6179   // The output overlaps in case of volatile long: we don't want the code generated by
6180   // `GenerateWideAtomicLoad()` to overwrite the object's location.  Likewise, in the case
6181   // of an object field get with non-Baker read barriers enabled, we do not want the load
6182   // to overwrite the object's location, as we need it to emit the read barrier.
6183   // Baker read barrier implementation with introspection does not have this restriction.
6184   bool overlap =
6185       (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
6186       (object_field_get_with_read_barrier && !kUseBakerReadBarrier);
6187 
6188   if (DataType::IsFloatingPointType(instruction->GetType())) {
6189     locations->SetOut(Location::RequiresFpuRegister());
6190   } else {
6191     locations->SetOut(Location::RequiresRegister(),
6192                       (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
6193   }
6194   if (volatile_for_double) {
6195     // The ARM encoding has some additional constraints for ldrexd/strexd:
6196     // - registers need to be consecutive
6197     // - the first register should be even but not R14.
6198     // We don't test for ARM yet, and the assertion makes sure that we
6199     // revisit this if we ever enable ARM encoding.
6200     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6201     locations->AddRegisterTemps(2);
6202   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6203     // We need a temporary register for the read barrier load in
6204     // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6205     // only if the offset is too big.
6206     if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
6207       locations->AddTemp(Location::RequiresRegister());
6208     }
6209   }
6210 }
6211 
ArithmeticZeroOrFpuRegister(HInstruction * input)6212 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
6213   DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
6214   if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
6215       (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
6216     return Location::ConstantLocation(input);
6217   } else {
6218     return Location::RequiresFpuRegister();
6219   }
6220 }
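     // Keeping an arithmetic zero in a constant location (instead of a register) lets later code,
     // such as the floating-point compare generation, use the VCMP form that compares directly
     // against +0.0 rather than materializing the zero in an FP register.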
6221 
6222 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
6223                                                                  Opcode opcode) {
6224   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
6225   if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
6226     return Location::ConstantLocation(constant);
6227   }
6228   return Location::RequiresRegister();
6229 }
6230 
6231 static bool CanEncode32BitConstantAsImmediate(
6232     CodeGeneratorARMVIXL* codegen,
6233     uint32_t value,
6234     Opcode opcode,
6235     vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
6236   ArmVIXLAssembler* assembler = codegen->GetAssembler();
6237   if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
6238     return true;
6239   }
6240   Opcode neg_opcode = kNoOperand;
6241   uint32_t neg_value = 0;
6242   switch (opcode) {
6243     case AND: neg_opcode = BIC; neg_value = ~value; break;
6244     case ORR: neg_opcode = ORN; neg_value = ~value; break;
6245     case ADD: neg_opcode = SUB; neg_value = -value; break;
6246     case ADC: neg_opcode = SBC; neg_value = ~value; break;
6247     case SUB: neg_opcode = ADD; neg_value = -value; break;
6248     case SBC: neg_opcode = ADC; neg_value = ~value; break;
6249     case MOV: neg_opcode = MVN; neg_value = ~value; break;
6250     default:
6251       return false;
6252   }
6253 
6254   if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
6255     return true;
6256   }
6257 
6258   return opcode == AND && IsPowerOfTwo(value + 1);
6259 }
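     // Worked example: `and rd, rn, #0xffffff00` has no modified-immediate encoding, but the
     // complemented mask 0xff does, so the operation can instead be emitted as `bic rd, rn, #0xff`.
     // The final special case (an AND whose mask + 1 is a power of two, i.e. a low-bit mask) is
     // expected to be emitted as an unsigned bitfield extract (UBFX) by the code generator.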
6260 
6261 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
6262   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
6263   if (DataType::Is64BitType(input_cst->GetType())) {
6264     Opcode high_opcode = opcode;
6265     vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
6266     switch (opcode) {
6267       case SUB:
6268         // Flip the operation to an ADD.
6269         value = -value;
6270         opcode = ADD;
6271         FALLTHROUGH_INTENDED;
6272       case ADD:
6273         if (Low32Bits(value) == 0u) {
6274           return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
6275         }
6276         high_opcode = ADC;
6277         low_flags_update = vixl32::FlagsUpdate::SetFlags;
6278         break;
6279       default:
6280         break;
6281     }
6282     return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6283            CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6284   } else {
6285     return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6286   }
6287 }
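     // In other words, a 64-bit ADD/SUB of a constant is expected to be emitted as an ADDS on the
     // low word (hence the SetFlags requirement) followed by an ADC on the high word consuming the
     // carry, so both halves must be encodable with their respective opcodes.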
6288 
6289 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6290                                                      const FieldInfo& field_info) {
6291   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6292 
6293   LocationSummary* locations = instruction->GetLocations();
6294   uint32_t receiver_input = 0;
6295   vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6296   Location out = locations->Out();
6297   bool is_volatile = field_info.IsVolatile();
6298   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6299   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6300   DataType::Type load_type = instruction->GetType();
6301   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6302 
6303   switch (load_type) {
6304     case DataType::Type::kBool:
6305     case DataType::Type::kUint8:
6306     case DataType::Type::kInt8:
6307     case DataType::Type::kUint16:
6308     case DataType::Type::kInt16:
6309     case DataType::Type::kInt32: {
6310       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6311       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6312       LoadOperandType operand_type = GetLoadOperandType(load_type);
6313       GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6314       codegen_->MaybeRecordImplicitNullCheck(instruction);
6315       break;
6316     }
6317 
6318     case DataType::Type::kReference: {
6319       // /* HeapReference<Object> */ out = *(base + offset)
6320       if (codegen_->EmitBakerReadBarrier()) {
6321         Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6322         // Note that a potential implicit null check is handled in this
6323         // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6324         codegen_->GenerateFieldLoadWithBakerReadBarrier(
6325             instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6326         if (is_volatile) {
6327           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6328         }
6329       } else {
6330         {
6331           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6332           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6333           GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6334           codegen_->MaybeRecordImplicitNullCheck(instruction);
6335         }
6336         if (is_volatile) {
6337           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6338         }
6339         // If read barriers are enabled, emit read barriers other than
6340         // Baker's using a slow path (and also unpoison the loaded
6341         // reference, if heap poisoning is enabled).
6342         codegen_->MaybeGenerateReadBarrierSlow(
6343             instruction, out, out, locations->InAt(receiver_input), offset);
6344       }
6345       break;
6346     }
6347 
6348     case DataType::Type::kInt64: {
6349       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6350       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6351       if (is_volatile && !atomic_ldrd_strd) {
6352         GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6353       } else {
6354         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6355       }
6356       codegen_->MaybeRecordImplicitNullCheck(instruction);
6357       break;
6358     }
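         // Note: when LDRD is not guaranteed to be single-copy atomic on this CPU,
         // GenerateWideAtomicLoad() is expected to use an LDREXD to read the 64-bit value
         // atomically.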
6359 
6360     case DataType::Type::kFloat32: {
6361       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6362       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6363       GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6364       codegen_->MaybeRecordImplicitNullCheck(instruction);
6365       break;
6366     }
6367 
6368     case DataType::Type::kFloat64: {
6369       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6370       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6371       vixl32::DRegister out_dreg = DRegisterFrom(out);
6372       if (is_volatile && !atomic_ldrd_strd) {
6373         vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6374         vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6375         GenerateWideAtomicLoad(base, offset, lo, hi);
6376         codegen_->MaybeRecordImplicitNullCheck(instruction);
6377         __ Vmov(out_dreg, lo, hi);
6378       } else {
6379         GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6380         codegen_->MaybeRecordImplicitNullCheck(instruction);
6381       }
6382       break;
6383     }
6384 
6385     case DataType::Type::kUint32:
6386     case DataType::Type::kUint64:
6387     case DataType::Type::kVoid:
6388       LOG(FATAL) << "Unreachable type " << load_type;
6389       UNREACHABLE();
6390   }
6391 
6392   if (is_volatile) {
6393     if (load_type == DataType::Type::kReference) {
6394       // Memory barriers, in the case of references, are also handled
6395       // in the previous switch statement.
6396     } else {
6397       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6398     }
6399   }
6400 }
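     // For volatile loads, the kLoadAny barrier emitted above (typically a `dmb ish`) orders the
     // load before all subsequent memory accesses, giving the field read acquire semantics.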
6401 
6402 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6403   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6404 }
6405 
6406 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6407   HandleFieldSet(instruction,
6408                  instruction->GetFieldInfo(),
6409                  instruction->GetValueCanBeNull(),
6410                  instruction->GetWriteBarrierKind());
6411 }
6412 
6413 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6414   HandleFieldGet(instruction, instruction->GetFieldInfo());
6415 }
6416 
6417 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6418   HandleFieldGet(instruction, instruction->GetFieldInfo());
6419 }
6420 
6421 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6422   HandleFieldGet(instruction, instruction->GetFieldInfo());
6423 }
6424 
6425 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6426   HandleFieldGet(instruction, instruction->GetFieldInfo());
6427 }
6428 
6429 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6430   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6431 }
6432 
6433 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6434   HandleFieldSet(instruction,
6435                  instruction->GetFieldInfo(),
6436                  instruction->GetValueCanBeNull(),
6437                  instruction->GetWriteBarrierKind());
6438 }
6439 
6440 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6441   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6442 }
6443 
6444 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6445   __ Mov(r0, instruction->GetFormat()->GetValue());
6446   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6447 }
6448 
6449 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6450     HUnresolvedInstanceFieldGet* instruction) {
6451   FieldAccessCallingConventionARMVIXL calling_convention;
6452   codegen_->CreateUnresolvedFieldLocationSummary(
6453       instruction, instruction->GetFieldType(), calling_convention);
6454 }
6455 
6456 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6457     HUnresolvedInstanceFieldGet* instruction) {
6458   FieldAccessCallingConventionARMVIXL calling_convention;
6459   codegen_->GenerateUnresolvedFieldAccess(instruction,
6460                                           instruction->GetFieldType(),
6461                                           instruction->GetFieldIndex(),
6462                                           instruction->GetDexPc(),
6463                                           calling_convention);
6464 }
6465 
6466 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6467     HUnresolvedInstanceFieldSet* instruction) {
6468   FieldAccessCallingConventionARMVIXL calling_convention;
6469   codegen_->CreateUnresolvedFieldLocationSummary(
6470       instruction, instruction->GetFieldType(), calling_convention);
6471 }
6472 
6473 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6474     HUnresolvedInstanceFieldSet* instruction) {
6475   FieldAccessCallingConventionARMVIXL calling_convention;
6476   codegen_->GenerateUnresolvedFieldAccess(instruction,
6477                                           instruction->GetFieldType(),
6478                                           instruction->GetFieldIndex(),
6479                                           instruction->GetDexPc(),
6480                                           calling_convention);
6481 }
6482 
6483 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6484     HUnresolvedStaticFieldGet* instruction) {
6485   FieldAccessCallingConventionARMVIXL calling_convention;
6486   codegen_->CreateUnresolvedFieldLocationSummary(
6487       instruction, instruction->GetFieldType(), calling_convention);
6488 }
6489 
6490 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6491     HUnresolvedStaticFieldGet* instruction) {
6492   FieldAccessCallingConventionARMVIXL calling_convention;
6493   codegen_->GenerateUnresolvedFieldAccess(instruction,
6494                                           instruction->GetFieldType(),
6495                                           instruction->GetFieldIndex(),
6496                                           instruction->GetDexPc(),
6497                                           calling_convention);
6498 }
6499 
6500 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6501     HUnresolvedStaticFieldSet* instruction) {
6502   FieldAccessCallingConventionARMVIXL calling_convention;
6503   codegen_->CreateUnresolvedFieldLocationSummary(
6504       instruction, instruction->GetFieldType(), calling_convention);
6505 }
6506 
6507 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6508     HUnresolvedStaticFieldSet* instruction) {
6509   FieldAccessCallingConventionARMVIXL calling_convention;
6510   codegen_->GenerateUnresolvedFieldAccess(instruction,
6511                                           instruction->GetFieldType(),
6512                                           instruction->GetFieldIndex(),
6513                                           instruction->GetDexPc(),
6514                                           calling_convention);
6515 }
6516 
6517 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6518   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6519   locations->SetInAt(0, Location::RequiresRegister());
6520 }
6521 
6522 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6523   if (CanMoveNullCheckToUser(instruction)) {
6524     return;
6525   }
6526 
6527   UseScratchRegisterScope temps(GetVIXLAssembler());
6528   // Ensure the pc position is recorded immediately after the `ldr` instruction.
6529   ExactAssemblyScope aas(GetVIXLAssembler(),
6530                          vixl32::kMaxInstructionSizeInBytes,
6531                          CodeBufferCheckScope::kMaximumSize);
6532   __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6533   RecordPcInfo(instruction, instruction->GetDexPc());
6534 }
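     // The implicit null check relies on the faulting `ldr` above: a null receiver triggers a
     // SIGSEGV, which the runtime's fault handler maps back to the recorded PC and turns into a
     // NullPointerException. The ExactAssemblyScope keeps literal pools and veneers from being
     // emitted between the load and RecordPcInfo(), so the recorded PC matches the faulting
     // instruction.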
6535 
6536 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6537   NullCheckSlowPathARMVIXL* slow_path =
6538       new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6539   AddSlowPath(slow_path);
6540   __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6541 }
6542 
6543 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6544   codegen_->GenerateNullCheck(instruction);
6545 }
6546 
6547 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6548                                                     Location out_loc,
6549                                                     vixl32::Register base,
6550                                                     vixl32::Register reg_index,
6551                                                     vixl32::Condition cond) {
6552   uint32_t shift_count = DataType::SizeShift(type);
6553   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6554 
6555   switch (type) {
6556     case DataType::Type::kBool:
6557     case DataType::Type::kUint8:
6558       __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6559       break;
6560     case DataType::Type::kInt8:
6561       __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6562       break;
6563     case DataType::Type::kUint16:
6564       __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6565       break;
6566     case DataType::Type::kInt16:
6567       __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6568       break;
6569     case DataType::Type::kReference:
6570     case DataType::Type::kInt32:
6571       __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6572       break;
6573     // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6574     case DataType::Type::kInt64:
6575     case DataType::Type::kFloat32:
6576     case DataType::Type::kFloat64:
6577     default:
6578       LOG(FATAL) << "Unreachable type " << type;
6579       UNREACHABLE();
6580   }
6581 }
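     // For example, for kInt32 (shift count 2) the code above emits `ldr out, [base, index, lsl #2]`.
     // 64-bit and floating-point types are excluded because T32 provides no shifted-register
     // addressing mode for LDRD/VLDR.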
6582 
6583 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6584                                                    Location loc,
6585                                                    vixl32::Register base,
6586                                                    vixl32::Register reg_index,
6587                                                    vixl32::Condition cond) {
6588   uint32_t shift_count = DataType::SizeShift(type);
6589   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6590 
6591   switch (type) {
6592     case DataType::Type::kBool:
6593     case DataType::Type::kUint8:
6594     case DataType::Type::kInt8:
6595       __ Strb(cond, RegisterFrom(loc), mem_address);
6596       break;
6597     case DataType::Type::kUint16:
6598     case DataType::Type::kInt16:
6599       __ Strh(cond, RegisterFrom(loc), mem_address);
6600       break;
6601     case DataType::Type::kReference:
6602     case DataType::Type::kInt32:
6603       __ Str(cond, RegisterFrom(loc), mem_address);
6604       break;
6605     // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6606     case DataType::Type::kInt64:
6607     case DataType::Type::kFloat32:
6608     case DataType::Type::kFloat64:
6609     default:
6610       LOG(FATAL) << "Unreachable type " << type;
6611       UNREACHABLE();
6612   }
6613 }
6614 
6615 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6616   bool object_array_get_with_read_barrier =
6617       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6618   LocationSummary* locations =
6619       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6620                                                        object_array_get_with_read_barrier
6621                                                            ? LocationSummary::kCallOnSlowPath
6622                                                            : LocationSummary::kNoCall);
6623   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6624     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6625   }
6626   locations->SetInAt(0, Location::RequiresRegister());
6627   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6628   if (DataType::IsFloatingPointType(instruction->GetType())) {
6629     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6630   } else {
6631     // The output overlaps for an object array get for non-Baker read barriers: we do not want
6632     // the load to overwrite the object's location, as we need it to emit the read barrier.
6633     // Baker read barrier implementation with introspection does not have this restriction.
6634     bool overlap = object_array_get_with_read_barrier && !kUseBakerReadBarrier;
6635     locations->SetOut(Location::RequiresRegister(),
6636                       overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6637   }
6638   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6639     if (instruction->GetIndex()->IsConstant()) {
6640       // Array loads with constant index are treated as field loads.
6641       // We need a temporary register for the read barrier load in
6642       // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6643       // only if the offset is too big.
6644       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6645       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6646       offset += index << DataType::SizeShift(DataType::Type::kReference);
6647       if (offset >= kReferenceLoadMinFarOffset) {
6648         locations->AddTemp(Location::RequiresRegister());
6649       }
6650     } else {
6651       // We need a non-scratch temporary for the array data pointer in
6652       // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6653       locations->AddTemp(Location::RequiresRegister());
6654     }
6655   } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6656     // Also need a temporary for String compression feature.
6657     locations->AddTemp(Location::RequiresRegister());
6658   }
6659 }
6660 
6661 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6662   LocationSummary* locations = instruction->GetLocations();
6663   Location obj_loc = locations->InAt(0);
6664   vixl32::Register obj = InputRegisterAt(instruction, 0);
6665   Location index = locations->InAt(1);
6666   Location out_loc = locations->Out();
6667   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6668   DataType::Type type = instruction->GetType();
6669   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6670                                         instruction->IsStringCharAt();
6671   HInstruction* array_instr = instruction->GetArray();
6672   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6673 
6674   switch (type) {
6675     case DataType::Type::kBool:
6676     case DataType::Type::kUint8:
6677     case DataType::Type::kInt8:
6678     case DataType::Type::kUint16:
6679     case DataType::Type::kInt16:
6680     case DataType::Type::kInt32: {
6681       vixl32::Register length;
6682       if (maybe_compressed_char_at) {
6683         length = RegisterFrom(locations->GetTemp(0));
6684         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6685         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6686         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6687         GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6688         codegen_->MaybeRecordImplicitNullCheck(instruction);
6689       }
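           // With string compression, the `count` field holds (length << 1) | flag, where flag 0
           // means compressed (8-bit) characters and flag 1 means uncompressed (16-bit)
           // characters. The `Lsrs` below shifts the flag into the carry (CS = uncompressed) and
           // leaves the real length in `length`.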
6690       if (index.IsConstant()) {
6691         int32_t const_index = Int32ConstantFrom(index);
6692         if (maybe_compressed_char_at) {
6693           vixl32::Label uncompressed_load, done;
6694           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6695           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6696           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6697                         "Expecting 0=compressed, 1=uncompressed");
6698           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6699           GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6700                                          RegisterFrom(out_loc),
6701                                          obj,
6702                                          data_offset + const_index);
6703           __ B(final_label);
6704           __ Bind(&uncompressed_load);
6705           GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6706                                          RegisterFrom(out_loc),
6707                                          obj,
6708                                          data_offset + (const_index << 1));
6709           if (done.IsReferenced()) {
6710             __ Bind(&done);
6711           }
6712         } else {
6713           uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6714 
6715           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6716           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6717           LoadOperandType load_type = GetLoadOperandType(type);
6718           GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6719           codegen_->MaybeRecordImplicitNullCheck(instruction);
6720         }
6721       } else {
6722         UseScratchRegisterScope temps(GetVIXLAssembler());
6723         vixl32::Register temp = temps.Acquire();
6724 
6725         if (has_intermediate_address) {
6726           // We do not need to compute the intermediate address from the array: the
6727           // input instruction has done it already. See the comment in
6728           // `TryExtractArrayAccessAddress()`.
6729           if (kIsDebugBuild) {
6730             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6731             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6732           }
6733           temp = obj;
6734         } else {
6735           __ Add(temp, obj, data_offset);
6736         }
6737         if (maybe_compressed_char_at) {
6738           vixl32::Label uncompressed_load, done;
6739           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6740           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6741           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6742                         "Expecting 0=compressed, 1=uncompressed");
6743           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6744           __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6745           __ B(final_label);
6746           __ Bind(&uncompressed_load);
6747           __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6748           if (done.IsReferenced()) {
6749             __ Bind(&done);
6750           }
6751         } else {
6752           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6753           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6754           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6755           codegen_->MaybeRecordImplicitNullCheck(instruction);
6756         }
6757       }
6758       break;
6759     }
6760 
6761     case DataType::Type::kReference: {
6762       // The read barrier instrumentation of object ArrayGet
6763       // instructions does not support the HIntermediateAddress
6764       // instruction.
6765       DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier()));
6766 
6767       static_assert(
6768           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6769           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6770       // /* HeapReference<Object> */ out =
6771       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6772       if (codegen_->EmitBakerReadBarrier()) {
6773         // Note that a potential implicit null check is handled in this
6774         // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6775         DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6776         if (index.IsConstant()) {
6777           // Array load with a constant index can be treated as a field load.
6778           Location maybe_temp =
6779               (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6780           data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6781           codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6782                                                           out_loc,
6783                                                           obj,
6784                                                           data_offset,
6785                                                           maybe_temp,
6786                                                           /* needs_null_check= */ false);
6787         } else {
6788           Location temp = locations->GetTemp(0);
6789           codegen_->GenerateArrayLoadWithBakerReadBarrier(
6790               out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6791         }
6792       } else {
6793         vixl32::Register out = OutputRegister(instruction);
6794         if (index.IsConstant()) {
6795           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6796           {
6797             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6798             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6799             GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6800             codegen_->MaybeRecordImplicitNullCheck(instruction);
6801           }
6802           // If read barriers are enabled, emit read barriers other than
6803           // Baker's using a slow path (and also unpoison the loaded
6804           // reference, if heap poisoning is enabled).
6805           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6806         } else {
6807           UseScratchRegisterScope temps(GetVIXLAssembler());
6808           vixl32::Register temp = temps.Acquire();
6809 
6810           if (has_intermediate_address) {
6811             // We do not need to compute the intermediate address from the array: the
6812             // input instruction has done it already. See the comment in
6813             // `TryExtractArrayAccessAddress()`.
6814             if (kIsDebugBuild) {
6815               HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6816               DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6817             }
6818             temp = obj;
6819           } else {
6820             __ Add(temp, obj, data_offset);
6821           }
6822           {
6823             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6824             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6825             codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6826             temps.Close();
6827             codegen_->MaybeRecordImplicitNullCheck(instruction);
6828           }
6829           // If read barriers are enabled, emit read barriers other than
6830           // Baker's using a slow path (and also unpoison the loaded
6831           // reference, if heap poisoning is enabled).
6832           codegen_->MaybeGenerateReadBarrierSlow(
6833               instruction, out_loc, out_loc, obj_loc, data_offset, index);
6834         }
6835       }
6836       break;
6837     }
6838 
6839     case DataType::Type::kInt64: {
6840       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6841       // As two macro instructions can be emitted the max size is doubled.
6842       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6843       if (index.IsConstant()) {
6844         size_t offset =
6845             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6846         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6847       } else {
6848         UseScratchRegisterScope temps(GetVIXLAssembler());
6849         vixl32::Register temp = temps.Acquire();
6850         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6851         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6852       }
6853       codegen_->MaybeRecordImplicitNullCheck(instruction);
6854       break;
6855     }
6856 
6857     case DataType::Type::kFloat32: {
6858       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6859       // As two macro instructions can be emitted the max size is doubled.
6860       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6861       vixl32::SRegister out = SRegisterFrom(out_loc);
6862       if (index.IsConstant()) {
6863         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6864         GetAssembler()->LoadSFromOffset(out, obj, offset);
6865       } else {
6866         UseScratchRegisterScope temps(GetVIXLAssembler());
6867         vixl32::Register temp = temps.Acquire();
6868         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6869         GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6870       }
6871       codegen_->MaybeRecordImplicitNullCheck(instruction);
6872       break;
6873     }
6874 
6875     case DataType::Type::kFloat64: {
6876       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6877       // As two macro instructions can be emitted the max size is doubled.
6878       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6879       if (index.IsConstant()) {
6880         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6881         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6882       } else {
6883         UseScratchRegisterScope temps(GetVIXLAssembler());
6884         vixl32::Register temp = temps.Acquire();
6885         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6886         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6887       }
6888       codegen_->MaybeRecordImplicitNullCheck(instruction);
6889       break;
6890     }
6891 
6892     case DataType::Type::kUint32:
6893     case DataType::Type::kUint64:
6894     case DataType::Type::kVoid:
6895       LOG(FATAL) << "Unreachable type " << type;
6896       UNREACHABLE();
6897   }
6898 }
6899 
6900 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6901   DataType::Type value_type = instruction->GetComponentType();
6902 
6903   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6904   bool needs_write_barrier =
6905       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6906   bool check_gc_card =
6907       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6908 
6909   bool needs_type_check = instruction->NeedsTypeCheck();
6910 
6911   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6912       instruction,
6913       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6914 
6915   locations->SetInAt(0, Location::RequiresRegister());
6916   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6917   if (DataType::IsFloatingPointType(value_type)) {
6918     locations->SetInAt(2, Location::RequiresFpuRegister());
6919   } else {
6920     locations->SetInAt(2, Location::RequiresRegister());
6921   }
6922   if (needs_write_barrier || check_gc_card || instruction->NeedsTypeCheck()) {
6923     // Temporary registers for type checking, write barrier, checking the dirty bit, or register
6924     // poisoning.
6925     locations->AddRegisterTemps(2);
6926   } else if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
6927     locations->AddTemp(Location::RequiresRegister());
6928   }
6929 }
6930 
6931 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6932   LocationSummary* locations = instruction->GetLocations();
6933   vixl32::Register array = InputRegisterAt(instruction, 0);
6934   Location index = locations->InAt(1);
6935   DataType::Type value_type = instruction->GetComponentType();
6936   bool needs_type_check = instruction->NeedsTypeCheck();
6937   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6938   bool needs_write_barrier =
6939       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6940   uint32_t data_offset =
6941       mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6942   Location value_loc = locations->InAt(2);
6943   HInstruction* array_instr = instruction->GetArray();
6944   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6945 
6946   switch (value_type) {
6947     case DataType::Type::kBool:
6948     case DataType::Type::kUint8:
6949     case DataType::Type::kInt8:
6950     case DataType::Type::kUint16:
6951     case DataType::Type::kInt16:
6952     case DataType::Type::kInt32: {
6953       if (index.IsConstant()) {
6954         int32_t const_index = Int32ConstantFrom(index);
6955         uint32_t full_offset =
6956             data_offset + (const_index << DataType::SizeShift(value_type));
6957         StoreOperandType store_type = GetStoreOperandType(value_type);
6958         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6959         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6960         GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6961         codegen_->MaybeRecordImplicitNullCheck(instruction);
6962       } else {
6963         UseScratchRegisterScope temps(GetVIXLAssembler());
6964         vixl32::Register temp = temps.Acquire();
6965 
6966         if (has_intermediate_address) {
6967           // We do not need to compute the intermediate address from the array: the
6968           // input instruction has done it already. See the comment in
6969           // `TryExtractArrayAccessAddress()`.
6970           if (kIsDebugBuild) {
6971             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6972             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6973           }
6974           temp = array;
6975         } else {
6976           __ Add(temp, array, data_offset);
6977         }
6978         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6979         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6980         codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6981         codegen_->MaybeRecordImplicitNullCheck(instruction);
6982       }
6983       break;
6984     }
6985 
6986     case DataType::Type::kReference: {
6987       vixl32::Register value = RegisterFrom(value_loc);
6988       // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6989       // See the comment in instruction_simplifier_shared.cc.
6990       DCHECK(!has_intermediate_address);
6991 
6992       if (instruction->InputAt(2)->IsNullConstant()) {
6993         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6994         // As two macro instructions can be emitted the max size is doubled.
6995         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6996         // Just setting null.
6997         if (index.IsConstant()) {
6998           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6999           GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
7000         } else {
7001           DCHECK(index.IsRegister()) << index;
7002           UseScratchRegisterScope temps(GetVIXLAssembler());
7003           vixl32::Register temp = temps.Acquire();
7004           __ Add(temp, array, data_offset);
7005           codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
7006         }
7007         codegen_->MaybeRecordImplicitNullCheck(instruction);
7008         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
7009           // We need to set a write barrier here even though we are writing null, since this write
7010           // barrier is being relied on.
7011           DCHECK(needs_write_barrier);
7012           vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7013           vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7014           codegen_->MarkGCCard(temp1, temp2, array);
7015         }
7016         DCHECK(!needs_type_check);
7017         break;
7018       }
7019 
7020       const bool can_value_be_null = instruction->GetValueCanBeNull();
7021       // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
7022       // value is null (without an extra CompareAndBranchIfZero since we already checked if the
7023       // value is null for the type check).
7024       const bool skip_marking_gc_card =
7025           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
7026       vixl32::Label do_store;
7027       vixl32::Label skip_writing_card;
7028       if (can_value_be_null) {
7029         if (skip_marking_gc_card) {
7030           __ CompareAndBranchIfZero(value, &skip_writing_card, /* is_far_target= */ false);
7031         } else {
7032           __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
7033         }
7034       }
7035 
7036       SlowPathCodeARMVIXL* slow_path = nullptr;
7037       if (needs_type_check) {
7038         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
7039         codegen_->AddSlowPath(slow_path);
7040 
7041         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7042         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7043         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7044 
7045         // Note that when read barriers are enabled, the type checks
7046         // are performed without read barriers.  This is fine, even in
7047         // the case where a class object is in the from-space after
7048         // the flip, as a comparison involving such a type would not
7049         // produce a false positive; it may of course produce a false
7050         // negative, in which case we would take the ArraySet slow
7051         // path.
7052 
7053         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7054         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7055 
7056         {
7057           // Ensure we record the pc position immediately after the `ldr` instruction.
7058           ExactAssemblyScope aas(GetVIXLAssembler(),
7059                                  vixl32::kMaxInstructionSizeInBytes,
7060                                  CodeBufferCheckScope::kMaximumSize);
7061           // /* HeapReference<Class> */ temp1 = array->klass_
7062           __ ldr(temp1, MemOperand(array, class_offset));
7063           codegen_->MaybeRecordImplicitNullCheck(instruction);
7064         }
7065         GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7066 
7067         // /* HeapReference<Class> */ temp1 = temp1->component_type_
7068         GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
7069         // /* HeapReference<Class> */ temp2 = value->klass_
7070         GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
7071         // If heap poisoning is enabled, no need to unpoison `temp1`
7072         // nor `temp2`, as we are comparing two poisoned references.
7073         __ Cmp(temp1, temp2);
7074 
7075         if (instruction->StaticTypeOfArrayIsObjectArray()) {
7076           vixl32::Label do_put;
7077           __ B(eq, &do_put, /* is_far_target= */ false);
7078           // If heap poisoning is enabled, the `temp1` reference has
7079           // not been unpoisoned yet; unpoison it now.
7080           GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7081 
7082           // /* HeapReference<Class> */ temp1 = temp1->super_class_
7083           GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
7084           // If heap poisoning is enabled, no need to unpoison
7085           // `temp1`, as we are comparing against null below.
7086           __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
7087           __ Bind(&do_put);
7088         } else {
7089           __ B(ne, slow_path->GetEntryLabel());
7090         }
7091       }
7092 
7093       if (can_value_be_null && !skip_marking_gc_card) {
7094         DCHECK(do_store.IsReferenced());
7095         __ Bind(&do_store);
7096       }
7097 
7098       if (needs_write_barrier) {
7099         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7100         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7101         codegen_->MarkGCCard(temp1, temp2, array);
7102       } else if (codegen_->ShouldCheckGCCard(
7103                      value_type, instruction->GetValue(), write_barrier_kind)) {
7104         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7105         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7106         codegen_->CheckGCCardIsValid(temp1, temp2, array);
7107       }
7108 
7109       if (skip_marking_gc_card) {
7110         // Note that we don't check that the GC card is valid, as it can legitimately be clean.
7111         DCHECK(skip_writing_card.IsReferenced());
7112         __ Bind(&skip_writing_card);
7113       }
7114 
7115       vixl32::Register source = value;
7116       if (kPoisonHeapReferences) {
7117         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7118         DCHECK_EQ(value_type, DataType::Type::kReference);
7119         __ Mov(temp1, value);
7120         GetAssembler()->PoisonHeapReference(temp1);
7121         source = temp1;
7122       }
7123 
7124       {
7125         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7126         // As two macro instructions can be emitted the max size is doubled.
7127         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7128         if (index.IsConstant()) {
7129           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7130           GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
7131         } else {
7132           DCHECK(index.IsRegister()) << index;
7133 
7134           UseScratchRegisterScope temps(GetVIXLAssembler());
7135           vixl32::Register temp = temps.Acquire();
7136           __ Add(temp, array, data_offset);
7137           codegen_->StoreToShiftedRegOffset(value_type,
7138                                             LocationFrom(source),
7139                                             temp,
7140                                             RegisterFrom(index));
7141         }
7142 
7143         if (can_value_be_null || !needs_type_check) {
7144           codegen_->MaybeRecordImplicitNullCheck(instruction);
7145         }
7146       }
7147 
7148       if (slow_path != nullptr) {
7149         __ Bind(slow_path->GetExitLabel());
7150       }
7151 
7152       break;
7153     }
7154 
7155     case DataType::Type::kInt64: {
7156       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7157       // As two macro instructions can be emitted the max size is doubled.
7158       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7159       Location value = locations->InAt(2);
7160       if (index.IsConstant()) {
7161         size_t offset =
7162             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7163         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
7164       } else {
7165         UseScratchRegisterScope temps(GetVIXLAssembler());
7166         vixl32::Register temp = temps.Acquire();
7167         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7168         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
7169       }
7170       codegen_->MaybeRecordImplicitNullCheck(instruction);
7171       break;
7172     }
7173 
7174     case DataType::Type::kFloat32: {
7175       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7176       // As two macro instructions can be emitted the max size is doubled.
7177       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7178       Location value = locations->InAt(2);
7179       DCHECK(value.IsFpuRegister());
7180       if (index.IsConstant()) {
7181         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7182         GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
7183       } else {
7184         UseScratchRegisterScope temps(GetVIXLAssembler());
7185         vixl32::Register temp = temps.Acquire();
7186         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
7187         GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
7188       }
7189       codegen_->MaybeRecordImplicitNullCheck(instruction);
7190       break;
7191     }
7192 
7193     case DataType::Type::kFloat64: {
7194       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7195       // As two macro instructions can be emitted the max size is doubled.
7196       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7197       Location value = locations->InAt(2);
7198       DCHECK(value.IsFpuRegisterPair());
7199       if (index.IsConstant()) {
7200         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7201         GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
7202       } else {
7203         UseScratchRegisterScope temps(GetVIXLAssembler());
7204         vixl32::Register temp = temps.Acquire();
7205         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7206         GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
7207       }
7208       codegen_->MaybeRecordImplicitNullCheck(instruction);
7209       break;
7210     }
7211 
7212     case DataType::Type::kUint32:
7213     case DataType::Type::kUint64:
7214     case DataType::Type::kVoid:
7215       LOG(FATAL) << "Unreachable type " << value_type;
7216       UNREACHABLE();
7217   }
7218 }
7219 
7220 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7221   LocationSummary* locations =
7222       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7223   locations->SetInAt(0, Location::RequiresRegister());
7224   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7225 }
7226 
7227 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7228   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
7229   vixl32::Register obj = InputRegisterAt(instruction, 0);
7230   vixl32::Register out = OutputRegister(instruction);
7231   {
7232     ExactAssemblyScope aas(GetVIXLAssembler(),
7233                            vixl32::kMaxInstructionSizeInBytes,
7234                            CodeBufferCheckScope::kMaximumSize);
7235     __ ldr(out, MemOperand(obj, offset));
7236     codegen_->MaybeRecordImplicitNullCheck(instruction);
7237   }
7238   // Mask out compression flag from String's array length.
7239   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
7240     __ Lsr(out, out, 1u);
7241   }
7242 }
7243 
7244 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7245   LocationSummary* locations =
7246       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7247 
7248   locations->SetInAt(0, Location::RequiresRegister());
7249   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
7250   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7251 }
7252 
7253 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7254   vixl32::Register out = OutputRegister(instruction);
7255   vixl32::Register first = InputRegisterAt(instruction, 0);
7256   Location second = instruction->GetLocations()->InAt(1);
7257 
7258   if (second.IsRegister()) {
7259     __ Add(out, first, RegisterFrom(second));
7260   } else {
7261     __ Add(out, first, Int32ConstantFrom(second));
7262   }
7263 }
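     // The resulting address (array base + data offset) is then used directly by ArrayGet/ArraySet
     // as the base of a `[base, index, lsl #shift]` access; see the `has_intermediate_address`
     // paths above and the comment in `TryExtractArrayAccessAddress()`.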
7264 
7265 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
7266     HIntermediateAddressIndex* instruction) {
7267   LOG(FATAL) << "Unreachable " << instruction->GetId();
7268 }
7269 
7270 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
7271     HIntermediateAddressIndex* instruction) {
7272   LOG(FATAL) << "Unreachable " << instruction->GetId();
7273 }
7274 
7275 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7276   RegisterSet caller_saves = RegisterSet::Empty();
7277   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7278   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7279   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
7280   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
7281 
7282   HInstruction* index = instruction->InputAt(0);
7283   HInstruction* length = instruction->InputAt(1);
7284   // If both index and length are constants, we can statically check the bounds. But if at least
7285   // one of them is not encodable, ArmEncodableConstantOrRegister would create a
7286   // Location::RequiresRegister(), which is not what we want here. Instead we create constant
7287   // locations for both inputs.
7288   bool both_const = index->IsConstant() && length->IsConstant();
7289   locations->SetInAt(0, both_const
7290       ? Location::ConstantLocation(index)
7291       : ArmEncodableConstantOrRegister(index, CMP));
7292   locations->SetInAt(1, both_const
7293       ? Location::ConstantLocation(length)
7294       : ArmEncodableConstantOrRegister(length, CMP));
7295 }
7296 
7297 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7298   LocationSummary* locations = instruction->GetLocations();
7299   Location index_loc = locations->InAt(0);
7300   Location length_loc = locations->InAt(1);
7301 
7302   if (length_loc.IsConstant()) {
7303     int32_t length = Int32ConstantFrom(length_loc);
7304     if (index_loc.IsConstant()) {
7305       // BCE will remove the bounds check if we are guaranteed to pass.
7306       int32_t index = Int32ConstantFrom(index_loc);
7307       if (index < 0 || index >= length) {
7308         SlowPathCodeARMVIXL* slow_path =
7309             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7310         codegen_->AddSlowPath(slow_path);
7311         __ B(slow_path->GetEntryLabel());
7312       } else {
7313         // Some optimization after BCE may have generated this; since the range is
7314         // known to be valid, no bounds check needs to be emitted.
7315       }
7316       return;
7317     }
7318 
7319     SlowPathCodeARMVIXL* slow_path =
7320         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7321     __ Cmp(RegisterFrom(index_loc), length);
7322     codegen_->AddSlowPath(slow_path);
7323     __ B(hs, slow_path->GetEntryLabel());
7324   } else {
7325     SlowPathCodeARMVIXL* slow_path =
7326         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7327     __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7328     codegen_->AddSlowPath(slow_path);
7329     __ B(ls, slow_path->GetEntryLabel());
7330   }
7331 }
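// Note on the compares above: a single unsigned compare-and-branch covers both
// "index < 0" and "index >= length", because a negative index reinterpreted as an
// unsigned value is larger than any valid length. Schematically (illustrative only):
//
//   cmp index, length   ; bhs slow_path     (length is a constant)
//   cmp length, index   ; bls slow_path     (length is in a register)
//
// Both are equivalent to: if (uint32_t(index) >= uint32_t(length)) goto slow_path;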
7332 
7333 void CodeGeneratorARMVIXL::MaybeMarkGCCard(vixl32::Register temp,
7334                                            vixl32::Register card,
7335                                            vixl32::Register object,
7336                                            vixl32::Register value,
7337                                            bool emit_null_check) {
7338   vixl32::Label is_null;
7339   if (emit_null_check) {
7340     __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
7341   }
7342   MarkGCCard(temp, card, object);
7343   if (emit_null_check) {
7344     __ Bind(&is_null);
7345   }
7346 }
7347 
7348 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7349                                       vixl32::Register card,
7350                                       vixl32::Register object) {
7351   // Load the address of the card table into `card`.
7352   GetAssembler()->LoadFromOffset(
7353       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7354   // Calculate the offset (in the card table) of the card corresponding to `object`.
7355   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7356   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7357   // `object`'s card.
7358   //
7359   // Register `card` contains the address of the card table. Note that the card
7360   // table's base is biased during its creation so that it always starts at an
7361   // address whose least-significant byte is equal to `kCardDirty` (see
7362   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7363   // below writes the `kCardDirty` (byte) value into the `object`'s card
7364   // (located at `card + object >> kCardShift`).
7365   //
7366   // This dual use of the value in register `card` (1. to calculate the location
7367   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7368   // (no need to explicitly load `kCardDirty` as an immediate value).
7369   __ Strb(card, MemOperand(card, temp));
7370 }
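// The dual use of `card` described above amounts to the following (illustrative
// pseudo-code, not emitted instructions):
//
//   card = *(tr + card_table_offset)      // biased card table base
//   temp = object >> kCardShift           // index of `object`'s card
//   *(card + temp) = (uint8_t) card       // low byte of the biased base == kCardDirty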
7371 
7372 void CodeGeneratorARMVIXL::CheckGCCardIsValid(vixl32::Register temp,
7373                                               vixl32::Register card,
7374                                               vixl32::Register object) {
7375   vixl32::Label done;
7376   // Load the address of the card table into `card`.
7377   GetAssembler()->LoadFromOffset(
7378       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7379   // Calculate the offset (in the card table) of the card corresponding to `object`.
7380   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7381   // assert (!clean || !self->is_gc_marking)
7382   __ Ldrb(temp, MemOperand(card, temp));
7383   static_assert(gc::accounting::CardTable::kCardClean == 0);
7384   __ CompareAndBranchIfNonZero(temp, &done, /*is_far_target=*/false);
7385   __ CompareAndBranchIfZero(mr, &done, /*is_far_target=*/false);
7386   __ Bkpt(0);
7387   __ Bind(&done);
7388 }
7389 
7390 void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
7391   LOG(FATAL) << "Unreachable";
7392 }
7393 
7394 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7395   if (instruction->GetNext()->IsSuspendCheck() &&
7396       instruction->GetBlock()->GetLoopInformation() != nullptr) {
7397     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7398     // The back edge will generate the suspend check.
7399     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7400   }
7401 
7402   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7403 }
7404 
7405 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7406   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7407       instruction, LocationSummary::kCallOnSlowPath);
7408   locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7409 }
7410 
7411 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7412   HBasicBlock* block = instruction->GetBlock();
7413   if (block->GetLoopInformation() != nullptr) {
7414     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7415     // The back edge will generate the suspend check.
7416     return;
7417   }
7418   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7419     // The goto will generate the suspend check.
7420     return;
7421   }
7422   GenerateSuspendCheck(instruction, nullptr);
7423   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7424 }
7425 
7426 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7427                                                            HBasicBlock* successor) {
7428   SuspendCheckSlowPathARMVIXL* slow_path =
7429       down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7430   if (slow_path == nullptr) {
7431     slow_path =
7432         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7433     instruction->SetSlowPath(slow_path);
7434     codegen_->AddSlowPath(slow_path);
7435     if (successor != nullptr) {
7436       DCHECK(successor->IsLoopHeader());
7437     }
7438   } else {
7439     DCHECK_EQ(slow_path->GetSuccessor(), successor);
7440   }
7441 
7442   UseScratchRegisterScope temps(GetVIXLAssembler());
7443   vixl32::Register temp = temps.Acquire();
7444   GetAssembler()->LoadFromOffset(
7445       kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
7446   __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
7447   if (successor == nullptr) {
7448     __ B(ne, slow_path->GetEntryLabel());
7449     __ Bind(slow_path->GetReturnLabel());
7450   } else {
7451     __ B(eq, codegen_->GetLabelOf(successor));
7452     __ B(slow_path->GetEntryLabel());
7453   }
7454 }
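// Shape of the emitted suspend check, for reference (schematic; labels are
// illustrative):
//
//   ldr temp, [tr, #thread_flags_offset]
//   tst temp, #(suspend or checkpoint request flags)
//   successor == nullptr:  bne slow_path ; bind return_label
//   successor != nullptr:  beq successor ; b slow_path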
7455 
7456 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7457   return codegen_->GetAssembler();
7458 }
7459 
7460 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7461   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7462   MoveOperands* move = moves_[index];
7463   Location source = move->GetSource();
7464   Location destination = move->GetDestination();
7465 
7466   if (source.IsRegister()) {
7467     if (destination.IsRegister()) {
7468       __ Mov(RegisterFrom(destination), RegisterFrom(source));
7469     } else if (destination.IsFpuRegister()) {
7470       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7471     } else {
7472       DCHECK(destination.IsStackSlot());
7473       GetAssembler()->StoreToOffset(kStoreWord,
7474                                     RegisterFrom(source),
7475                                     sp,
7476                                     destination.GetStackIndex());
7477     }
7478   } else if (source.IsStackSlot()) {
7479     if (destination.IsRegister()) {
7480       GetAssembler()->LoadFromOffset(kLoadWord,
7481                                      RegisterFrom(destination),
7482                                      sp,
7483                                      source.GetStackIndex());
7484     } else if (destination.IsFpuRegister()) {
7485       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7486     } else {
7487       DCHECK(destination.IsStackSlot());
7488       vixl32::Register temp = temps.Acquire();
7489       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7490       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7491     }
7492   } else if (source.IsFpuRegister()) {
7493     if (destination.IsRegister()) {
7494       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7495     } else if (destination.IsFpuRegister()) {
7496       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7497     } else {
7498       DCHECK(destination.IsStackSlot());
7499       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7500     }
7501   } else if (source.IsDoubleStackSlot()) {
7502     if (destination.IsDoubleStackSlot()) {
7503       vixl32::DRegister temp = temps.AcquireD();
7504       GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7505       GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7506     } else if (destination.IsRegisterPair()) {
7507       DCHECK(ExpectedPairLayout(destination));
7508       GetAssembler()->LoadFromOffset(
7509           kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7510     } else {
7511       DCHECK(destination.IsFpuRegisterPair()) << destination;
7512       GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7513     }
7514   } else if (source.IsRegisterPair()) {
7515     if (destination.IsRegisterPair()) {
7516       __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7517       __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7518     } else if (destination.IsFpuRegisterPair()) {
7519       __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7520     } else {
7521       DCHECK(destination.IsDoubleStackSlot()) << destination;
7522       DCHECK(ExpectedPairLayout(source));
7523       GetAssembler()->StoreToOffset(kStoreWordPair,
7524                                     LowRegisterFrom(source),
7525                                     sp,
7526                                     destination.GetStackIndex());
7527     }
7528   } else if (source.IsFpuRegisterPair()) {
7529     if (destination.IsRegisterPair()) {
7530       __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7531     } else if (destination.IsFpuRegisterPair()) {
7532       __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7533     } else {
7534       DCHECK(destination.IsDoubleStackSlot()) << destination;
7535       GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7536     }
7537   } else {
7538     DCHECK(source.IsConstant()) << source;
7539     HConstant* constant = source.GetConstant();
7540     if (constant->IsIntConstant() || constant->IsNullConstant()) {
7541       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7542       if (destination.IsRegister()) {
7543         __ Mov(RegisterFrom(destination), value);
7544       } else {
7545         DCHECK(destination.IsStackSlot());
7546         vixl32::Register temp = temps.Acquire();
7547         __ Mov(temp, value);
7548         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7549       }
7550     } else if (constant->IsLongConstant()) {
7551       int64_t value = Int64ConstantFrom(source);
7552       if (destination.IsRegisterPair()) {
7553         __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7554         __ Mov(HighRegisterFrom(destination), High32Bits(value));
7555       } else {
7556         DCHECK(destination.IsDoubleStackSlot()) << destination;
7557         vixl32::Register temp = temps.Acquire();
7558         __ Mov(temp, Low32Bits(value));
7559         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7560         __ Mov(temp, High32Bits(value));
7561         GetAssembler()->StoreToOffset(kStoreWord,
7562                                       temp,
7563                                       sp,
7564                                       destination.GetHighStackIndex(kArmWordSize));
7565       }
7566     } else if (constant->IsDoubleConstant()) {
7567       double value = constant->AsDoubleConstant()->GetValue();
7568       if (destination.IsFpuRegisterPair()) {
7569         __ Vmov(DRegisterFrom(destination), value);
7570       } else {
7571         DCHECK(destination.IsDoubleStackSlot()) << destination;
7572         uint64_t int_value = bit_cast<uint64_t, double>(value);
7573         vixl32::Register temp = temps.Acquire();
7574         __ Mov(temp, Low32Bits(int_value));
7575         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7576         __ Mov(temp, High32Bits(int_value));
7577         GetAssembler()->StoreToOffset(kStoreWord,
7578                                       temp,
7579                                       sp,
7580                                       destination.GetHighStackIndex(kArmWordSize));
7581       }
7582     } else {
7583       DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7584       float value = constant->AsFloatConstant()->GetValue();
7585       if (destination.IsFpuRegister()) {
7586         __ Vmov(SRegisterFrom(destination), value);
7587       } else {
7588         DCHECK(destination.IsStackSlot());
7589         vixl32::Register temp = temps.Acquire();
7590         __ Mov(temp, bit_cast<int32_t, float>(value));
7591         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7592       }
7593     }
7594   }
7595 }
7596 
7597 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7598   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7599   vixl32::Register temp = temps.Acquire();
7600   __ Mov(temp, reg);
7601   GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7602   GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7603 }
7604 
7605 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7606   // TODO(VIXL32): Double check the performance of this implementation.
7607   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7608   vixl32::Register temp1 = temps.Acquire();
7609   ScratchRegisterScope ensure_scratch(
7610       this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7611   vixl32::Register temp2(ensure_scratch.GetRegister());
7612 
7613   int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7614   GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7615   GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7616   GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7617   GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7618 }
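// If `ensure_scratch` had to spill a core register to free `temp2`, SP has moved down
// by one word, so both stack-slot offsets are adjusted by `stack_offset`. The swap is,
// schematically (illustrative only):
//
//   temp1 = [sp + mem1 + adj];  temp2 = [sp + mem2 + adj]
//   [sp + mem2 + adj] = temp1;  [sp + mem1 + adj] = temp2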
7619 
7620 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7621   MoveOperands* move = moves_[index];
7622   Location source = move->GetSource();
7623   Location destination = move->GetDestination();
7624   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7625 
7626   if (source.IsRegister() && destination.IsRegister()) {
7627     vixl32::Register temp = temps.Acquire();
7628     DCHECK(!RegisterFrom(source).Is(temp));
7629     DCHECK(!RegisterFrom(destination).Is(temp));
7630     __ Mov(temp, RegisterFrom(destination));
7631     __ Mov(RegisterFrom(destination), RegisterFrom(source));
7632     __ Mov(RegisterFrom(source), temp);
7633   } else if (source.IsRegister() && destination.IsStackSlot()) {
7634     Exchange(RegisterFrom(source), destination.GetStackIndex());
7635   } else if (source.IsStackSlot() && destination.IsRegister()) {
7636     Exchange(RegisterFrom(destination), source.GetStackIndex());
7637   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7638     Exchange(source.GetStackIndex(), destination.GetStackIndex());
7639   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7640     vixl32::Register temp = temps.Acquire();
7641     __ Vmov(temp, SRegisterFrom(source));
7642     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7643     __ Vmov(SRegisterFrom(destination), temp);
7644   } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
7645     vixl32::DRegister temp = temps.AcquireD();
7646     __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7647     __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7648     __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7649     __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7650   } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7651     vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7652     int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7653     DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
7654     vixl32::DRegister temp = temps.AcquireD();
7655     __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7656     GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7657     GetAssembler()->StoreDToOffset(temp, sp, mem);
7658   } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7659     vixl32::DRegister first = DRegisterFrom(source);
7660     vixl32::DRegister second = DRegisterFrom(destination);
7661     vixl32::DRegister temp = temps.AcquireD();
7662     __ Vmov(temp, first);
7663     __ Vmov(first, second);
7664     __ Vmov(second, temp);
7665   } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7666     vixl32::DRegister reg = source.IsFpuRegisterPair()
7667         ? DRegisterFrom(source)
7668         : DRegisterFrom(destination);
7669     int mem = source.IsFpuRegisterPair()
7670         ? destination.GetStackIndex()
7671         : source.GetStackIndex();
7672     vixl32::DRegister temp = temps.AcquireD();
7673     __ Vmov(temp, reg);
7674     GetAssembler()->LoadDFromOffset(reg, sp, mem);
7675     GetAssembler()->StoreDToOffset(temp, sp, mem);
7676   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7677     vixl32::SRegister reg = source.IsFpuRegister()
7678         ? SRegisterFrom(source)
7679         : SRegisterFrom(destination);
7680     int mem = source.IsFpuRegister()
7681         ? destination.GetStackIndex()
7682         : source.GetStackIndex();
7683     vixl32::Register temp = temps.Acquire();
7684     __ Vmov(temp, reg);
7685     GetAssembler()->LoadSFromOffset(reg, sp, mem);
7686     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7687   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7688     vixl32::DRegister temp1 = temps.AcquireD();
7689     vixl32::DRegister temp2 = temps.AcquireD();
7690     __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7691     __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7692     __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7693     __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7694   } else {
7695     LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7696   }
7697 }
7698 
7699 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7700   __ Push(vixl32::Register(reg));
7701 }
7702 
7703 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7704   __ Pop(vixl32::Register(reg));
7705 }
7706 
7707 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7708     HLoadClass::LoadKind desired_class_load_kind) {
7709   switch (desired_class_load_kind) {
7710     case HLoadClass::LoadKind::kInvalid:
7711       LOG(FATAL) << "UNREACHABLE";
7712       UNREACHABLE();
7713     case HLoadClass::LoadKind::kReferrersClass:
7714       break;
7715     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7716     case HLoadClass::LoadKind::kBootImageRelRo:
7717     case HLoadClass::LoadKind::kAppImageRelRo:
7718     case HLoadClass::LoadKind::kBssEntry:
7719     case HLoadClass::LoadKind::kBssEntryPublic:
7720     case HLoadClass::LoadKind::kBssEntryPackage:
7721       DCHECK(!GetCompilerOptions().IsJitCompiler());
7722       break;
7723     case HLoadClass::LoadKind::kJitBootImageAddress:
7724     case HLoadClass::LoadKind::kJitTableAddress:
7725       DCHECK(GetCompilerOptions().IsJitCompiler());
7726       break;
7727     case HLoadClass::LoadKind::kRuntimeCall:
7728       break;
7729   }
7730   return desired_class_load_kind;
7731 }
7732 
7733 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7734   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7735   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7736     InvokeRuntimeCallingConventionARMVIXL calling_convention;
7737     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7738         cls,
7739         LocationFrom(calling_convention.GetRegisterAt(0)),
7740         LocationFrom(r0));
7741     DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7742     return;
7743   }
7744   DCHECK_EQ(cls->NeedsAccessCheck(),
7745             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7746                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7747 
7748   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7749   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7750       ? LocationSummary::kCallOnSlowPath
7751       : LocationSummary::kNoCall;
7752   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7753   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7754     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7755   }
7756 
7757   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7758     locations->SetInAt(0, Location::RequiresRegister());
7759   }
7760   locations->SetOut(Location::RequiresRegister());
7761   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
7762       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7763       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
7764     if (codegen_->EmitNonBakerReadBarrier()) {
7765       // For non-Baker read barrier we have a temp-clobbering call.
7766     } else {
7767       // Rely on the type resolution or initialization and marking to save everything we need.
7768       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7769     }
7770   }
7771 }
7772 
7773 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7774 // move.
7775 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7776   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7777   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7778     codegen_->GenerateLoadClassRuntimeCall(cls);
7779     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7780     return;
7781   }
7782   DCHECK_EQ(cls->NeedsAccessCheck(),
7783             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7784                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7785 
7786   LocationSummary* locations = cls->GetLocations();
7787   Location out_loc = locations->Out();
7788   vixl32::Register out = OutputRegister(cls);
7789 
7790   const ReadBarrierOption read_barrier_option =
7791       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7792   bool generate_null_check = false;
7793   switch (load_kind) {
7794     case HLoadClass::LoadKind::kReferrersClass: {
7795       DCHECK(!cls->CanCallRuntime());
7796       DCHECK(!cls->MustGenerateClinitCheck());
7797       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7798       vixl32::Register current_method = InputRegisterAt(cls, 0);
7799       codegen_->GenerateGcRootFieldLoad(cls,
7800                                         out_loc,
7801                                         current_method,
7802                                         ArtMethod::DeclaringClassOffset().Int32Value(),
7803                                         read_barrier_option);
7804       break;
7805     }
7806     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7807       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7808              codegen_->GetCompilerOptions().IsBootImageExtension());
7809       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7810       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7811           codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7812       codegen_->EmitMovwMovtPlaceholder(labels, out);
7813       break;
7814     }
7815     case HLoadClass::LoadKind::kBootImageRelRo: {
7816       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7817       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
7818       codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7819       break;
7820     }
7821     case HLoadClass::LoadKind::kAppImageRelRo: {
7822       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7823       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7824       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7825           codegen_->NewAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7826       codegen_->EmitMovwMovtPlaceholder(labels, out);
7827       __ Ldr(out, MemOperand(out, /*offset=*/ 0));
7828       break;
7829     }
7830     case HLoadClass::LoadKind::kBssEntry:
7831     case HLoadClass::LoadKind::kBssEntryPublic:
7832     case HLoadClass::LoadKind::kBssEntryPackage: {
7833       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7834       codegen_->EmitMovwMovtPlaceholder(labels, out);
7835       // All aligned loads are implicitly atomic consume operations on ARM.
7836       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7837       generate_null_check = true;
7838       break;
7839     }
7840     case HLoadClass::LoadKind::kJitBootImageAddress: {
7841       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7842       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7843       DCHECK_NE(address, 0u);
7844       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7845       break;
7846     }
7847     case HLoadClass::LoadKind::kJitTableAddress: {
7848       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7849                                                        cls->GetTypeIndex(),
7850                                                        cls->GetClass()));
7851       // /* GcRoot<mirror::Class> */ out = *out
7852       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7853       break;
7854     }
7855     case HLoadClass::LoadKind::kRuntimeCall:
7856     case HLoadClass::LoadKind::kInvalid:
7857       LOG(FATAL) << "UNREACHABLE";
7858       UNREACHABLE();
7859   }
7860 
7861   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7862     DCHECK(cls->CanCallRuntime());
7863     LoadClassSlowPathARMVIXL* slow_path =
7864         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7865     codegen_->AddSlowPath(slow_path);
7866     if (generate_null_check) {
7867       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7868     }
7869     if (cls->MustGenerateClinitCheck()) {
7870       GenerateClassInitializationCheck(slow_path, out);
7871     } else {
7872       __ Bind(slow_path->GetExitLabel());
7873     }
7874     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7875   }
7876 }
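// For the kBssEntry* kinds handled above, the emitted sequence is, schematically
// (illustrative; the slot address comes from the linker patch recorded via
// PcRelativePatchInfo):
//
//   movw/movt out, #<address of the type's .bss slot>   // placeholder, patched later
//   ldr out, [out]                                       // GC root load; null if unresolved
//   cbz out, LoadClassSlowPath                           // resolve (and maybe initialize)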
7877 
7878 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7879   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7880   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7881   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7882 }
7883 
7884 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7885   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7886 }
7887 
7888 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7889   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7890   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7891   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7892 }
7893 
7894 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7895   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7896 }
7897 
7898 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7899   LocationSummary* locations =
7900       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7901   locations->SetInAt(0, Location::RequiresRegister());
7902   if (check->HasUses()) {
7903     locations->SetOut(Location::SameAsFirstInput());
7904   }
7905   // Rely on the type initialization to save everything we need.
7906   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7907 }
7908 
7909 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7910   // We assume the class is not null.
7911   LoadClassSlowPathARMVIXL* slow_path =
7912       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7913   codegen_->AddSlowPath(slow_path);
7914   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7915 }
7916 
7917 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7918     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7919   UseScratchRegisterScope temps(GetVIXLAssembler());
7920   vixl32::Register temp = temps.Acquire();
7921   __ Ldrb(temp, MemOperand(class_reg, kClassStatusByteOffset));
7922   __ Cmp(temp, kShiftedVisiblyInitializedValue);
7923   __ B(lo, slow_path->GetEntryLabel());
7924   __ Bind(slow_path->GetExitLabel());
7925 }
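// The check above loads the class status byte and sends anything below "visibly
// initialized" to the slow path. Roughly (illustrative; constants are the ones used
// in the code above):
//
//   if (*(class_reg + kClassStatusByteOffset) < kShiftedVisiblyInitializedValue)
//     goto slow_path;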
7926 
7927 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7928     HTypeCheckInstruction* check,
7929     vixl32::Register temp,
7930     vixl32::FlagsUpdate flags_update) {
7931   uint32_t path_to_root = check->GetBitstringPathToRoot();
7932   uint32_t mask = check->GetBitstringMask();
7933   DCHECK(IsPowerOfTwo(mask + 1));
7934   size_t mask_bits = WhichPowerOf2(mask + 1);
7935 
7936   // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
7937   // the Z flag for BNE. This is indicated by the `flags_update` parameter.
7938   if (mask_bits == 16u) {
7939     // Load only the bitstring part of the status word.
7940     __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7941     // Check if the bitstring bits are equal to `path_to_root`.
7942     if (flags_update == SetFlags) {
7943       __ Cmp(temp, path_to_root);
7944     } else {
7945       __ Sub(temp, temp, path_to_root);
7946     }
7947   } else {
7948     // /* uint32_t */ temp = temp->status_
7949     __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7950     if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7951       // Compare the bitstring bits using SUB.
7952       __ Sub(temp, temp, path_to_root);
7953       // Shift out bits that do not contribute to the comparison.
7954       __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7955     } else if (IsUint<16>(path_to_root)) {
7956       if (temp.IsLow()) {
7957         // Note: Optimized for size but contains one more dependent instruction than necessary.
7958         //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7959         //       macro assembler would use the high reg IP for the constant by default.
7960         // Compare the bitstring bits using SUB.
7961         __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2
7962         __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3
7963         // Shift out bits that do not contribute to the comparison.
7964         __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7965       } else {
7966         // Extract the bitstring bits.
7967         __ Ubfx(temp, temp, 0, mask_bits);
7968         // Check if the bitstring bits are equal to `path_to_root`.
7969         if (flags_update == SetFlags) {
7970           __ Cmp(temp, path_to_root);
7971         } else {
7972           __ Sub(temp, temp, path_to_root);
7973         }
7974       }
7975     } else {
7976       // Shift out bits that do not contribute to the comparison.
7977       __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7978       // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7979       if (flags_update == SetFlags) {
7980         __ Cmp(temp, path_to_root << (32u - mask_bits));
7981       } else {
7982         __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7983       }
7984     }
7985   }
7986 }
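// All of the encodings above evaluate the same predicate and differ only in how the
// result is materialized (flags via SetFlags, or a zero/non-zero value in `temp`).
// Schematically (illustrative; `status` is the class status word loaded above):
//
//   matches  <=>  (status & mask) == path_to_root     // mask == (1u << mask_bits) - 1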
7987 
7988 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7989     HLoadString::LoadKind desired_string_load_kind) {
7990   switch (desired_string_load_kind) {
7991     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7992     case HLoadString::LoadKind::kBootImageRelRo:
7993     case HLoadString::LoadKind::kBssEntry:
7994       DCHECK(!GetCompilerOptions().IsJitCompiler());
7995       break;
7996     case HLoadString::LoadKind::kJitBootImageAddress:
7997     case HLoadString::LoadKind::kJitTableAddress:
7998       DCHECK(GetCompilerOptions().IsJitCompiler());
7999       break;
8000     case HLoadString::LoadKind::kRuntimeCall:
8001       break;
8002   }
8003   return desired_string_load_kind;
8004 }
8005 
8006 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
8007   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
8008   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
8009   HLoadString::LoadKind load_kind = load->GetLoadKind();
8010   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
8011     locations->SetOut(LocationFrom(r0));
8012   } else {
8013     locations->SetOut(Location::RequiresRegister());
8014     if (load_kind == HLoadString::LoadKind::kBssEntry) {
8015       if (codegen_->EmitNonBakerReadBarrier()) {
8016         // For non-Baker read barrier we have a temp-clobbering call.
8017       } else {
8018         // Rely on the pResolveString and marking to save everything we need, including temps.
8019         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
8020       }
8021     }
8022   }
8023 }
8024 
8025 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
8026 // move.
8027 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
8028   LocationSummary* locations = load->GetLocations();
8029   Location out_loc = locations->Out();
8030   vixl32::Register out = OutputRegister(load);
8031   HLoadString::LoadKind load_kind = load->GetLoadKind();
8032 
8033   switch (load_kind) {
8034     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
8035       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
8036              codegen_->GetCompilerOptions().IsBootImageExtension());
8037       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8038           codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
8039       codegen_->EmitMovwMovtPlaceholder(labels, out);
8040       return;
8041     }
8042     case HLoadString::LoadKind::kBootImageRelRo: {
8043       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
8044       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
8045       codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
8046       return;
8047     }
8048     case HLoadString::LoadKind::kBssEntry: {
8049       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8050           codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
8051       codegen_->EmitMovwMovtPlaceholder(labels, out);
8052       // All aligned loads are implicitly atomic consume operations on ARM.
8053       codegen_->GenerateGcRootFieldLoad(
8054           load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8055       LoadStringSlowPathARMVIXL* slow_path =
8056           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
8057       codegen_->AddSlowPath(slow_path);
8058       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
8059       __ Bind(slow_path->GetExitLabel());
8060       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
8061       return;
8062     }
8063     case HLoadString::LoadKind::kJitBootImageAddress: {
8064       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
8065       DCHECK_NE(address, 0u);
8066       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
8067       return;
8068     }
8069     case HLoadString::LoadKind::kJitTableAddress: {
8070       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
8071                                                         load->GetStringIndex(),
8072                                                         load->GetString()));
8073       // /* GcRoot<mirror::String> */ out = *out
8074       codegen_->GenerateGcRootFieldLoad(
8075           load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8076       return;
8077     }
8078     default:
8079       break;
8080   }
8081 
8082   DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
8083   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8084   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
8085   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
8086   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
8087   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
8088 }
8089 
8090 static int32_t GetExceptionTlsOffset() {
8091   return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
8092 }
8093 
8094 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
8095   LocationSummary* locations =
8096       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
8097   locations->SetOut(Location::RequiresRegister());
8098 }
8099 
8100 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
8101   vixl32::Register out = OutputRegister(load);
8102   GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
8103 }
8104 
8105 
8106 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
8107   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
8108 }
8109 
8110 void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
8111   UseScratchRegisterScope temps(GetVIXLAssembler());
8112   vixl32::Register temp = temps.Acquire();
8113   __ Mov(temp, 0);
8114   GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
8115 }
8116 
8117 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
8118   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8119       instruction, LocationSummary::kCallOnMainOnly);
8120   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8121   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8122 }
8123 
8124 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
8125   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
8126   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
8127 }
8128 
8129 // Temp is used for read barrier.
8130 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8131   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8132     return 1;
8133   }
8134   if (emit_read_barrier &&
8135        (kUseBakerReadBarrier ||
8136           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
8137           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
8138           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
8139     return 1;
8140   }
8141   return 0;
8142 }
8143 
8144 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
8145 // interface pointer, one for loading the current interface.
8146 // The other checks have one temp for loading the object's class.
8147 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8148   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8149     return 3;
8150   }
8151   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
8152 }
8153 
8154 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8155   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
8156   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8157   bool baker_read_barrier_slow_path = false;
8158   switch (type_check_kind) {
8159     case TypeCheckKind::kExactCheck:
8160     case TypeCheckKind::kAbstractClassCheck:
8161     case TypeCheckKind::kClassHierarchyCheck:
8162     case TypeCheckKind::kArrayObjectCheck:
8163     case TypeCheckKind::kInterfaceCheck: {
8164       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
8165       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
8166       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
8167                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
8168       break;
8169     }
8170     case TypeCheckKind::kArrayCheck:
8171     case TypeCheckKind::kUnresolvedCheck:
8172       call_kind = LocationSummary::kCallOnSlowPath;
8173       break;
8174     case TypeCheckKind::kBitstringCheck:
8175       break;
8176   }
8177 
8178   LocationSummary* locations =
8179       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8180   if (baker_read_barrier_slow_path) {
8181     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
8182   }
8183   locations->SetInAt(0, Location::RequiresRegister());
8184   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8185     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8186     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8187     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8188   } else {
8189     locations->SetInAt(1, Location::RequiresRegister());
8190   }
8191   // The "out" register is used as a temporary, so it overlaps with the inputs.
8192   // Note that TypeCheckSlowPathARM uses this register too.
8193   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
8194   locations->AddRegisterTemps(
8195       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
8196 }
8197 
8198 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8199   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8200   LocationSummary* locations = instruction->GetLocations();
8201   Location obj_loc = locations->InAt(0);
8202   vixl32::Register obj = InputRegisterAt(instruction, 0);
8203   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8204       ? vixl32::Register()
8205       : InputRegisterAt(instruction, 1);
8206   Location out_loc = locations->Out();
8207   vixl32::Register out = OutputRegister(instruction);
8208   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
8209   DCHECK_LE(num_temps, 1u);
8210   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
8211   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8212   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8213   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8214   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8215   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8216   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8217   const uint32_t object_array_data_offset =
8218       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8219   vixl32::Label done;
8220   vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
8221   SlowPathCodeARMVIXL* slow_path = nullptr;
8222 
8223   // Return 0 if `obj` is null.
8224   // Avoid the null check if we know `obj` is not null.
8225   if (instruction->MustDoNullCheck()) {
8226     DCHECK(!out.Is(obj));
8227     __ Mov(out, 0);
8228     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8229   }
8230 
8231   switch (type_check_kind) {
8232     case TypeCheckKind::kExactCheck: {
8233       ReadBarrierOption read_barrier_option =
8234           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8235       // /* HeapReference<Class> */ out = obj->klass_
8236       GenerateReferenceLoadTwoRegisters(instruction,
8237                                         out_loc,
8238                                         obj_loc,
8239                                         class_offset,
8240                                         maybe_temp_loc,
8241                                         read_barrier_option);
8242       // Classes must be equal for the instanceof to succeed.
8243       __ Cmp(out, cls);
8244       // We speculatively set the result to false without changing the condition
8245       // flags, which allows us to avoid some branching later.
8246       __ Mov(LeaveFlags, out, 0);
8247 
8248       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8249       // we check that the output is in a low register, so that a 16-bit MOV
8250       // encoding can be used.
8251       if (out.IsLow()) {
8252         // We use the scope because of the IT block that follows.
8253         ExactAssemblyScope guard(GetVIXLAssembler(),
8254                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8255                                  CodeBufferCheckScope::kExactSize);
8256 
8257         __ it(eq);
8258         __ mov(eq, out, 1);
8259       } else {
8260         __ B(ne, final_label, /* is_far_target= */ false);
8261         __ Mov(out, 1);
8262       }
8263 
8264       break;
8265     }
8266 
8267     case TypeCheckKind::kAbstractClassCheck: {
8268       ReadBarrierOption read_barrier_option =
8269           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8270       // /* HeapReference<Class> */ out = obj->klass_
8271       GenerateReferenceLoadTwoRegisters(instruction,
8272                                         out_loc,
8273                                         obj_loc,
8274                                         class_offset,
8275                                         maybe_temp_loc,
8276                                         read_barrier_option);
8277       // If the class is abstract, we eagerly fetch the super class of the
8278       // object to avoid doing a comparison we know will fail.
8279       vixl32::Label loop;
8280       __ Bind(&loop);
8281       // /* HeapReference<Class> */ out = out->super_class_
8282       GenerateReferenceLoadOneRegister(instruction,
8283                                        out_loc,
8284                                        super_offset,
8285                                        maybe_temp_loc,
8286                                        read_barrier_option);
8287       // If `out` is null, we use it for the result, and jump to the final label.
8288       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8289       __ Cmp(out, cls);
8290       __ B(ne, &loop, /* is_far_target= */ false);
8291       __ Mov(out, 1);
8292       break;
8293     }
8294 
8295     case TypeCheckKind::kClassHierarchyCheck: {
8296       ReadBarrierOption read_barrier_option =
8297           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8298       // /* HeapReference<Class> */ out = obj->klass_
8299       GenerateReferenceLoadTwoRegisters(instruction,
8300                                         out_loc,
8301                                         obj_loc,
8302                                         class_offset,
8303                                         maybe_temp_loc,
8304                                         read_barrier_option);
8305       // Walk over the class hierarchy to find a match.
8306       vixl32::Label loop, success;
8307       __ Bind(&loop);
8308       __ Cmp(out, cls);
8309       __ B(eq, &success, /* is_far_target= */ false);
8310       // /* HeapReference<Class> */ out = out->super_class_
8311       GenerateReferenceLoadOneRegister(instruction,
8312                                        out_loc,
8313                                        super_offset,
8314                                        maybe_temp_loc,
8315                                        read_barrier_option);
8316       // This is essentially a null check, but it sets the condition flags to the
8317       // proper value for the code that follows the loop, i.e. not `eq`.
8318       __ Cmp(out, 1);
8319       __ B(hs, &loop, /* is_far_target= */ false);
8320 
8321       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8322       // we check that the output is in a low register, so that a 16-bit MOV
8323       // encoding can be used.
8324       if (out.IsLow()) {
8325         // If `out` is null, we use it for the result, and the condition flags
8326         // have already been set to `ne`, so the IT block that comes afterwards
8327         // (and which handles the successful case) turns into a NOP (instead of
8328         // overwriting `out`).
8329         __ Bind(&success);
8330 
8331         // We use the scope because of the IT block that follows.
8332         ExactAssemblyScope guard(GetVIXLAssembler(),
8333                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8334                                  CodeBufferCheckScope::kExactSize);
8335 
8336         // There is only one branch to the `success` label (which is bound to this
8337         // IT block), and it has the same condition, `eq`, so in that case the MOV
8338         // is executed.
8339         __ it(eq);
8340         __ mov(eq, out, 1);
8341       } else {
8342         // If `out` is null, we use it for the result, and jump to the final label.
8343         __ B(final_label);
8344         __ Bind(&success);
8345         __ Mov(out, 1);
8346       }
8347 
8348       break;
8349     }
8350 
8351     case TypeCheckKind::kArrayObjectCheck: {
8352       ReadBarrierOption read_barrier_option =
8353           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8354       // /* HeapReference<Class> */ out = obj->klass_
8355       GenerateReferenceLoadTwoRegisters(instruction,
8356                                         out_loc,
8357                                         obj_loc,
8358                                         class_offset,
8359                                         maybe_temp_loc,
8360                                         read_barrier_option);
8361       // Do an exact check.
8362       vixl32::Label exact_check;
8363       __ Cmp(out, cls);
8364       __ B(eq, &exact_check, /* is_far_target= */ false);
8365       // Otherwise, we need to check that the object's class is a non-primitive array.
8366       // /* HeapReference<Class> */ out = out->component_type_
8367       GenerateReferenceLoadOneRegister(instruction,
8368                                        out_loc,
8369                                        component_offset,
8370                                        maybe_temp_loc,
8371                                        read_barrier_option);
8372       // If `out` is null, we use it for the result, and jump to the final label.
8373       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8374       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8375       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8376       __ Cmp(out, 0);
8377       // We speculatively set the result to false without changing the condition
8378       // flags, which allows us to avoid some branching later.
8379       __ Mov(LeaveFlags, out, 0);
8380 
8381       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8382       // we check that the output is in a low register, so that a 16-bit MOV
8383       // encoding can be used.
8384       if (out.IsLow()) {
8385         __ Bind(&exact_check);
8386 
8387         // We use the scope because of the IT block that follows.
8388         ExactAssemblyScope guard(GetVIXLAssembler(),
8389                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8390                                  CodeBufferCheckScope::kExactSize);
8391 
8392         __ it(eq);
8393         __ mov(eq, out, 1);
8394       } else {
8395         __ B(ne, final_label, /* is_far_target= */ false);
8396         __ Bind(&exact_check);
8397         __ Mov(out, 1);
8398       }
8399 
8400       break;
8401     }
8402 
8403     case TypeCheckKind::kArrayCheck: {
8404       // No read barrier since the slow path will retry upon failure.
8405       // /* HeapReference<Class> */ out = obj->klass_
8406       GenerateReferenceLoadTwoRegisters(instruction,
8407                                         out_loc,
8408                                         obj_loc,
8409                                         class_offset,
8410                                         maybe_temp_loc,
8411                                         kWithoutReadBarrier);
8412       __ Cmp(out, cls);
8413       DCHECK(locations->OnlyCallsOnSlowPath());
8414       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8415           instruction, /* is_fatal= */ false);
8416       codegen_->AddSlowPath(slow_path);
8417       __ B(ne, slow_path->GetEntryLabel());
8418       __ Mov(out, 1);
8419       break;
8420     }
8421 
8422     case TypeCheckKind::kInterfaceCheck: {
8423       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8424         DCHECK(locations->OnlyCallsOnSlowPath());
8425         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8426             instruction, /* is_fatal= */ false);
8427         codegen_->AddSlowPath(slow_path);
8428         if (codegen_->EmitNonBakerReadBarrier()) {
8429           __ B(slow_path->GetEntryLabel());
8430           break;
8431         }
8432         // For Baker read barrier, take the slow path while marking.
8433         __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
8434       }
8435 
8436       // Fast-path without read barriers.
8437       UseScratchRegisterScope temps(GetVIXLAssembler());
8438       vixl32::Register temp = RegisterFrom(maybe_temp_loc);
8439       vixl32::Register temp2 = temps.Acquire();
8440       // /* HeapReference<Class> */ temp = obj->klass_
8441       __ Ldr(temp, MemOperand(obj, class_offset));
8442       GetAssembler()->MaybeUnpoisonHeapReference(temp);
8443       // /* HeapReference<Class> */ temp = temp->iftable_
8444       __ Ldr(temp, MemOperand(temp, iftable_offset));
8445       GetAssembler()->MaybeUnpoisonHeapReference(temp);
8446       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8447       __ Ldr(out, MemOperand(temp, array_length_offset));
8448       // Loop through the `IfTable` and check if any class matches.
8449       vixl32::Label loop;
8450       __ Bind(&loop);
8451       // If taken, the result in `out` is already 0 (false).
8452       __ CompareAndBranchIfZero(out, &done, /* is_far_target= */ false);
8453       __ Ldr(temp2, MemOperand(temp, object_array_data_offset));
8454       GetAssembler()->MaybeUnpoisonHeapReference(temp2);
8455       // Go to next interface.
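           // Note: the `IfTable` stores (interface class, method array) pairs, so each
           // entry spans two references and the remaining count goes down by 2.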
8456       __ Add(temp, temp, static_cast<uint32_t>(2 * kHeapReferenceSize));
8457       __ Sub(out, out, 2);
8458       // Compare the classes and continue the loop if they do not match.
8459       __ Cmp(cls, temp2);
8460       __ B(ne, &loop);
8461       __ Mov(out, 1);
8462       break;
8463     }
8464 
8465     case TypeCheckKind::kUnresolvedCheck: {
8466       // Note that we indeed only call on the slow path, but we always go
8467       // into the slow path for the unresolved check case.
8468       //
8469       // We cannot directly call the InstanceofNonTrivial runtime
8470       // entry point without resorting to a type checking slow path
8471       // here (i.e. by calling InvokeRuntime directly), as it would
8472       // require assigning fixed registers for the inputs of this
8473       // HInstanceOf instruction (following the runtime calling
8474       // convention), which might be cluttered by the potential first
8475       // read barrier emission at the beginning of this method.
8476       //
8477       // TODO: Introduce a new runtime entry point taking the object
8478       // to test (instead of its class) as argument, and let it deal
8479       // with the read barrier issues. This will let us refactor this
8480       // case of the `switch` code as it was previously (with a direct
8481       // call to the runtime not using a type checking slow path).
8482       // This should also be beneficial for the other cases above.
8483       DCHECK(locations->OnlyCallsOnSlowPath());
8484       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8485           instruction, /* is_fatal= */ false);
8486       codegen_->AddSlowPath(slow_path);
8487       __ B(slow_path->GetEntryLabel());
8488       break;
8489     }
8490 
8491     case TypeCheckKind::kBitstringCheck: {
8492       // /* HeapReference<Class> */ temp = obj->klass_
8493       GenerateReferenceLoadTwoRegisters(instruction,
8494                                         out_loc,
8495                                         obj_loc,
8496                                         class_offset,
8497                                         maybe_temp_loc,
8498                                         kWithoutReadBarrier);
8499 
8500       GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8501       // If `out` is a low reg and we would have another low reg temp, we could
8502       // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8503       //
8504       // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8505       // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8506       // would be the same and we would have fewer direct data dependencies.
8507       codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR
8508       break;
8509     }
8510   }
8511 
8512   if (done.IsReferenced()) {
8513     __ Bind(&done);
8514   }
8515 
8516   if (slow_path != nullptr) {
8517     __ Bind(slow_path->GetExitLabel());
8518   }
8519 }
8520 
8521 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8522   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8523   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8524   LocationSummary* locations =
8525       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8526   locations->SetInAt(0, Location::RequiresRegister());
8527   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8528     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8529     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8530     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8531   } else {
8532     locations->SetInAt(1, Location::RequiresRegister());
8533   }
8534   locations->AddRegisterTemps(
8535       NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8536 }
8537 
8538 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8539   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8540   LocationSummary* locations = instruction->GetLocations();
8541   Location obj_loc = locations->InAt(0);
8542   vixl32::Register obj = InputRegisterAt(instruction, 0);
8543   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8544       ? vixl32::Register()
8545       : InputRegisterAt(instruction, 1);
8546   Location temp_loc = locations->GetTemp(0);
8547   vixl32::Register temp = RegisterFrom(temp_loc);
8548   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8549   DCHECK_LE(num_temps, 3u);
8550   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8551   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8552   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8553   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8554   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8555   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8556   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8557   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8558   const uint32_t object_array_data_offset =
8559       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8560 
8561   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8562   SlowPathCodeARMVIXL* type_check_slow_path =
8563       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8564           instruction, is_type_check_slow_path_fatal);
8565   codegen_->AddSlowPath(type_check_slow_path);
8566 
8567   vixl32::Label done;
8568   vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8569   // Avoid null check if we know obj is not null.
8570   if (instruction->MustDoNullCheck()) {
8571     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8572   }
8573 
8574   switch (type_check_kind) {
8575     case TypeCheckKind::kExactCheck:
8576     case TypeCheckKind::kArrayCheck: {
8577       // /* HeapReference<Class> */ temp = obj->klass_
8578       GenerateReferenceLoadTwoRegisters(instruction,
8579                                         temp_loc,
8580                                         obj_loc,
8581                                         class_offset,
8582                                         maybe_temp2_loc,
8583                                         kWithoutReadBarrier);
8584 
8585       __ Cmp(temp, cls);
8586       // Jump to slow path for throwing the exception or doing a
8587       // more involved array check.
8588       __ B(ne, type_check_slow_path->GetEntryLabel());
8589       break;
8590     }
8591 
8592     case TypeCheckKind::kAbstractClassCheck: {
8593       // /* HeapReference<Class> */ temp = obj->klass_
8594       GenerateReferenceLoadTwoRegisters(instruction,
8595                                         temp_loc,
8596                                         obj_loc,
8597                                         class_offset,
8598                                         maybe_temp2_loc,
8599                                         kWithoutReadBarrier);
8600 
8601       // If the class is abstract, we eagerly fetch the super class of the
8602       // object to avoid doing a comparison we know will fail.
8603       vixl32::Label loop;
8604       __ Bind(&loop);
8605       // /* HeapReference<Class> */ temp = temp->super_class_
8606       GenerateReferenceLoadOneRegister(instruction,
8607                                        temp_loc,
8608                                        super_offset,
8609                                        maybe_temp2_loc,
8610                                        kWithoutReadBarrier);
8611 
8612       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8613       // exception.
8614       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8615 
8616       // Otherwise, compare the classes.
8617       __ Cmp(temp, cls);
8618       __ B(ne, &loop, /* is_far_target= */ false);
8619       break;
8620     }
8621 
8622     case TypeCheckKind::kClassHierarchyCheck: {
8623       // /* HeapReference<Class> */ temp = obj->klass_
8624       GenerateReferenceLoadTwoRegisters(instruction,
8625                                         temp_loc,
8626                                         obj_loc,
8627                                         class_offset,
8628                                         maybe_temp2_loc,
8629                                         kWithoutReadBarrier);
8630 
8631       // Walk over the class hierarchy to find a match.
8632       vixl32::Label loop;
8633       __ Bind(&loop);
8634       __ Cmp(temp, cls);
8635       __ B(eq, final_label, /* is_far_target= */ false);
8636 
8637       // /* HeapReference<Class> */ temp = temp->super_class_
8638       GenerateReferenceLoadOneRegister(instruction,
8639                                        temp_loc,
8640                                        super_offset,
8641                                        maybe_temp2_loc,
8642                                        kWithoutReadBarrier);
8643 
8644       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8645       // exception.
8646       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8647       // Otherwise, jump to the beginning of the loop.
8648       __ B(&loop);
8649       break;
8650     }
8651 
8652     case TypeCheckKind::kArrayObjectCheck:  {
8653       // /* HeapReference<Class> */ temp = obj->klass_
8654       GenerateReferenceLoadTwoRegisters(instruction,
8655                                         temp_loc,
8656                                         obj_loc,
8657                                         class_offset,
8658                                         maybe_temp2_loc,
8659                                         kWithoutReadBarrier);
8660 
8661       // Do an exact check.
8662       __ Cmp(temp, cls);
8663       __ B(eq, final_label, /* is_far_target= */ false);
8664 
8665       // Otherwise, we need to check that the object's class is a non-primitive array.
8666       // /* HeapReference<Class> */ temp = temp->component_type_
8667       GenerateReferenceLoadOneRegister(instruction,
8668                                        temp_loc,
8669                                        component_offset,
8670                                        maybe_temp2_loc,
8671                                        kWithoutReadBarrier);
8672       // If the component type is null, jump to the slow path to throw the exception.
8673       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8674       // Otherwise, the object is indeed an array. Further check that its component
8675       // type is not a primitive type.
8676       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8677       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8678       __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8679       break;
8680     }
8681 
8682     case TypeCheckKind::kUnresolvedCheck:
8683       // We always go into the type check slow path for the unresolved check case.
8684       // We cannot directly call the CheckCast runtime entry point
8685       // without resorting to a type checking slow path here (i.e. by
8686       // calling InvokeRuntime directly), as it would require assigning
8687       // fixed registers for the inputs of this HCheckCast
8688       // instruction (following the runtime calling convention), which
8689       // might be cluttered by the potential first read barrier
8690       // emission at the beginning of this method.
8691 
8692       __ B(type_check_slow_path->GetEntryLabel());
8693       break;
8694 
8695     case TypeCheckKind::kInterfaceCheck: {
8696       // Avoid read barriers to improve the performance of the fast path. We cannot get
8697       // false positives by doing this.
8698       // /* HeapReference<Class> */ temp = obj->klass_
8699       GenerateReferenceLoadTwoRegisters(instruction,
8700                                         temp_loc,
8701                                         obj_loc,
8702                                         class_offset,
8703                                         maybe_temp2_loc,
8704                                         kWithoutReadBarrier);
8705 
8706       // /* HeapReference<Class> */ temp = temp->iftable_
8707       GenerateReferenceLoadOneRegister(instruction,
8708                                        temp_loc,
8709                                        iftable_offset,
8710                                        maybe_temp2_loc,
8711                                        kWithoutReadBarrier);
8712       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8713       __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8714       // Loop through the `IfTable` and check if any class matches.
8715       vixl32::Label start_loop;
8716       __ Bind(&start_loop);
8717       __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8718                                 type_check_slow_path->GetEntryLabel());
8719       __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8720       GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8721       // Go to next interface.
8722       __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8723       __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8724       // Compare the classes and continue the loop if they do not match.
8725       __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8726       __ B(ne, &start_loop, /* is_far_target= */ false);
8727       break;
8728     }
8729 
8730     case TypeCheckKind::kBitstringCheck: {
8731       // /* HeapReference<Class> */ temp = obj->klass_
8732       GenerateReferenceLoadTwoRegisters(instruction,
8733                                         temp_loc,
8734                                         obj_loc,
8735                                         class_offset,
8736                                         maybe_temp2_loc,
8737                                         kWithoutReadBarrier);
8738 
8739       GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8740       __ B(ne, type_check_slow_path->GetEntryLabel());
8741       break;
8742     }
8743   }
8744   if (done.IsReferenced()) {
8745     __ Bind(&done);
8746   }
8747 
8748   __ Bind(type_check_slow_path->GetExitLabel());
8749 }
8750 
8751 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8752   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8753       instruction, LocationSummary::kCallOnMainOnly);
8754   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8755   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8756 }
8757 
8758 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8759   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8760                           instruction,
8761                           instruction->GetDexPc());
8762   if (instruction->IsEnter()) {
8763     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8764   } else {
8765     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8766   }
8767   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8768 }
8769 
8770 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8771   HandleBitwiseOperation(instruction, AND);
8772 }
8773 
8774 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8775   HandleBitwiseOperation(instruction, ORR);
8776 }
8777 
8778 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8779   HandleBitwiseOperation(instruction, EOR);
8780 }
8781 
8782 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8783   LocationSummary* locations =
8784       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8785   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8786          || instruction->GetResultType() == DataType::Type::kInt64);
8787   // Note: GVN reorders commutative operations to have the constant on the right hand side.
8788   locations->SetInAt(0, Location::RequiresRegister());
8789   locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8790   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8791 }
8792 
8793 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8794   HandleBitwiseOperation(instruction);
8795 }
8796 
8797 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8798   HandleBitwiseOperation(instruction);
8799 }
8800 
8801 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8802   HandleBitwiseOperation(instruction);
8803 }
8804 
8805 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8806   LocationSummary* locations =
8807       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8808   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8809          || instruction->GetResultType() == DataType::Type::kInt64);
8810 
8811   locations->SetInAt(0, Location::RequiresRegister());
8812   locations->SetInAt(1, Location::RequiresRegister());
8813   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8814 }
8815 
8816 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8817   LocationSummary* locations = instruction->GetLocations();
8818   Location first = locations->InAt(0);
8819   Location second = locations->InAt(1);
8820   Location out = locations->Out();
8821 
8822   if (instruction->GetResultType() == DataType::Type::kInt32) {
8823     vixl32::Register first_reg = RegisterFrom(first);
8824     vixl32::Register second_reg = RegisterFrom(second);
8825     vixl32::Register out_reg = RegisterFrom(out);
8826 
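         // The negated-right operations map onto ARM's and-not / or-not instructions:
         // BIC computes `first & ~second` and ORN computes `first | ~second`.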
8827     switch (instruction->GetOpKind()) {
8828       case HInstruction::kAnd:
8829         __ Bic(out_reg, first_reg, second_reg);
8830         break;
8831       case HInstruction::kOr:
8832         __ Orn(out_reg, first_reg, second_reg);
8833         break;
8834       // There is no EON on 32-bit ARM.
8835       case HInstruction::kXor:
8836       default:
8837         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8838         UNREACHABLE();
8839     }
8840     return;
8841 
8842   } else {
8843     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8844     vixl32::Register first_low = LowRegisterFrom(first);
8845     vixl32::Register first_high = HighRegisterFrom(first);
8846     vixl32::Register second_low = LowRegisterFrom(second);
8847     vixl32::Register second_high = HighRegisterFrom(second);
8848     vixl32::Register out_low = LowRegisterFrom(out);
8849     vixl32::Register out_high = HighRegisterFrom(out);
8850 
8851     switch (instruction->GetOpKind()) {
8852       case HInstruction::kAnd:
8853         __ Bic(out_low, first_low, second_low);
8854         __ Bic(out_high, first_high, second_high);
8855         break;
8856       case HInstruction::kOr:
8857         __ Orn(out_low, first_low, second_low);
8858         __ Orn(out_high, first_high, second_high);
8859         break;
8860       // There is no EON on 32-bit ARM.
8861       case HInstruction::kXor:
8862       default:
8863         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8864         UNREACHABLE();
8865     }
8866   }
8867 }
8868 
8869 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8870     HDataProcWithShifterOp* instruction) {
8871   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8872          instruction->GetType() == DataType::Type::kInt64);
8873   LocationSummary* locations =
8874       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8875   const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8876                        HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
8877 
8878   locations->SetInAt(0, Location::RequiresRegister());
8879   locations->SetInAt(1, Location::RequiresRegister());
8880   locations->SetOut(Location::RequiresRegister(),
8881                     overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8882 }
8883 
8884 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8885     HDataProcWithShifterOp* instruction) {
8886   const LocationSummary* const locations = instruction->GetLocations();
8887   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8888   const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8889 
8890   if (instruction->GetType() == DataType::Type::kInt32) {
8891     const vixl32::Register first = InputRegisterAt(instruction, 0);
8892     const vixl32::Register output = OutputRegister(instruction);
8893     const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8894         ? LowRegisterFrom(locations->InAt(1))
8895         : InputRegisterAt(instruction, 1);
8896 
8897     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8898       DCHECK_EQ(kind, HInstruction::kAdd);
8899 
8900       switch (op_kind) {
8901         case HDataProcWithShifterOp::kUXTB:
8902           __ Uxtab(output, first, second);
8903           break;
8904         case HDataProcWithShifterOp::kUXTH:
8905           __ Uxtah(output, first, second);
8906           break;
8907         case HDataProcWithShifterOp::kSXTB:
8908           __ Sxtab(output, first, second);
8909           break;
8910         case HDataProcWithShifterOp::kSXTH:
8911           __ Sxtah(output, first, second);
8912           break;
8913         default:
8914           LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8915           UNREACHABLE();
8916       }
8917     } else {
8918       GenerateDataProcInstruction(kind,
8919                                   output,
8920                                   first,
8921                                   Operand(second,
8922                                           ShiftFromOpKind(op_kind),
8923                                           instruction->GetShiftAmount()),
8924                                   codegen_);
8925     }
8926   } else {
8927     DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8928 
8929     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8930       const vixl32::Register second = InputRegisterAt(instruction, 1);
8931 
8932       DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
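           // The 32-bit `second` is widened by sign extension: it supplies the low word
           // directly and `second` ASR #31 supplies the high word.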
8933       GenerateDataProc(kind,
8934                        locations->Out(),
8935                        locations->InAt(0),
8936                        second,
8937                        Operand(second, ShiftType::ASR, 31),
8938                        codegen_);
8939     } else {
8940       GenerateLongDataProc(instruction, codegen_);
8941     }
8942   }
8943 }
8944 
8945 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8946 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8947                                                        vixl32::Register first,
8948                                                        uint32_t value) {
8949   // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8950   if (value == 0xffffffffu) {
8951     if (!out.Is(first)) {
8952       __ Mov(out, first);
8953     }
8954     return;
8955   }
8956   if (value == 0u) {
8957     __ Mov(out, 0);
8958     return;
8959   }
8960   if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8961     __ And(out, first, value);
8962   } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
8963     __ Bic(out, first, ~value);
8964   } else {
8965     DCHECK(IsPowerOfTwo(value + 1));
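         // `value` is a contiguous mask of the low bits (2^n - 1), so a UBFX of width n
         // extracts exactly the bits kept by the AND.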
8966     __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8967   }
8968 }
8969 
8970 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8971 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8972                                                        vixl32::Register first,
8973                                                        uint32_t value) {
8974   // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8975   if (value == 0u) {
8976     if (!out.Is(first)) {
8977       __ Mov(out, first);
8978     }
8979     return;
8980   }
8981   if (value == 0xffffffffu) {
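         // ORR with all bits set yields all bits set, regardless of `first`.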
8982     __ Mvn(out, 0);
8983     return;
8984   }
8985   if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8986     __ Orr(out, first, value);
8987   } else {
8988     DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
8989     __ Orn(out, first, ~value);
8990   }
8991 }
8992 
8993 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8994 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8995                                                        vixl32::Register first,
8996                                                        uint32_t value) {
8997   // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
8998   if (value == 0u) {
8999     if (!out.Is(first)) {
9000       __ Mov(out, first);
9001     }
9002     return;
9003   }
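       // There is no EOR with an inverted immediate (no EON) on 32-bit ARM; if the
       // constant is not encodable, the VIXL macro assembler is expected to materialize
       // it in a scratch register.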
9004   __ Eor(out, first, value);
9005 }
9006 
9007 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
9008                                                            Location first,
9009                                                            uint64_t value) {
9010   vixl32::Register out_low = LowRegisterFrom(out);
9011   vixl32::Register out_high = HighRegisterFrom(out);
9012   vixl32::Register first_low = LowRegisterFrom(first);
9013   vixl32::Register first_high = HighRegisterFrom(first);
9014   uint32_t value_low = Low32Bits(value);
9015   uint32_t value_high = High32Bits(value);
9016   if (value_low == 0u) {
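         // Adding zero to the low word cannot produce a carry, so only the high word
         // needs an add.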
9017     if (!out_low.Is(first_low)) {
9018       __ Mov(out_low, first_low);
9019     }
9020     __ Add(out_high, first_high, value_high);
9021     return;
9022   }
9023   __ Adds(out_low, first_low, value_low);
9024   if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
9025     __ Adc(out_high, first_high, value_high);
9026   } else {
9027     DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
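         // SBC computes `rn - op - (1 - carry)`, i.e. `rn + ~op + carry`, so subtracting
         // ~value_high adds value_high together with the carry from the low word.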
9028     __ Sbc(out_high, first_high, ~value_high);
9029   }
9030 }
9031 
9032 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
9033   LocationSummary* locations = instruction->GetLocations();
9034   Location first = locations->InAt(0);
9035   Location second = locations->InAt(1);
9036   Location out = locations->Out();
9037 
9038   if (second.IsConstant()) {
9039     uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
9040     uint32_t value_low = Low32Bits(value);
9041     if (instruction->GetResultType() == DataType::Type::kInt32) {
9042       vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9043       vixl32::Register out_reg = OutputRegister(instruction);
9044       if (instruction->IsAnd()) {
9045         GenerateAndConst(out_reg, first_reg, value_low);
9046       } else if (instruction->IsOr()) {
9047         GenerateOrrConst(out_reg, first_reg, value_low);
9048       } else {
9049         DCHECK(instruction->IsXor());
9050         GenerateEorConst(out_reg, first_reg, value_low);
9051       }
9052     } else {
9053       DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9054       uint32_t value_high = High32Bits(value);
9055       vixl32::Register first_low = LowRegisterFrom(first);
9056       vixl32::Register first_high = HighRegisterFrom(first);
9057       vixl32::Register out_low = LowRegisterFrom(out);
9058       vixl32::Register out_high = HighRegisterFrom(out);
9059       if (instruction->IsAnd()) {
9060         GenerateAndConst(out_low, first_low, value_low);
9061         GenerateAndConst(out_high, first_high, value_high);
9062       } else if (instruction->IsOr()) {
9063         GenerateOrrConst(out_low, first_low, value_low);
9064         GenerateOrrConst(out_high, first_high, value_high);
9065       } else {
9066         DCHECK(instruction->IsXor());
9067         GenerateEorConst(out_low, first_low, value_low);
9068         GenerateEorConst(out_high, first_high, value_high);
9069       }
9070     }
9071     return;
9072   }
9073 
9074   if (instruction->GetResultType() == DataType::Type::kInt32) {
9075     vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9076     vixl32::Register second_reg = InputRegisterAt(instruction, 1);
9077     vixl32::Register out_reg = OutputRegister(instruction);
9078     if (instruction->IsAnd()) {
9079       __ And(out_reg, first_reg, second_reg);
9080     } else if (instruction->IsOr()) {
9081       __ Orr(out_reg, first_reg, second_reg);
9082     } else {
9083       DCHECK(instruction->IsXor());
9084       __ Eor(out_reg, first_reg, second_reg);
9085     }
9086   } else {
9087     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9088     vixl32::Register first_low = LowRegisterFrom(first);
9089     vixl32::Register first_high = HighRegisterFrom(first);
9090     vixl32::Register second_low = LowRegisterFrom(second);
9091     vixl32::Register second_high = HighRegisterFrom(second);
9092     vixl32::Register out_low = LowRegisterFrom(out);
9093     vixl32::Register out_high = HighRegisterFrom(out);
9094     if (instruction->IsAnd()) {
9095       __ And(out_low, first_low, second_low);
9096       __ And(out_high, first_high, second_high);
9097     } else if (instruction->IsOr()) {
9098       __ Orr(out_low, first_low, second_low);
9099       __ Orr(out_high, first_high, second_high);
9100     } else {
9101       DCHECK(instruction->IsXor());
9102       __ Eor(out_low, first_low, second_low);
9103       __ Eor(out_high, first_high, second_high);
9104     }
9105   }
9106 }
9107 
9108 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
9109     HInstruction* instruction,
9110     Location out,
9111     uint32_t offset,
9112     Location maybe_temp,
9113     ReadBarrierOption read_barrier_option) {
9114   vixl32::Register out_reg = RegisterFrom(out);
9115   if (read_barrier_option == kWithReadBarrier) {
9116     DCHECK(codegen_->EmitReadBarrier());
9117     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9118     if (kUseBakerReadBarrier) {
9119       // Load with fast path based Baker's read barrier.
9120       // /* HeapReference<Object> */ out = *(out + offset)
9121       codegen_->GenerateFieldLoadWithBakerReadBarrier(
9122           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
9123     } else {
9124       // Load with slow path based read barrier.
9125       // Save the value of `out` into `maybe_temp` before overwriting it
9126       // in the following load operation, as we will need it for the
9127       // read barrier below.
9128       __ Mov(RegisterFrom(maybe_temp), out_reg);
9129       // /* HeapReference<Object> */ out = *(out + offset)
9130       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9131       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
9132     }
9133   } else {
9134     // Plain load with no read barrier.
9135     // /* HeapReference<Object> */ out = *(out + offset)
9136     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9137     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9138   }
9139 }
9140 
9141 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
9142     HInstruction* instruction,
9143     Location out,
9144     Location obj,
9145     uint32_t offset,
9146     Location maybe_temp,
9147     ReadBarrierOption read_barrier_option) {
9148   vixl32::Register out_reg = RegisterFrom(out);
9149   vixl32::Register obj_reg = RegisterFrom(obj);
9150   if (read_barrier_option == kWithReadBarrier) {
9151     DCHECK(codegen_->EmitReadBarrier());
9152     if (kUseBakerReadBarrier) {
9153       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9154       // Load with fast path based Baker's read barrier.
9155       // /* HeapReference<Object> */ out = *(obj + offset)
9156       codegen_->GenerateFieldLoadWithBakerReadBarrier(
9157           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
9158     } else {
9159       // Load with slow path based read barrier.
9160       // /* HeapReference<Object> */ out = *(obj + offset)
9161       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9162       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
9163     }
9164   } else {
9165     // Plain load with no read barrier.
9166     // /* HeapReference<Object> */ out = *(obj + offset)
9167     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9168     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9169   }
9170 }
9171 
9172 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
9173     HInstruction* instruction,
9174     Location root,
9175     vixl32::Register obj,
9176     uint32_t offset,
9177     ReadBarrierOption read_barrier_option) {
9178   vixl32::Register root_reg = RegisterFrom(root);
9179   if (read_barrier_option == kWithReadBarrier) {
9180     DCHECK(EmitReadBarrier());
9181     if (kUseBakerReadBarrier) {
9182       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
9183       // Baker's read barriers are used.
9184 
9185       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
9186       // the Marking Register) to decide whether we need to enter
9187       // the slow path to mark the GC root.
9188       //
9189       // We use shared thunks for the slow path; shared within the method
9190       // for JIT, across methods for AOT. That thunk checks the reference
9191       // and jumps to the entrypoint if needed.
9192       //
9193       //     lr = &return_address;
9194       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
9195       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9196       //       goto gc_root_thunk<root_reg>(lr)
9197       //     }
9198       //   return_address:
9199 
9200       UseScratchRegisterScope temps(GetVIXLAssembler());
9201       temps.Exclude(ip);
9202       bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
9203       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
9204 
9205       size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
9206       size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
9207       size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9208                           narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9209       ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9210       vixl32::Label return_address;
9211       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9212       __ cmp(mr, Operand(0));
9213       // Currently the offset is always within range. If that changes,
9214       // we shall have to split the load the same way as for fields.
9215       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
9216       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9217       __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
9218       EmitBakerReadBarrierBne(custom_data);
9219       __ bind(&return_address);
9220       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9221                 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9222                        : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
9223     } else {
9224       // GC root loaded through a slow path for read barriers other
9225       // than Baker's.
9226       // /* GcRoot<mirror::Object>* */ root = obj + offset
9227       __ Add(root_reg, obj, offset);
9228       // /* mirror::Object* */ root = root->Read()
9229       GenerateReadBarrierForRootSlow(instruction, root, root);
9230     }
9231   } else {
9232     // Plain GC root load with no read barrier.
9233     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
9234     GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
9235     // Note that GC roots are not affected by heap poisoning, thus we
9236     // do not have to unpoison `root_reg` here.
9237   }
9238   MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
9239 }
9240 
9241 void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier(
9242     vixl::aarch32::Register marked_old_value,
9243     vixl::aarch32::Register old_value) {
9244   DCHECK(EmitBakerReadBarrier());
9245 
9246   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
9247   // For low registers, we can reuse the GC root narrow entrypoint; for high registers
9248   // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
9249   bool narrow_mov = marked_old_value.IsLow();
9250   uint32_t custom_data = narrow_mov
9251       ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
9252       : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
9253 
9254   size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
9255   size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
9256   size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9257                       narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9258   ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9259   vixl32::Label return_address;
9260   EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9261   __ cmp(mr, Operand(0));
9262   ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9263   __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
9264   EmitBakerReadBarrierBne(custom_data);
9265   __ bind(&return_address);
9266   DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9267             narrow_mov
9268                 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9269                 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
9270 }
9271 
9272 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9273                                                                  Location ref,
9274                                                                  vixl32::Register obj,
9275                                                                  const vixl32::MemOperand& src,
9276                                                                  bool needs_null_check) {
9277   DCHECK(EmitBakerReadBarrier());
9278 
9279   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9280   // Marking Register) to decide whether we need to enter the slow
9281   // path to mark the reference. Then, in the slow path, check the
9282   // gray bit in the lock word of the reference's holder (`obj`) to
9283   // decide whether to mark `ref` or not.
9284   //
9285   // We use shared thunks for the slow path; shared within the method
9286   // for JIT, across methods for AOT. That thunk checks the holder
9287   // and jumps to the entrypoint if needed. If the holder is not gray,
9288   // it creates a fake dependency and returns to the LDR instruction.
9289   //
9290   //     lr = &gray_return_address;
9291   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9292   //       goto field_thunk<holder_reg, base_reg>(lr)
9293   //     }
9294   //   not_gray_return_address:
9295   //     // Original reference load. If the offset is too large to fit
9296   //     // into LDR, we use an adjusted base register here.
9297   //     HeapReference<mirror::Object> reference = *(obj+offset);
9298   //   gray_return_address:
9299 
9300   DCHECK(src.GetAddrMode() == vixl32::Offset);
9301   DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
9302   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9303   bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
9304 
9305   UseScratchRegisterScope temps(GetVIXLAssembler());
9306   temps.Exclude(ip);
9307   uint32_t custom_data =
9308       EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
9309 
9310   {
9311     size_t narrow_instructions =
9312         /* CMP */ (mr.IsLow() ? 1u : 0u) +
9313         /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
9314     size_t wide_instructions =
9315         /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9316     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9317                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9318     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9319     vixl32::Label return_address;
9320     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9321     __ cmp(mr, Operand(0));
9322     EmitBakerReadBarrierBne(custom_data);
9323     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9324     __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
9325     if (needs_null_check) {
9326       MaybeRecordImplicitNullCheck(instruction);
9327     }
9328     // Note: We need a specific width for the unpoisoning NEG.
9329     if (kPoisonHeapReferences) {
9330       if (narrow) {
9331         // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
9332         __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
9333       } else {
9334         __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9335       }
9336     }
9337     __ bind(&return_address);
9338     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9339               narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
9340                      : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
9341   }
9342   MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
9343 }
9344 
9345 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9346                                                                  Location ref,
9347                                                                  vixl32::Register obj,
9348                                                                  uint32_t offset,
9349                                                                  Location maybe_temp,
9350                                                                  bool needs_null_check) {
9351   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
9352   vixl32::Register base = obj;
9353   if (offset >= kReferenceLoadMinFarOffset) {
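         // Fold the aligned high part of a far offset into a temporary base register so
         // that the remaining low bits fit the immediate of the reference-loading LDR.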
9354     base = RegisterFrom(maybe_temp);
9355     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
9356     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
9357     offset &= (kReferenceLoadMinFarOffset - 1u);
9358   }
9359   GenerateFieldLoadWithBakerReadBarrier(
9360       instruction, ref, obj, MemOperand(base, offset), needs_null_check);
9361 }
9362 
9363 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
9364                                                                  vixl32::Register obj,
9365                                                                  uint32_t data_offset,
9366                                                                  Location index,
9367                                                                  Location temp,
9368                                                                  bool needs_null_check) {
9369   DCHECK(EmitBakerReadBarrier());
9370 
9371   static_assert(
9372       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
9373       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
9374   ScaleFactor scale_factor = TIMES_4;
9375 
9376   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9377   // Marking Register) to decide whether we need to enter the slow
9378   // path to mark the reference. Then, in the slow path, check the
9379   // gray bit in the lock word of the reference's holder (`obj`) to
9380   // decide whether to mark `ref` or not.
9381   //
9382   // We use shared thunks for the slow path; shared within the method
9383   // for JIT, across methods for AOT. That thunk checks the holder
9384   // and jumps to the entrypoint if needed. If the holder is not gray,
9385   // it creates a fake dependency and returns to the LDR instruction.
9386   //
9387   //     lr = &gray_return_address;
9388   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9389   //       goto array_thunk<base_reg>(lr)
9390   //     }
9391   //   not_gray_return_address:
9392   //     // Original reference load. If the offset is too large to fit
9393   //     // into LDR, we use an adjusted base register here.
9394   //     HeapReference<mirror::Object> reference = data[index];
9395   //   gray_return_address:
9396 
9397   DCHECK(index.IsValid());
9398   vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9399   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9400   vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
9401 
9402   UseScratchRegisterScope temps(GetVIXLAssembler());
9403   temps.Exclude(ip);
9404   uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9405 
9406   __ Add(data_reg, obj, Operand(data_offset));
9407   {
9408     size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9409     size_t wide_instructions =
9410         /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9411     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9412                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9413     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9414     vixl32::Label return_address;
9415     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9416     __ cmp(mr, Operand(0));
9417     EmitBakerReadBarrierBne(custom_data);
9418     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9419     __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9420     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
9421     // Note: We need a Wide NEG for the unpoisoning.
9422     if (kPoisonHeapReferences) {
9423       __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9424     }
9425     __ bind(&return_address);
9426     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9427               BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9428   }
9429   MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9430 }
9431 
9432 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9433   // The following condition is a compile-time one, so it does not have a run-time cost.
9434   if (kIsDebugBuild && EmitBakerReadBarrier()) {
9435     // The following condition is a run-time one; it is executed after the
9436     // previous compile-time test, to avoid penalizing non-debug builds.
9437     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9438       UseScratchRegisterScope temps(GetVIXLAssembler());
9439       vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9440       GetAssembler()->GenerateMarkingRegisterCheck(temp,
9441                                                    kMarkingRegisterCheckBreakCodeBaseCode + code);
9442     }
9443   }
9444 }
9445 
9446 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9447                                                                   Location out,
9448                                                                   Location ref,
9449                                                                   Location obj,
9450                                                                   uint32_t offset,
9451                                                                   Location index) {
9452   SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9453       ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9454   AddSlowPath(slow_path);
9455   return slow_path;
9456 }
9457 
9458 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9459                                                    Location out,
9460                                                    Location ref,
9461                                                    Location obj,
9462                                                    uint32_t offset,
9463                                                    Location index) {
9464   DCHECK(EmitReadBarrier());
9465 
9466   // Insert a slow path based read barrier *after* the reference load.
9467   //
9468   // If heap poisoning is enabled, the unpoisoning of the loaded
9469   // reference will be carried out by the runtime within the slow
9470   // path.
9471   //
9472   // Note that `ref` currently does not get unpoisoned (when heap
9473   // poisoning is enabled), which is alright as the `ref` argument is
9474   // not used by the artReadBarrierSlow entry point.
9475   //
9476   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9477   SlowPathCodeARMVIXL* slow_path =
9478       AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9479 
9480   __ B(slow_path->GetEntryLabel());
9481   __ Bind(slow_path->GetExitLabel());
9482 }
9483 
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)9484 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9485                                                         Location out,
9486                                                         Location ref,
9487                                                         Location obj,
9488                                                         uint32_t offset,
9489                                                         Location index) {
9490   if (EmitReadBarrier()) {
9491     // Baker's read barriers shall be handled by the fast path
9492     // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9493     DCHECK(!kUseBakerReadBarrier);
9494     // If heap poisoning is enabled, unpoisoning will be taken care of
9495     // by the runtime within the slow path.
9496     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9497   } else if (kPoisonHeapReferences) {
9498     GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9499   }
9500 }
9501 
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)9502 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9503                                                           Location out,
9504                                                           Location root) {
9505   DCHECK(EmitReadBarrier());
9506 
9507   // Insert a slow path based read barrier *after* the GC root load.
9508   //
9509   // Note that GC roots are not affected by heap poisoning, so we do
9510   // not need to do anything special for this here.
9511   SlowPathCodeARMVIXL* slow_path =
9512       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9513   AddSlowPath(slow_path);
9514 
9515   __ B(slow_path->GetEntryLabel());
9516   __ Bind(slow_path->GetExitLabel());
9517 }
9518 
9519 // Check if the desired_dispatch_info is supported. If it is, return it,
9520 // otherwise return a fall-back info that should be used instead.
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method)9521 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9522     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9523     ArtMethod* method) {
9524   if (method->IsIntrinsic() &&
9525       desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9526     // As a work-around for soft-float native ABI interfering with type checks, we are
9527     // inserting fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9528     // when a float or double argument is passed in core registers but we cannot do that
9529     // for actual intrinsic implementations that expect them in FP registers. Therefore
9530     // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9531     // properly intrinsified, the dispatch type does not matter anyway.
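    // For example (illustrative only), a native method with shorty "IFD" (int return;
    // float and double arguments) has FP arguments, so the loop below downgrades the
    // dispatch to kCallArtMethod. Note that shorty[0] is the return type, hence i starts at 1.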
9532     ScopedObjectAccess soa(Thread::Current());
9533     uint32_t shorty_len;
9534     const char* shorty = method->GetShorty(&shorty_len);
9535     for (uint32_t i = 1; i != shorty_len; ++i) {
9536       if (shorty[i] == 'D' || shorty[i] == 'F') {
9537         HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9538         dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9539         return dispatch_info;
9540       }
9541     }
9542   }
9543   return desired_dispatch_info;
9544 }
9545 
9546 
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)9547 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9548   switch (load_kind) {
9549     case MethodLoadKind::kBootImageLinkTimePcRelative: {
9550       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9551       PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9552       vixl32::Register temp_reg = RegisterFrom(temp);
9553       EmitMovwMovtPlaceholder(labels, temp_reg);
9554       break;
9555     }
9556     case MethodLoadKind::kBootImageRelRo: {
9557       uint32_t boot_image_offset = GetBootImageOffset(invoke);
9558       LoadBootImageRelRoEntry(RegisterFrom(temp), boot_image_offset);
9559       break;
9560     }
9561     case MethodLoadKind::kAppImageRelRo: {
9562       DCHECK(GetCompilerOptions().IsAppImage());
9563       PcRelativePatchInfo* labels = NewAppImageMethodPatch(invoke->GetResolvedMethodReference());
9564       vixl32::Register temp_reg = RegisterFrom(temp);
9565       EmitMovwMovtPlaceholder(labels, temp_reg);
9566       __ Ldr(temp_reg, MemOperand(temp_reg, /*offset=*/ 0));
9567       break;
9568     }
9569     case MethodLoadKind::kBssEntry: {
9570       PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9571       vixl32::Register temp_reg = RegisterFrom(temp);
9572       EmitMovwMovtPlaceholder(labels, temp_reg);
9573       // All aligned loads are implicitly atomic consume operations on ARM.
9574       GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9575       break;
9576     }
9577     case MethodLoadKind::kJitDirectAddress: {
9578       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9579       break;
9580     }
9581     case MethodLoadKind::kRuntimeCall: {
9582       // Test situation, don't do anything.
9583       break;
9584     }
9585     default: {
9586       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9587       UNREACHABLE();
9588     }
9589   }
9590 }
9591 
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)9592 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9593     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9594   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
9595   switch (invoke->GetMethodLoadKind()) {
9596     case MethodLoadKind::kStringInit: {
9597       uint32_t offset =
9598           GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9599       // temp = thread->string_init_entrypoint
9600       GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9601       break;
9602     }
9603     case MethodLoadKind::kRecursive: {
9604       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9605       break;
9606     }
9607     case MethodLoadKind::kRuntimeCall: {
9608       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9609       return;  // No code pointer retrieval; the runtime performs the call directly.
9610     }
9611     case MethodLoadKind::kBootImageLinkTimePcRelative:
9612       // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9613       // pointer for kCallCriticalNative because it would not save us an instruction from
9614       // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9615       // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
9616       FALLTHROUGH_INTENDED;
9617     default: {
9618       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9619       break;
9620     }
9621   }
9622 
9623   auto call_code_pointer_member = [&](MemberOffset offset) {
9624     // LR = callee_method->member;
9625     GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9626     {
9627       // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9628       // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
9629       ExactAssemblyScope aas(GetVIXLAssembler(),
9630                              vixl32::k16BitT32InstructionSizeInBytes,
9631                              CodeBufferCheckScope::kExactSize);
9632       // LR()
9633       __ blx(lr);
9634       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9635     }
9636   };
9637   switch (invoke->GetCodePtrLocation()) {
9638     case CodePtrLocation::kCallSelf:
9639       {
9640         DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
9641         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9642         ExactAssemblyScope aas(GetVIXLAssembler(),
9643                                vixl32::k32BitT32InstructionSizeInBytes,
9644                                CodeBufferCheckScope::kMaximumSize);
9645         __ bl(GetFrameEntryLabel());
9646         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9647       }
9648       break;
9649     case CodePtrLocation::kCallCriticalNative: {
9650       size_t out_frame_size =
9651           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9652                                     kAapcsStackAlignment,
9653                                     GetCriticalNativeDirectCallFrameSize>(invoke);
9654       call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9655       // Move the result when needed due to native and managed ABI mismatch.
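      // (With the 32-bit ARM softfp native ABI, FP results come back in core registers:
      // float in r0, double in r0/r1, whereas the managed ABI expects them in s0/d0.)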
9656       switch (invoke->GetType()) {
9657         case DataType::Type::kFloat32:
9658           __ Vmov(s0, r0);
9659           break;
9660         case DataType::Type::kFloat64:
9661           __ Vmov(d0, r0, r1);
9662           break;
9663         case DataType::Type::kBool:
9664         case DataType::Type::kInt8:
9665         case DataType::Type::kUint16:
9666         case DataType::Type::kInt16:
9667         case DataType::Type::kInt32:
9668         case DataType::Type::kInt64:
9669         case DataType::Type::kVoid:
9670           break;
9671         default:
9672           DCHECK(false) << invoke->GetType();
9673           break;
9674       }
9675       if (out_frame_size != 0u) {
9676         DecreaseFrame(out_frame_size);
9677       }
9678       break;
9679     }
9680     case CodePtrLocation::kCallArtMethod:
9681       call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9682       break;
9683   }
9684 
9685   DCHECK(!IsLeafMethod());
9686 }
9687 
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_location,SlowPathCode * slow_path)9688 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9689     HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9690   vixl32::Register temp = RegisterFrom(temp_location);
9691   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9692       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9693 
9694   // Use the calling convention instead of the location of the receiver, as
9695   // intrinsics may have put the receiver in a different register. In the intrinsics
9696   // slow path, the arguments have been moved to the right place, so here we are
9697   // guaranteed that the receiver is the first register of the calling convention.
9698   InvokeDexCallingConventionARMVIXL calling_convention;
9699   vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9700   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9701   {
9702     // Make sure the pc is recorded immediately after the `ldr` instruction.
9703     ExactAssemblyScope aas(GetVIXLAssembler(),
9704                            vixl32::kMaxInstructionSizeInBytes,
9705                            CodeBufferCheckScope::kMaximumSize);
9706     // /* HeapReference<Class> */ temp = receiver->klass_
9707     __ ldr(temp, MemOperand(receiver, class_offset));
9708     MaybeRecordImplicitNullCheck(invoke);
9709   }
9710   // Instead of simply (possibly) unpoisoning `temp` here, we should
9711   // emit a read barrier for the previous class reference load.
9712   // However, this is not required in practice: `temp` is only an
9713   // intermediate/temporary reference, and the current concurrent
9714   // copying collector keeps the from-space memory intact and
9715   // accessible until the end of the marking phase (though future
9716   // collectors may not).
9717   GetAssembler()->MaybeUnpoisonHeapReference(temp);
9718 
9719   // If we're compiling baseline, update the inline cache.
9720   MaybeGenerateInlineCacheCheck(invoke, temp);
9721 
9722   // temp = temp->GetMethodAt(method_offset);
9723   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9724       kArmPointerSize).Int32Value();
9725   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9726   // LR = temp->GetEntryPoint();
9727   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9728   {
9729     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9730     // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
9731     ExactAssemblyScope aas(GetVIXLAssembler(),
9732                            vixl32::k16BitT32InstructionSizeInBytes,
9733                            CodeBufferCheckScope::kExactSize);
9734     // LR();
9735     __ blx(lr);
9736     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9737   }
9738 }
9739 
NewBootImageIntrinsicPatch(uint32_t intrinsic_data)9740 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9741     uint32_t intrinsic_data) {
9742   return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9743 }
9744 
NewBootImageRelRoPatch(uint32_t boot_image_offset)9745 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9746     uint32_t boot_image_offset) {
9747   return NewPcRelativePatch(/* dex_file= */ nullptr,
9748                             boot_image_offset,
9749                             &boot_image_other_patches_);
9750 }
9751 
NewBootImageMethodPatch(MethodReference target_method)9752 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9753     MethodReference target_method) {
9754   return NewPcRelativePatch(
9755       target_method.dex_file, target_method.index, &boot_image_method_patches_);
9756 }
9757 
NewAppImageMethodPatch(MethodReference target_method)9758 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageMethodPatch(
9759     MethodReference target_method) {
9760   return NewPcRelativePatch(
9761       target_method.dex_file, target_method.index, &app_image_method_patches_);
9762 }
9763 
NewMethodBssEntryPatch(MethodReference target_method)9764 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9765     MethodReference target_method) {
9766   return NewPcRelativePatch(
9767       target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9768 }
9769 
NewBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)9770 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9771     const DexFile& dex_file, dex::TypeIndex type_index) {
9772   return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9773 }
9774 
NewAppImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)9775 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageTypePatch(
9776     const DexFile& dex_file, dex::TypeIndex type_index) {
9777   return NewPcRelativePatch(&dex_file, type_index.index_, &app_image_type_patches_);
9778 }
9779 
NewTypeBssEntryPatch(HLoadClass * load_class)9780 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9781     HLoadClass* load_class) {
9782   const DexFile& dex_file = load_class->GetDexFile();
9783   dex::TypeIndex type_index = load_class->GetTypeIndex();
9784   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9785   switch (load_class->GetLoadKind()) {
9786     case HLoadClass::LoadKind::kBssEntry:
9787       patches = &type_bss_entry_patches_;
9788       break;
9789     case HLoadClass::LoadKind::kBssEntryPublic:
9790       patches = &public_type_bss_entry_patches_;
9791       break;
9792     case HLoadClass::LoadKind::kBssEntryPackage:
9793       patches = &package_type_bss_entry_patches_;
9794       break;
9795     default:
9796       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9797       UNREACHABLE();
9798   }
9799   return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9800 }
9801 
NewBootImageStringPatch(const DexFile & dex_file,dex::StringIndex string_index)9802 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9803     const DexFile& dex_file, dex::StringIndex string_index) {
9804   return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9805 }
9806 
NewStringBssEntryPatch(const DexFile & dex_file,dex::StringIndex string_index)9807 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9808     const DexFile& dex_file, dex::StringIndex string_index) {
9809   return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9810 }
9811 
NewPcRelativePatch(const DexFile * dex_file,uint32_t offset_or_index,ArenaDeque<PcRelativePatchInfo> * patches)9812 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9813     const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9814   patches->emplace_back(dex_file, offset_or_index);
9815   return &patches->back();
9816 }
9817 
EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset)9818 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9819   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9820   DCHECK(!GetCompilerOptions().IsJitCompiler());
9821   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9822   vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9823   __ bind(bl_label);
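  // Bind the placeholder label right after the BL so the branch has a valid, encodable
  // target; the linker retargets it to the entrypoint call thunk (see the
  // kCallEntrypoint case in EmitThunkCode).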
9824   vixl32::Label placeholder_label;
9825   __ bl(&placeholder_label);  // Placeholder, patched at link-time.
9826   __ bind(&placeholder_label);
9827 }
9828 
EmitBakerReadBarrierBne(uint32_t custom_data)9829 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9830   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9831   if (GetCompilerOptions().IsJitCompiler()) {
9832     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9833     vixl::aarch32::Label* slow_path_entry = &it->second.label;
9834     __ b(ne, EncodingSize(Wide), slow_path_entry);
9835   } else {
9836     baker_read_barrier_patches_.emplace_back(custom_data);
9837     vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9838     __ bind(patch_label);
9839     vixl32::Label placeholder_label;
9840     __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
9841     __ bind(&placeholder_label);
9842   }
9843 }
9844 
DeduplicateBootImageAddressLiteral(uint32_t address)9845 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9846   return DeduplicateUint32Literal(address, &uint32_literals_);
9847 }
9848 
DeduplicateJitStringLiteral(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)9849 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9850     const DexFile& dex_file,
9851     dex::StringIndex string_index,
9852     Handle<mirror::String> handle) {
9853   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9854   return jit_string_patches_.GetOrCreate(
9855       StringReference(&dex_file, string_index),
9856       [this]() {
9857         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9858       });
9859 }
9860 
DeduplicateJitClassLiteral(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)9861 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9862                                                       dex::TypeIndex type_index,
9863                                                       Handle<mirror::Class> handle) {
9864   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9865   return jit_class_patches_.GetOrCreate(
9866       TypeReference(&dex_file, type_index),
9867       [this]() {
9868         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9869       });
9870 }
9871 
LoadBootImageRelRoEntry(vixl32::Register reg,uint32_t boot_image_offset)9872 void CodeGeneratorARMVIXL::LoadBootImageRelRoEntry(vixl32::Register reg,
9873                                                    uint32_t boot_image_offset) {
9874   CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9875   EmitMovwMovtPlaceholder(labels, reg);
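  // `reg` now holds the address of the boot image RelRo entry; load the entry itself,
  // which holds the (relocated) boot image address of the referenced object or method.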
9876   __ Ldr(reg, MemOperand(reg, /*offset=*/ 0));
9877 }
9878 
LoadBootImageAddress(vixl32::Register reg,uint32_t boot_image_reference)9879 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9880                                                 uint32_t boot_image_reference) {
9881   if (GetCompilerOptions().IsBootImage()) {
9882     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9883         NewBootImageIntrinsicPatch(boot_image_reference);
9884     EmitMovwMovtPlaceholder(labels, reg);
9885   } else if (GetCompilerOptions().GetCompilePic()) {
9886     LoadBootImageRelRoEntry(reg, boot_image_reference);
9887   } else {
9888     DCHECK(GetCompilerOptions().IsJitCompiler());
9889     gc::Heap* heap = Runtime::Current()->GetHeap();
9890     DCHECK(!heap->GetBootImageSpaces().empty());
9891     uintptr_t address =
9892         reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9893     __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9894   }
9895 }
9896 
LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,TypeReference target_type)9897 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9898                                                          TypeReference target_type) {
9899   // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9900   DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9901   PcRelativePatchInfo* labels =
9902       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9903   EmitMovwMovtPlaceholder(labels, reg);
9904 }
9905 
LoadIntrinsicDeclaringClass(vixl32::Register reg,HInvoke * invoke)9906 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9907   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9908   if (GetCompilerOptions().IsBootImage()) {
9909     MethodReference target_method = invoke->GetResolvedMethodReference();
9910     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9911     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9912   } else {
9913     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9914     LoadBootImageAddress(reg, boot_image_offset);
9915   }
9916 }
9917 
LoadClassRootForIntrinsic(vixl::aarch32::Register reg,ClassRoot class_root)9918 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9919                                                      ClassRoot class_root) {
9920   if (GetCompilerOptions().IsBootImage()) {
9921     ScopedObjectAccess soa(Thread::Current());
9922     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9923     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9924     LoadTypeForBootImageIntrinsic(reg, target_type);
9925   } else {
9926     uint32_t boot_image_offset = GetBootImageOffset(class_root);
9927     LoadBootImageAddress(reg, boot_image_offset);
9928   }
9929 }
9930 
9931 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)9932 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9933     const ArenaDeque<PcRelativePatchInfo>& infos,
9934     ArenaVector<linker::LinkerPatch>* linker_patches) {
9935   for (const PcRelativePatchInfo& info : infos) {
9936     const DexFile* dex_file = info.target_dex_file;
9937     size_t offset_or_index = info.offset_or_index;
9938     DCHECK(info.add_pc_label.IsBound());
9939     uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9940     // Add MOVW patch.
9941     DCHECK(info.movw_label.IsBound());
9942     uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9943     linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9944     // Add MOVT patch.
9945     DCHECK(info.movt_label.IsBound());
9946     uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9947     linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9948   }
9949 }
9950 
9951 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)9952 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9953                                      const DexFile* target_dex_file,
9954                                      uint32_t pc_insn_offset,
9955                                      uint32_t boot_image_offset) {
9956   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
9957   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9958 }
9959 
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)9960 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9961   DCHECK(linker_patches->empty());
9962   size_t size =
9963       /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9964       /* MOVW+MOVT for each entry */ 2u * app_image_method_patches_.size() +
9965       /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9966       /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9967       /* MOVW+MOVT for each entry */ 2u * app_image_type_patches_.size() +
9968       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9969       /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9970       /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9971       /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9972       /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9973       /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9974       call_entrypoint_patches_.size() +
9975       baker_read_barrier_patches_.size();
9976   linker_patches->reserve(size);
9977   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9978     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9979         boot_image_method_patches_, linker_patches);
9980     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9981         boot_image_type_patches_, linker_patches);
9982     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9983         boot_image_string_patches_, linker_patches);
9984   } else {
9985     DCHECK(boot_image_method_patches_.empty());
9986     DCHECK(boot_image_type_patches_.empty());
9987     DCHECK(boot_image_string_patches_.empty());
9988   }
9989   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
9990   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
9991   if (GetCompilerOptions().IsBootImage()) {
9992     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9993         boot_image_other_patches_, linker_patches);
9994   } else {
9995     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
9996         boot_image_other_patches_, linker_patches);
9997     EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
9998         app_image_method_patches_, linker_patches);
9999     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
10000         app_image_type_patches_, linker_patches);
10001   }
10002   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
10003       method_bss_entry_patches_, linker_patches);
10004   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
10005       type_bss_entry_patches_, linker_patches);
10006   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
10007       public_type_bss_entry_patches_, linker_patches);
10008   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
10009       package_type_bss_entry_patches_, linker_patches);
10010   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
10011       string_bss_entry_patches_, linker_patches);
10012   for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
10013     DCHECK(info.target_dex_file == nullptr);
10014     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
10015         info.label.GetLocation(), info.offset_or_index));
10016   }
10017   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
10018     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
10019         info.label.GetLocation(), info.custom_data));
10020   }
10021   DCHECK_EQ(size, linker_patches->size());
10022 }
10023 
NeedsThunkCode(const linker::LinkerPatch & patch) const10024 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
10025   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
10026          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
10027          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
10028 }
10029 
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)10030 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
10031                                          /*out*/ ArenaVector<uint8_t>* code,
10032                                          /*out*/ std::string* debug_name) {
10033   arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
10034   switch (patch.GetType()) {
10035     case linker::LinkerPatch::Type::kCallRelative: {
10036       // The thunk just uses the entry point in the ArtMethod. This works even for calls
10037       // to the generic JNI and interpreter trampolines.
10038       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
10039       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
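      // The load writes straight into PC, tail-calling the method's entry point; the
      // BKPT below is never reached and merely terminates the thunk.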
10040       assembler.GetVIXLAssembler()->Bkpt(0);
10041       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10042         *debug_name = "MethodCallThunk";
10043       }
10044       break;
10045     }
10046     case linker::LinkerPatch::Type::kCallEntrypoint: {
10047       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
10048       assembler.GetVIXLAssembler()->Bkpt(0);
10049       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10050         *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
10051       }
10052       break;
10053     }
10054     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
10055       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
10056       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
10057       break;
10058     }
10059     default:
10060       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
10061       UNREACHABLE();
10062   }
10063 
10064   // Ensure we emit the literal pool if any.
10065   assembler.FinalizeCode();
10066   code->resize(assembler.CodeSize());
10067   MemoryRegion code_region(code->data(), code->size());
10068   assembler.CopyInstructions(code_region);
10069 }
10070 
DeduplicateUint32Literal(uint32_t value,Uint32ToLiteralMap * map)10071 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
10072     uint32_t value,
10073     Uint32ToLiteralMap* map) {
10074   return map->GetOrCreate(
10075       value,
10076       [this, value]() {
10077         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
10078       });
10079 }
10080 
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)10081 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10082   LocationSummary* locations =
10083       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
10084   locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
10085                      Location::RequiresRegister());
10086   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
10087   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
10088   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
10089 }
10090 
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)10091 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10092   vixl32::Register res = OutputRegister(instr);
10093   vixl32::Register accumulator =
10094       InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
10095   vixl32::Register mul_left =
10096       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
10097   vixl32::Register mul_right =
10098       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
10099 
10100   if (instr->GetOpKind() == HInstruction::kAdd) {
10101     __ Mla(res, mul_left, mul_right, accumulator);
10102   } else {
10103     __ Mls(res, mul_left, mul_right, accumulator);
10104   }
10105 }
10106 
VisitBoundType(HBoundType * instruction)10107 void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10108   // Nothing to do, this should be removed during prepare for register allocator.
10109   LOG(FATAL) << "Unreachable";
10110 }
10111 
VisitBoundType(HBoundType * instruction)10112 void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10113   // Nothing to do, this should be removed during prepare for register allocator.
10114   LOG(FATAL) << "Unreachable";
10115 }
10116 
10117 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)10118 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10119   LocationSummary* locations =
10120       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
10121   locations->SetInAt(0, Location::RequiresRegister());
10122   if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
10123       codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10124     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
10125     if (switch_instr->GetStartValue() != 0) {
10126       locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
10127     }
10128   }
10129 }
10130 
10131 // TODO(VIXL): Investigate and reach parity with the old ARM codegen.
VisitPackedSwitch(HPackedSwitch * switch_instr)10132 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10133   int32_t lower_bound = switch_instr->GetStartValue();
10134   uint32_t num_entries = switch_instr->GetNumEntries();
10135   LocationSummary* locations = switch_instr->GetLocations();
10136   vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
10137   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
10138 
10139   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
10140       !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10141     // Create a series of compare/jumps.
10142     UseScratchRegisterScope temps(GetVIXLAssembler());
10143     vixl32::Register temp_reg = temps.Acquire();
10144     // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
10145     // store the immediate, because IP is used as the destination register. For the other
10146     // AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the immediate values are
10147     // constant and can be encoded in the instruction without using the IP register.
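    // For example (illustrative walk-through), with lower_bound == 10 and entries
    // {10, 11, 12, 13}: after the initial ADDS, temp == value - 10 and the EQ branch
    // handles 10; each loop iteration then subtracts 2, with the LO branch taking the
    // first case of the pair and the EQ branch the second, and the trailing CMP #1
    // handles a leftover odd case.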
10148     __ Adds(temp_reg, value_reg, -lower_bound);
10149 
10150     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
10151     // Jump to successors[0] if value == lower_bound.
10152     __ B(eq, codegen_->GetLabelOf(successors[0]));
10153     int32_t last_index = 0;
10154     for (; num_entries - last_index > 2; last_index += 2) {
10155       __ Adds(temp_reg, temp_reg, -2);
10156       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
10157       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
10158       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
10159       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
10160     }
10161     if (num_entries - last_index == 2) {
10162       // The last missing case_value.
10163       __ Cmp(temp_reg, 1);
10164       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
10165     }
10166 
10167     // And the default for any other value.
10168     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
10169       __ B(codegen_->GetLabelOf(default_block));
10170     }
10171   } else {
10172     // Create a table lookup.
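    // Each 32-bit entry of the jump table (emitted by JumpTableARMVIXL::EmitTable below)
    // holds the offset from the table start to its target block, so the dispatch below
    // computes target = table_base + table[key] and branches there.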
10173     vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
10174 
10175     JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
10176 
10177     // Remove the bias.
10178     vixl32::Register key_reg;
10179     if (lower_bound != 0) {
10180       key_reg = RegisterFrom(locations->GetTemp(1));
10181       __ Sub(key_reg, value_reg, lower_bound);
10182     } else {
10183       key_reg = value_reg;
10184     }
10185 
10186     // Check whether the value is in the table, jump to default block if not.
10187     __ Cmp(key_reg, num_entries - 1);
10188     __ B(hi, codegen_->GetLabelOf(default_block));
10189 
10190     UseScratchRegisterScope temps(GetVIXLAssembler());
10191     vixl32::Register jump_offset = temps.Acquire();
10192 
10193     // Load jump offset from the table.
10194     {
10195       const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
10196       ExactAssemblyScope aas(GetVIXLAssembler(),
10197                              (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
10198                              CodeBufferCheckScope::kMaximumSize);
10199       __ adr(table_base, jump_table->GetTableStartLabel());
10200       __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
10201 
10202       // Jump to the target block by branching to table_base (PC-relative) + offset.
10203       vixl32::Register target_address = table_base;
10204       __ add(target_address, table_base, jump_offset);
10205       __ bx(target_address);
10206 
10207       jump_table->EmitTable(codegen_);
10208     }
10209   }
10210 }
10211 
10212 // Copy the result of a call into the given target.
MoveFromReturnRegister(Location trg,DataType::Type type)10213 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
10214   if (!trg.IsValid()) {
10215     DCHECK_EQ(type, DataType::Type::kVoid);
10216     return;
10217   }
10218 
10219   DCHECK_NE(type, DataType::Type::kVoid);
10220 
10221   Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
10222   if (return_loc.Equals(trg)) {
10223     return;
10224   }
10225 
10226   // Let the parallel move resolver take care of all of this.
10227   HParallelMove parallel_move(GetGraph()->GetAllocator());
10228   parallel_move.AddMove(return_loc, trg, type, nullptr);
10229   GetMoveResolver()->EmitNativeCode(&parallel_move);
10230 }
10231 
VisitClassTableGet(HClassTableGet * instruction)10232 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10233   LocationSummary* locations =
10234       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
10235   locations->SetInAt(0, Location::RequiresRegister());
10236   locations->SetOut(Location::RequiresRegister());
10237 }
10238 
VisitClassTableGet(HClassTableGet * instruction)10239 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10240   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
10241     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
10242         instruction->GetIndex(), kArmPointerSize).SizeValue();
10243     GetAssembler()->LoadFromOffset(kLoadWord,
10244                                    OutputRegister(instruction),
10245                                    InputRegisterAt(instruction, 0),
10246                                    method_offset);
10247   } else {
10248     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
10249         instruction->GetIndex(), kArmPointerSize));
10250     GetAssembler()->LoadFromOffset(kLoadWord,
10251                                    OutputRegister(instruction),
10252                                    InputRegisterAt(instruction, 0),
10253                                    mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
10254     GetAssembler()->LoadFromOffset(kLoadWord,
10255                                    OutputRegister(instruction),
10256                                    OutputRegister(instruction),
10257                                    method_offset);
10258   }
10259 }
10260 
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,VIXLUInt32Literal * literal,uint64_t index_in_table)10261 static void PatchJitRootUse(uint8_t* code,
10262                             const uint8_t* roots_data,
10263                             VIXLUInt32Literal* literal,
10264                             uint64_t index_in_table) {
10265   DCHECK(literal->IsBound());
10266   uint32_t literal_offset = literal->GetLocation();
10267   uintptr_t address =
10268       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
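  // Overwrite the 32-bit literal emitted for this root with the address of the root's
  // slot in the JIT roots table.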
10269   uint8_t* data = code + literal_offset;
10270   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
10271 }
10272 
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)10273 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
10274   for (const auto& entry : jit_string_patches_) {
10275     const StringReference& string_reference = entry.first;
10276     VIXLUInt32Literal* table_entry_literal = entry.second;
10277     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
10278     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10279   }
10280   for (const auto& entry : jit_class_patches_) {
10281     const TypeReference& type_reference = entry.first;
10282     VIXLUInt32Literal* table_entry_literal = entry.second;
10283     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
10284     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10285   }
10286 }
10287 
EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo * labels,vixl32::Register out)10288 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
10289     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
10290     vixl32::Register out) {
10291   ExactAssemblyScope aas(GetVIXLAssembler(),
10292                          3 * vixl32::kMaxInstructionSizeInBytes,
10293                          CodeBufferCheckScope::kMaximumSize);
10294   // TODO(VIXL): Think about using mov instead of movw.
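  // At link time the three placeholders below are rewritten into a PC-relative address
  // materialization, conceptually (illustrative only; the actual encodings are produced
  // by the linker):
  //   movw out, #lo16(target - anchor)   // anchor = PC value observed by the ADD
  //   movt out, #hi16(target - anchor)
  //   add  out, out, pc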
10295   __ bind(&labels->movw_label);
10296   __ movw(out, /* operand= */ 0u);
10297   __ bind(&labels->movt_label);
10298   __ movt(out, /* operand= */ 0u);
10299   __ bind(&labels->add_pc_label);
10300   __ add(out, out, pc);
10301 }
10302 
10303 #undef __
10304 #undef QUICK_ENTRY_POINT
10305 #undef TODO_VIXL32
10306 
10307 #define __ assembler.GetVIXLAssembler()->
10308 
EmitGrayCheckAndFastPath(ArmVIXLAssembler & assembler,vixl32::Register base_reg,vixl32::MemOperand & lock_word,vixl32::Label * slow_path,int32_t raw_ldr_offset,vixl32::Label * throw_npe=nullptr)10309 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
10310                                      vixl32::Register base_reg,
10311                                      vixl32::MemOperand& lock_word,
10312                                      vixl32::Label* slow_path,
10313                                      int32_t raw_ldr_offset,
10314                                      vixl32::Label* throw_npe = nullptr) {
10315   // Load the lock word containing the rb_state.
10316   __ Ldr(ip, lock_word);
10317   // Given the numeric representation, it's enough to check the low bit of the rb_state.
10318   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
10319   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
10320   __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
10321   __ B(ne, slow_path, /* is_far_target= */ false);
10322   // To throw the NPE, we return to the fast path, where the LDR faults on the null reference; the artificial dependency below does not matter.
10323   if (throw_npe != nullptr) {
10324     __ Bind(throw_npe);
10325   }
10326   __ Add(lr, lr, raw_ldr_offset);
10327   // Introduce a dependency on the lock_word including rb_state,
10328   // to prevent load-load reordering, and without using
10329   // a memory barrier (which would be more expensive).
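  // (A 32-bit register shifted right by 32 yields 0, so the ADD leaves `base_reg`
  // unchanged while still creating an address dependency on the just-loaded lock word.)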
10330   __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
10331   __ Bx(lr);          // And return back to the function.
10332   // Note: The fake dependency is unnecessary for the slow path.
10333 }
10334 
10335 // Load the read barrier introspection entrypoint into register `entrypoint` and return it.
LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler & assembler)10336 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
10337   // The register where the read barrier introspection entrypoint is loaded
10338   // is the marking register. We clobber it here and the entrypoint restores it to 1.
10339   vixl32::Register entrypoint = mr;
10340   // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
10341   DCHECK_EQ(ip.GetCode(), 12u);
10342   const int32_t entry_point_offset =
10343       Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
10344   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
10345   return entrypoint;
10346 }
10347 
CompileBakerReadBarrierThunk(ArmVIXLAssembler & assembler,uint32_t encoded_data,std::string * debug_name)10348 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
10349                                                         uint32_t encoded_data,
10350                                                         /*out*/ std::string* debug_name) {
10351   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
10352   switch (kind) {
10353     case BakerReadBarrierKind::kField: {
10354       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10355       CheckValidReg(base_reg.GetCode());
10356       vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
10357       CheckValidReg(holder_reg.GetCode());
10358       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10359       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10360       temps.Exclude(ip);
10361       // In the case of a field load, if `base_reg` differs from
10362       // `holder_reg`, the offset was too large and we must have emitted (during the construction
10363       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
10364       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
10365       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
10366       // not necessarily do that check before going to the thunk.
10367       vixl32::Label throw_npe_label;
10368       vixl32::Label* throw_npe = nullptr;
10369       if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
10370         throw_npe = &throw_npe_label;
10371         __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
10372       }
10373       // Check if the holder is gray and, if not, add fake dependency to the base register
10374       // and return to the LDR instruction to load the reference. Otherwise, use introspection
10375       // to load the reference and call the entrypoint that performs further checks on the
10376       // reference and marks it if needed.
10377       vixl32::Label slow_path;
10378       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
10379       const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
10380           ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
10381           : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
10382       EmitGrayCheckAndFastPath(
10383           assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
10384       __ Bind(&slow_path);
10385       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10386                                  raw_ldr_offset;
10387       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10388       if (width == BakerReadBarrierWidth::kWide) {
10389         MemOperand ldr_half_address(lr, ldr_offset + 2);
10390         __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
10391         __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
10392         __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
10393       } else {
10394         MemOperand ldr_address(lr, ldr_offset);
10395         __ Ldrh(ip, ldr_address);             // Load the LDR immediate, encoding T1.
10396         __ Add(ep_reg,                        // Adjust the entrypoint address to the entrypoint
10397                ep_reg,                        // for narrow LDR.
10398                Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
10399         __ Ubfx(ip, ip, 6, 5);                // Extract the imm5, i.e. offset / 4.
10400         __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2));   // Load the reference.
10401       }
10402       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
10403       __ Bx(ep_reg);                          // Jump to the entrypoint.
10404       break;
10405     }
10406     case BakerReadBarrierKind::kArray: {
10407       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10408       CheckValidReg(base_reg.GetCode());
10409       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10410                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10411       DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10412       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10413       temps.Exclude(ip);
10414       vixl32::Label slow_path;
10415       int32_t data_offset =
10416           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10417       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10418       DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10419       const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10420       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10421       __ Bind(&slow_path);
10422       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10423                                  raw_ldr_offset;
10424       MemOperand ldr_address(lr, ldr_offset + 2);
10425       __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
10426                                               // i.e. Rm+32 because the scale in imm2 is 2.
10427       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10428       __ Bfi(ep_reg, ip, 3, 6);               // Insert ip into the entrypoint address to create
10429                                               // a switch-case target based on the index register.
10430       __ Mov(ip, base_reg);                   // Move the base register to ip.
10431       __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
10432       break;
10433     }
10434     case BakerReadBarrierKind::kGcRoot:
10435     case BakerReadBarrierKind::kIntrinsicCas: {
10436       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10437       // and it does not have a forwarding address), call the correct introspection entrypoint;
10438       // otherwise return the reference (or the extracted forwarding address).
10439       // There is no gray bit check for GC roots.
10440       vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10441       CheckValidReg(root_reg.GetCode());
10442       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10443                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10444       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10445       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10446       temps.Exclude(ip);
10447       vixl32::Label return_label, not_marked, forwarding_address;
10448       __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10449       MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10450       __ Ldr(ip, lock_word);
10451       __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10452       __ B(eq, &not_marked);
10453       __ Bind(&return_label);
10454       __ Bx(lr);
10455       __ Bind(&not_marked);
10456       static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10457                     "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10458                     "the highest bits and the 'forwarding address' state to have all bits set");
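      // With the state in bits 31:30 and the 'forwarding address' state being 3, any lock
      // word that compares unsigned-higher-or-same than 0xc0000000 encodes a forwarding address.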
10459       __ Cmp(ip, Operand(0xc0000000));
10460       __ B(hs, &forwarding_address);
10461       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10462       // Adjust the art_quick_read_barrier_mark_introspection address
10463       // in kBakerCcEntrypointRegister to one of
10464       //     art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10465       if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10466         DCHECK(width == BakerReadBarrierWidth::kWide);
10467         DCHECK(!root_reg.IsLow());
10468       }
10469       int32_t entrypoint_offset =
10470           (kind == BakerReadBarrierKind::kGcRoot)
10471               ? (width == BakerReadBarrierWidth::kWide)
10472                   ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10473                   : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10474               : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10475       __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10476       __ Mov(ip, root_reg);
10477       __ Bx(ep_reg);
10478       __ Bind(&forwarding_address);
10479       __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10480       __ Bx(lr);
10481       break;
10482     }
10483     default:
10484       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10485       UNREACHABLE();
10486   }
10487 
10488   // For JIT, the slow path is considered part of the compiled method,
10489   // so JIT should pass null as `debug_name`.
10490   DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
10491   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10492     std::ostringstream oss;
10493     oss << "BakerReadBarrierThunk";
10494     switch (kind) {
10495       case BakerReadBarrierKind::kField:
10496         oss << "Field";
10497         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10498           oss << "Wide";
10499         }
10500         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10501             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10502         break;
10503       case BakerReadBarrierKind::kArray:
10504         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10505         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10506                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10507         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10508         break;
10509       case BakerReadBarrierKind::kGcRoot:
10510         oss << "GcRoot";
10511         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10512           oss << "Wide";
10513         }
10514         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10515         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10516                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10517         break;
10518       case BakerReadBarrierKind::kIntrinsicCas:
10519         oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10520         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10521                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10522         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10523         break;
10524     }
10525     *debug_name = oss.str();
10526   }
10527 }
10528 
10529 #undef __
10530 
10531 }  // namespace arm
10532 }  // namespace art
10533