1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm_vixl.h"
18
19 #include "arch/arm/asm_support_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "arch/arm/jni_frame_arm.h"
22 #include "art_method-inl.h"
23 #include "base/bit_utils.h"
24 #include "base/bit_utils_iterator.h"
25 #include "base/globals.h"
26 #include "class_root-inl.h"
27 #include "class_table.h"
28 #include "code_generator_utils.h"
29 #include "common_arm.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "gc/accounting/card_table.h"
32 #include "gc/space/image_space.h"
33 #include "heap_poisoning.h"
34 #include "interpreter/mterp/nterp.h"
35 #include "intrinsics.h"
36 #include "intrinsics_arm_vixl.h"
37 #include "intrinsics_list.h"
38 #include "intrinsics_utils.h"
39 #include "jit/profiling_info.h"
40 #include "linker/linker_patch.h"
41 #include "mirror/array-inl.h"
42 #include "mirror/class-inl.h"
43 #include "mirror/var_handle.h"
44 #include "profiling_info_builder.h"
45 #include "scoped_thread_state_change-inl.h"
46 #include "thread.h"
47 #include "trace.h"
48 #include "utils/arm/assembler_arm_vixl.h"
49 #include "utils/arm/managed_register_arm.h"
50 #include "utils/assembler.h"
51 #include "utils/stack_checks.h"
52
53 namespace art HIDDEN {
54 namespace arm {
55
56 namespace vixl32 = vixl::aarch32;
57 using namespace vixl32; // NOLINT(build/namespaces)
58
59 using helpers::DRegisterFrom;
60 using helpers::HighRegisterFrom;
61 using helpers::InputDRegisterAt;
62 using helpers::InputOperandAt;
63 using helpers::InputRegister;
64 using helpers::InputRegisterAt;
65 using helpers::InputSRegisterAt;
66 using helpers::InputVRegister;
67 using helpers::InputVRegisterAt;
68 using helpers::Int32ConstantFrom;
69 using helpers::Int64ConstantFrom;
70 using helpers::LocationFrom;
71 using helpers::LowRegisterFrom;
72 using helpers::LowSRegisterFrom;
73 using helpers::OperandFrom;
74 using helpers::OutputRegister;
75 using helpers::OutputSRegister;
76 using helpers::OutputVRegister;
77 using helpers::RegisterFrom;
78 using helpers::SRegisterFrom;
79 using helpers::Uint64ConstantFrom;
80
81 using vixl::EmissionCheckScope;
82 using vixl::ExactAssemblyScope;
83 using vixl::CodeBufferCheckScope;
84
85 using RegisterList = vixl32::RegisterList;
86
87 static bool ExpectedPairLayout(Location location) {
88 // We expect this for both core and FPU register pairs.
89 return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
90 }
91 // Use a local definition to prevent copying mistakes.
92 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
93 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
94 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
95
96 // Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
97 // offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
98 // For the Baker read barrier implementation using link-time generated thunks, we need to split
99 // the offset explicitly.
100 constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
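// For illustration only (not the exact sequence emitted elsewhere in this file): a reference
// field at a hypothetical offset of 0x1010 could be loaded by splitting the offset, e.g.
//   add ip, rn, #0x1000
//   ldr rt, [ip, #0x10]
// so that the LDR immediate stays below the 4KiB limit.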
101
102 // Using a base helps identify when we hit Marking Register check breakpoints.
103 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
104
105 #ifdef __
106 #error "ARM Codegen VIXL macro-assembler macro already defined."
107 #endif
108
109 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
110 #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()-> // NOLINT
111 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
112
113 // Marker for code that is yet to be, and must be, implemented.
114 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
115
116 static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
117 return rt.IsLow() && rn.IsLow() && offset < 32u;
118 }
119
120 class EmitAdrCode {
121 public:
122 EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
123 : assembler_(assembler), rd_(rd), label_(label) {
124 DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope.
125 adr_location_ = assembler->GetCursorOffset();
126 assembler->adr(EncodingSize(Wide), rd, label);
127 }
128
129 ~EmitAdrCode() {
130 DCHECK(label_->IsBound());
131 // The ADR emitted by the assembler does not set the Thumb mode bit we need.
132 // TODO: Maybe extend VIXL to allow ADR for return address?
133 uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
134 // Expecting ADR encoding T3 with `(offset & 1) == 0`.
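// Layout note (for the checks below): the 32-bit T3 encoding is stored as two little-endian
// halfwords, so raw_adr[2] holds imm8 (instruction bits 0-7). Setting its bit 0 adds 1 to the
// computed address, which is exactly the Thumb mode bit.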
135 DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26.
136 DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23.
137 DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15.
138 DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`.
139 // Add the Thumb mode bit.
140 raw_adr[2] |= 0x01u;
141 }
142
143 private:
144 ArmVIXLMacroAssembler* const assembler_;
145 vixl32::Register rd_;
146 vixl32::Label* const label_;
147 int32_t adr_location_;
148 };
149
150 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
151 InvokeRuntimeCallingConventionARMVIXL calling_convention;
152 RegisterSet caller_saves = RegisterSet::Empty();
153 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
154 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
155 // that the kPrimNot result register is the same as the first argument register.
156 return caller_saves;
157 }
158
159 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers;
160 // for each live D register they treat the two corresponding S registers as live.
161 //
162 // The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build,
163 // from a list of contiguous S registers, a list of contiguous D registers (handling the first/last
164 // S register corner cases) and save/restore this new list treating them as D registers, thereby
165 // - decreasing code size,
166 // - avoiding hazards on Cortex-A57, when a pair of S registers for an actually live D register is
167 // restored and then used as a D register in regular non-slow-path code.
168 //
169 // For the following example (v means the S register is live):
170 // D names: | D0 | D1 | D2 | D4 | ...
171 // S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
172 // Live? | | v | v | v | v | v | v | | ...
173 //
174 // S1 and S6 will be saved/restored independently; D registers list (D1, D2) will be processed
175 // as D registers.
176 //
177 // TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
178 // for lists of floating-point registers.
179 static size_t SaveContiguousSRegisterList(size_t first,
180 size_t last,
181 CodeGenerator* codegen,
182 size_t stack_offset) {
183 static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
184 static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
185 DCHECK_LE(first, last);
186 if ((first == last) && (first == 0)) {
187 __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
188 return stack_offset + kSRegSizeInBytes;
189 }
190 if (first % 2 == 1) {
191 __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
192 stack_offset += kSRegSizeInBytes;
193 }
194
195 bool save_last = false;
196 if (last % 2 == 0) {
197 save_last = true;
198 --last;
199 }
200
201 if (first < last) {
202 vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
203 DCHECK_EQ((last - first + 1) % 2, 0u);
204 size_t number_of_d_regs = (last - first + 1) / 2;
205
206 if (number_of_d_regs == 1) {
207 __ Vstr(d_reg, MemOperand(sp, stack_offset));
208 } else if (number_of_d_regs > 1) {
209 UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
210 vixl32::Register base = sp;
211 if (stack_offset != 0) {
212 base = temps.Acquire();
213 __ Add(base, sp, Operand::From(stack_offset));
214 }
215 __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
216 }
217 stack_offset += number_of_d_regs * kDRegSizeInBytes;
218 }
219
220 if (save_last) {
221 __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
222 stack_offset += kSRegSizeInBytes;
223 }
224
225 return stack_offset;
226 }
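// For illustration, tracing the example from the comment above (first = 1, last = 6): S1 is
// stored with VSTR, D1-D2 (i.e. S2-S5) are stored with a single VSTM, and finally S6 is stored
// with VSTR.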
227
228 static size_t RestoreContiguousSRegisterList(size_t first,
229 size_t last,
230 CodeGenerator* codegen,
231 size_t stack_offset) {
232 static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
233 static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
234 DCHECK_LE(first, last);
235 if ((first == last) && (first == 0)) {
236 __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
237 return stack_offset + kSRegSizeInBytes;
238 }
239 if (first % 2 == 1) {
240 __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
241 stack_offset += kSRegSizeInBytes;
242 }
243
244 bool restore_last = false;
245 if (last % 2 == 0) {
246 restore_last = true;
247 --last;
248 }
249
250 if (first < last) {
251 vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
252 DCHECK_EQ((last - first + 1) % 2, 0u);
253 size_t number_of_d_regs = (last - first + 1) / 2;
254 if (number_of_d_regs == 1) {
255 __ Vldr(d_reg, MemOperand(sp, stack_offset));
256 } else if (number_of_d_regs > 1) {
257 UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
258 vixl32::Register base = sp;
259 if (stack_offset != 0) {
260 base = temps.Acquire();
261 __ Add(base, sp, Operand::From(stack_offset));
262 }
263 __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
264 }
265 stack_offset += number_of_d_regs * kDRegSizeInBytes;
266 }
267
268 if (restore_last) {
269 __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
270 stack_offset += kSRegSizeInBytes;
271 }
272
273 return stack_offset;
274 }
275
276 static LoadOperandType GetLoadOperandType(DataType::Type type) {
277 switch (type) {
278 case DataType::Type::kReference:
279 return kLoadWord;
280 case DataType::Type::kBool:
281 case DataType::Type::kUint8:
282 return kLoadUnsignedByte;
283 case DataType::Type::kInt8:
284 return kLoadSignedByte;
285 case DataType::Type::kUint16:
286 return kLoadUnsignedHalfword;
287 case DataType::Type::kInt16:
288 return kLoadSignedHalfword;
289 case DataType::Type::kInt32:
290 return kLoadWord;
291 case DataType::Type::kInt64:
292 return kLoadWordPair;
293 case DataType::Type::kFloat32:
294 return kLoadSWord;
295 case DataType::Type::kFloat64:
296 return kLoadDWord;
297 default:
298 LOG(FATAL) << "Unreachable type " << type;
299 UNREACHABLE();
300 }
301 }
302
303 void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
304 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
305 size_t orig_offset = stack_offset;
306
307 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
308 for (uint32_t i : LowToHighBits(core_spills)) {
309 // If the register holds an object, update the stack mask.
310 if (locations->RegisterContainsObject(i)) {
311 locations->SetStackBit(stack_offset / kVRegSize);
312 }
313 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
314 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
315 saved_core_stack_offsets_[i] = stack_offset;
316 stack_offset += kArmWordSize;
317 }
318
319 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
320 arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
321
322 uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
323 orig_offset = stack_offset;
324 for (uint32_t i : LowToHighBits(fp_spills)) {
325 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
326 saved_fpu_stack_offsets_[i] = stack_offset;
327 stack_offset += kArmWordSize;
328 }
329
330 stack_offset = orig_offset;
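// Illustration with a hypothetical mask: if fp_spills == 0b0111'1100 (S2-S6 live), then
// begin == 2, tmp == 0b1000'0000, the masking clears bits 2-6, end == 7, and S2-S6 are saved
// as one contiguous list by SaveContiguousSRegisterList(2, 6, ...).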
331 while (fp_spills != 0u) {
332 uint32_t begin = CTZ(fp_spills);
333 uint32_t tmp = fp_spills + (1u << begin);
334 fp_spills &= tmp; // Clear the contiguous range of 1s.
335 uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
336 stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
337 }
338 DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
339 }
340
341 void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
342 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
343 size_t orig_offset = stack_offset;
344
345 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
346 for (uint32_t i : LowToHighBits(core_spills)) {
347 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
348 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
349 stack_offset += kArmWordSize;
350 }
351
352 // TODO(VIXL): Check the coherency of stack_offset after this with a test.
353 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
354 arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
355
356 uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
357 while (fp_spills != 0u) {
358 uint32_t begin = CTZ(fp_spills);
359 uint32_t tmp = fp_spills + (1u << begin);
360 fp_spills &= tmp; // Clear the contiguous range of 1s.
361 uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
362 stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
363 }
364 DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
365 }
366
367 class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
368 public:
369 explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
370
371 void EmitNativeCode(CodeGenerator* codegen) override {
372 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
373 __ Bind(GetEntryLabel());
374 if (instruction_->CanThrowIntoCatchBlock()) {
375 // Live registers will be restored in the catch block if caught.
376 SaveLiveRegisters(codegen, instruction_->GetLocations());
377 }
378 arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
379 instruction_,
380 instruction_->GetDexPc(),
381 this);
382 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
383 }
384
385 bool IsFatal() const override { return true; }
386
387 const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
388
389 private:
390 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
391 };
392
393 class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
394 public:
395 explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
396 : SlowPathCodeARMVIXL(instruction) {}
397
398 void EmitNativeCode(CodeGenerator* codegen) override {
399 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
400 __ Bind(GetEntryLabel());
401 arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
402 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
403 }
404
405 bool IsFatal() const override { return true; }
406
407 const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
408
409 private:
410 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
411 };
412
413 class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
414 public:
415 SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
416 : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
417
418 void EmitNativeCode(CodeGenerator* codegen) override {
419 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
420 __ Bind(GetEntryLabel());
421 arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
422 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
423 if (successor_ == nullptr) {
424 __ B(GetReturnLabel());
425 } else {
426 __ B(arm_codegen->GetLabelOf(successor_));
427 }
428 }
429
430 vixl32::Label* GetReturnLabel() {
431 DCHECK(successor_ == nullptr);
432 return &return_label_;
433 }
434
435 HBasicBlock* GetSuccessor() const {
436 return successor_;
437 }
438
439 const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
440
441 private:
442 // If not null, the block to branch to after the suspend check.
443 HBasicBlock* const successor_;
444
445 // If `successor_` is null, the label to branch to after the suspend check.
446 vixl32::Label return_label_;
447
448 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
449 };
450
451 class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
452 public:
453 explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
454 : SlowPathCodeARMVIXL(instruction) {}
455
456 void EmitNativeCode(CodeGenerator* codegen) override {
457 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
458 LocationSummary* locations = instruction_->GetLocations();
459
460 __ Bind(GetEntryLabel());
461 if (instruction_->CanThrowIntoCatchBlock()) {
462 // Live registers will be restored in the catch block if caught.
463 SaveLiveRegisters(codegen, instruction_->GetLocations());
464 }
465 // We're moving two locations to locations that could overlap, so we need a parallel
466 // move resolver.
467 InvokeRuntimeCallingConventionARMVIXL calling_convention;
468 codegen->EmitParallelMoves(
469 locations->InAt(0),
470 LocationFrom(calling_convention.GetRegisterAt(0)),
471 DataType::Type::kInt32,
472 locations->InAt(1),
473 LocationFrom(calling_convention.GetRegisterAt(1)),
474 DataType::Type::kInt32);
475 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
476 ? kQuickThrowStringBounds
477 : kQuickThrowArrayBounds;
478 arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
479 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
480 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
481 }
482
483 bool IsFatal() const override { return true; }
484
485 const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
486
487 private:
488 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
489 };
490
491 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
492 public:
493 LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
494 : SlowPathCodeARMVIXL(at), cls_(cls) {
495 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
496 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
497 }
498
499 void EmitNativeCode(CodeGenerator* codegen) override {
500 LocationSummary* locations = instruction_->GetLocations();
501 Location out = locations->Out();
502 const uint32_t dex_pc = instruction_->GetDexPc();
503 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
504 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
505
506 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
507 __ Bind(GetEntryLabel());
508 SaveLiveRegisters(codegen, locations);
509
510 InvokeRuntimeCallingConventionARMVIXL calling_convention;
511 if (must_resolve_type) {
512 DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()) ||
513 arm_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
514 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
515 &cls_->GetDexFile()));
516 dex::TypeIndex type_index = cls_->GetTypeIndex();
517 __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
518 if (cls_->NeedsAccessCheck()) {
519 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
520 arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
521 } else {
522 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
523 arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
524 }
525 // If we also must_do_clinit, the resolved type is now in the correct register.
526 } else {
527 DCHECK(must_do_clinit);
528 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
529 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
530 }
531 if (must_do_clinit) {
532 arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
533 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
534 }
535
536 // Move the class to the desired location.
537 if (out.IsValid()) {
538 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
539 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
540 }
541 RestoreLiveRegisters(codegen, locations);
542 __ B(GetExitLabel());
543 }
544
545 const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }
546
547 private:
548 // The class this slow path will load.
549 HLoadClass* const cls_;
550
551 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
552 };
553
554 class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
555 public:
556 explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
557 : SlowPathCodeARMVIXL(instruction) {}
558
559 void EmitNativeCode(CodeGenerator* codegen) override {
560 DCHECK(instruction_->IsLoadString());
561 DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
562 LocationSummary* locations = instruction_->GetLocations();
563 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
564 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
565
566 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
567 __ Bind(GetEntryLabel());
568 SaveLiveRegisters(codegen, locations);
569
570 InvokeRuntimeCallingConventionARMVIXL calling_convention;
571 __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
572 arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
573 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
574
575 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
576 RestoreLiveRegisters(codegen, locations);
577
578 __ B(GetExitLabel());
579 }
580
581 const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
582
583 private:
584 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
585 };
586
587 class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
588 public:
589 TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
590 : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
591
592 void EmitNativeCode(CodeGenerator* codegen) override {
593 LocationSummary* locations = instruction_->GetLocations();
594 DCHECK(instruction_->IsCheckCast()
595 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
596
597 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
598 __ Bind(GetEntryLabel());
599
600 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
601 SaveLiveRegisters(codegen, locations);
602 }
603
604 // We're moving two locations to locations that could overlap, so we need a parallel
605 // move resolver.
606 InvokeRuntimeCallingConventionARMVIXL calling_convention;
607
608 codegen->EmitParallelMoves(locations->InAt(0),
609 LocationFrom(calling_convention.GetRegisterAt(0)),
610 DataType::Type::kReference,
611 locations->InAt(1),
612 LocationFrom(calling_convention.GetRegisterAt(1)),
613 DataType::Type::kReference);
614 if (instruction_->IsInstanceOf()) {
615 arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
616 instruction_,
617 instruction_->GetDexPc(),
618 this);
619 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
620 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
621 } else {
622 DCHECK(instruction_->IsCheckCast());
623 arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
624 instruction_,
625 instruction_->GetDexPc(),
626 this);
627 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
628 }
629
630 if (!is_fatal_) {
631 RestoreLiveRegisters(codegen, locations);
632 __ B(GetExitLabel());
633 }
634 }
635
636 const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }
637
638 bool IsFatal() const override { return is_fatal_; }
639
640 private:
641 const bool is_fatal_;
642
643 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
644 };
645
646 class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
647 public:
648 explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
649 : SlowPathCodeARMVIXL(instruction) {}
650
651 void EmitNativeCode(CodeGenerator* codegen) override {
652 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
653 __ Bind(GetEntryLabel());
654 LocationSummary* locations = instruction_->GetLocations();
655 SaveLiveRegisters(codegen, locations);
656 InvokeRuntimeCallingConventionARMVIXL calling_convention;
657 __ Mov(calling_convention.GetRegisterAt(0),
658 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
659
660 arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
661 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
662 }
663
664 const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
665
666 private:
667 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
668 };
669
670 class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
671 public:
672 explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
673
674 void EmitNativeCode(CodeGenerator* codegen) override {
675 LocationSummary* locations = instruction_->GetLocations();
676 __ Bind(GetEntryLabel());
677 SaveLiveRegisters(codegen, locations);
678
679 InvokeRuntimeCallingConventionARMVIXL calling_convention;
680 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
681 parallel_move.AddMove(
682 locations->InAt(0),
683 LocationFrom(calling_convention.GetRegisterAt(0)),
684 DataType::Type::kReference,
685 nullptr);
686 parallel_move.AddMove(
687 locations->InAt(1),
688 LocationFrom(calling_convention.GetRegisterAt(1)),
689 DataType::Type::kInt32,
690 nullptr);
691 parallel_move.AddMove(
692 locations->InAt(2),
693 LocationFrom(calling_convention.GetRegisterAt(2)),
694 DataType::Type::kReference,
695 nullptr);
696 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
697
698 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
699 arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
700 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
701 RestoreLiveRegisters(codegen, locations);
702 __ B(GetExitLabel());
703 }
704
705 const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
706
707 private:
708 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
709 };
710
711 // Slow path generating a read barrier for a heap reference.
712 class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
713 public:
714 ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
715 Location out,
716 Location ref,
717 Location obj,
718 uint32_t offset,
719 Location index)
720 : SlowPathCodeARMVIXL(instruction),
721 out_(out),
722 ref_(ref),
723 obj_(obj),
724 offset_(offset),
725 index_(index) {
726 // If `obj` is equal to `out` or `ref`, it means the initial object
727 // has been overwritten by (or after) the heap object reference load
728 // to be instrumented, e.g.:
729 //
730 // __ LoadFromOffset(kLoadWord, out, out, offset);
731 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
732 //
733 // In that case, we have lost the information about the original
734 // object, and the emitted read barrier cannot work properly.
735 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
736 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
737 }
738
739 void EmitNativeCode(CodeGenerator* codegen) override {
740 DCHECK(codegen->EmitReadBarrier());
741 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
742 LocationSummary* locations = instruction_->GetLocations();
743 vixl32::Register reg_out = RegisterFrom(out_);
744 DCHECK(locations->CanCall());
745 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
746 DCHECK(instruction_->IsInstanceFieldGet() ||
747 instruction_->IsStaticFieldGet() ||
748 instruction_->IsArrayGet() ||
749 instruction_->IsInstanceOf() ||
750 instruction_->IsCheckCast() ||
751 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
752 << "Unexpected instruction in read barrier for heap reference slow path: "
753 << instruction_->DebugName();
754 // The read barrier instrumentation of object ArrayGet
755 // instructions does not support the HIntermediateAddress
756 // instruction.
757 DCHECK(!(instruction_->IsArrayGet() &&
758 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
759
760 __ Bind(GetEntryLabel());
761 SaveLiveRegisters(codegen, locations);
762
763 // We may have to change the index's value, but as `index_` is a
764 // constant member (like other "inputs" of this slow path),
765 // introduce a copy of it, `index`.
766 Location index = index_;
767 if (index_.IsValid()) {
768 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
769 if (instruction_->IsArrayGet()) {
770 // Compute the actual memory offset and store it in `index`.
771 vixl32::Register index_reg = RegisterFrom(index_);
772 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
773 if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
774 // We are about to change the value of `index_reg` (see the
775 // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
776 // art::arm::ArmVIXLMacroAssembler::Add below), but it has
777 // not been saved by the previous call to
778 // art::SlowPathCode::SaveLiveRegisters, as it is a
779 // callee-save register --
780 // art::SlowPathCode::SaveLiveRegisters does not consider
781 // callee-save registers, as it has been designed with the
782 // assumption that callee-save registers are supposed to be
783 // handled by the called function. So, as a callee-save
784 // register, `index_reg` _would_ eventually be saved onto
785 // the stack, but it would be too late: we would have
786 // changed its value earlier. Therefore, we manually save
787 // it here into another freely available register,
788 // `free_reg`, chosen of course among the caller-save
789 // registers (as a callee-save `free_reg` register would
790 // exhibit the same problem).
791 //
792 // Note we could have requested a temporary register from
793 // the register allocator instead; but we prefer not to, as
794 // this is a slow path, and we know we can find a
795 // caller-save register that is available.
796 vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
797 __ Mov(free_reg, index_reg);
798 index_reg = free_reg;
799 index = LocationFrom(index_reg);
800 } else {
801 // The initial register stored in `index_` has already been
802 // saved in the call to art::SlowPathCode::SaveLiveRegisters
803 // (as it is not a callee-save register), so we can freely
804 // use it.
805 }
806 // Shifting the index value contained in `index_reg` by the scale
807 // factor (2) cannot overflow in practice, as the runtime is
808 // unable to allocate object arrays with a size larger than
809 // 2^26 - 1 (that is, 2^28 - 4 bytes).
810 __ Lsl(index_reg, index_reg, TIMES_4);
811 static_assert(
812 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
813 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
814 __ Add(index_reg, index_reg, offset_);
815 } else {
816 // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
817 // (as in the case of ArrayGet), as it is actually an offset to an object field within an
818 // object.
819 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
820 DCHECK(instruction_->GetLocations()->Intrinsified());
821 HInvoke* invoke = instruction_->AsInvoke();
822 DCHECK(IsUnsafeGetReference(invoke) ||
823 IsVarHandleGet(invoke) ||
824 IsVarHandleCASFamily(invoke))
825 << invoke->GetIntrinsic();
826 DCHECK_EQ(offset_, 0U);
827 // Though UnsafeGet's offset location is a register pair, we only pass the low
828 // part (high part is irrelevant for 32-bit addresses) to the slow path.
829 // For VarHandle intrinsics, the index is always just a register.
830 DCHECK(index_.IsRegister());
831 index = index_;
832 }
833 }
834
835 // We're moving two or three locations to locations that could
836 // overlap, so we need a parallel move resolver.
837 InvokeRuntimeCallingConventionARMVIXL calling_convention;
838 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
839 parallel_move.AddMove(ref_,
840 LocationFrom(calling_convention.GetRegisterAt(0)),
841 DataType::Type::kReference,
842 nullptr);
843 parallel_move.AddMove(obj_,
844 LocationFrom(calling_convention.GetRegisterAt(1)),
845 DataType::Type::kReference,
846 nullptr);
847 if (index.IsValid()) {
848 parallel_move.AddMove(index,
849 LocationFrom(calling_convention.GetRegisterAt(2)),
850 DataType::Type::kInt32,
851 nullptr);
852 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
853 } else {
854 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
855 __ Mov(calling_convention.GetRegisterAt(2), offset_);
856 }
857 arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
858 CheckEntrypointTypes<
859 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
860 arm_codegen->Move32(out_, LocationFrom(r0));
861
862 RestoreLiveRegisters(codegen, locations);
863 __ B(GetExitLabel());
864 }
865
866 const char* GetDescription() const override {
867 return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
868 }
869
870 private:
871 vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
872 uint32_t ref = RegisterFrom(ref_).GetCode();
873 uint32_t obj = RegisterFrom(obj_).GetCode();
874 for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
875 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
876 return vixl32::Register(i);
877 }
878 }
879 // We shall never fail to find a free caller-save register, as
880 // there are more than two core caller-save registers on ARM
881 // (meaning it is possible to find one which is different from
882 // `ref` and `obj`).
883 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
884 LOG(FATAL) << "Could not find a free caller-save register";
885 UNREACHABLE();
886 }
887
888 const Location out_;
889 const Location ref_;
890 const Location obj_;
891 const uint32_t offset_;
892 // An additional location containing an index to an array.
893 // Only used for HArrayGet and the UnsafeGetObject &
894 // UnsafeGetObjectVolatile intrinsics.
895 const Location index_;
896
897 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
898 };
899
900 // Slow path generating a read barrier for a GC root.
901 class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
902 public:
903 ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
904 : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
905 }
906
907 void EmitNativeCode(CodeGenerator* codegen) override {
908 DCHECK(codegen->EmitReadBarrier());
909 LocationSummary* locations = instruction_->GetLocations();
910 vixl32::Register reg_out = RegisterFrom(out_);
911 DCHECK(locations->CanCall());
912 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
913 DCHECK(instruction_->IsLoadClass() ||
914 instruction_->IsLoadString() ||
915 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
916 << "Unexpected instruction in read barrier for GC root slow path: "
917 << instruction_->DebugName();
918
919 __ Bind(GetEntryLabel());
920 SaveLiveRegisters(codegen, locations);
921
922 InvokeRuntimeCallingConventionARMVIXL calling_convention;
923 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
924 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
925 arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
926 instruction_,
927 instruction_->GetDexPc(),
928 this);
929 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
930 arm_codegen->Move32(out_, LocationFrom(r0));
931
932 RestoreLiveRegisters(codegen, locations);
933 __ B(GetExitLabel());
934 }
935
936 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }
937
938 private:
939 const Location out_;
940 const Location root_;
941
942 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
943 };
944
945 class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
946 public:
947 explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
948 : SlowPathCodeARMVIXL(instruction) {}
949
950 void EmitNativeCode(CodeGenerator* codegen) override {
951 LocationSummary* locations = instruction_->GetLocations();
952 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
953 QuickEntrypointEnum entry_point =
954 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
955 __ Bind(GetEntryLabel());
956 SaveLiveRegisters(codegen, locations);
957 if (instruction_->IsMethodExitHook()) {
958 // Load frame size to pass to the exit hooks
959 __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
960 }
961 arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
962 RestoreLiveRegisters(codegen, locations);
963 __ B(GetExitLabel());
964 }
965
966 const char* GetDescription() const override {
967 return "MethodEntryExitHooksSlowPath";
968 }
969
970 private:
971 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
972 };
973
974 class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
975 public:
976 CompileOptimizedSlowPathARMVIXL(HSuspendCheck* suspend_check,
977 vixl32::Register profiling_info)
978 : SlowPathCodeARMVIXL(suspend_check),
979 profiling_info_(profiling_info) {}
980
981 void EmitNativeCode(CodeGenerator* codegen) override {
982 uint32_t entry_point_offset =
983 GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
984 __ Bind(GetEntryLabel());
985 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
986 UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
987 vixl32::Register tmp = temps.Acquire();
988 __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold());
989 __ Strh(tmp,
990 MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
991 __ Ldr(lr, MemOperand(tr, entry_point_offset));
992 // Note: we don't record the call here (and therefore don't generate a stack
993 // map), as the entrypoint should never be suspended.
994 __ Blx(lr);
995 __ B(GetExitLabel());
996 }
997
998 const char* GetDescription() const override {
999 return "CompileOptimizedSlowPath";
1000 }
1001
1002 private:
1003 vixl32::Register profiling_info_;
1004
1005 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
1006 };
1007
1008 inline vixl32::Condition ARMCondition(IfCondition cond) {
1009 switch (cond) {
1010 case kCondEQ: return eq;
1011 case kCondNE: return ne;
1012 case kCondLT: return lt;
1013 case kCondLE: return le;
1014 case kCondGT: return gt;
1015 case kCondGE: return ge;
1016 case kCondB: return lo;
1017 case kCondBE: return ls;
1018 case kCondA: return hi;
1019 case kCondAE: return hs;
1020 }
1021 LOG(FATAL) << "Unreachable";
1022 UNREACHABLE();
1023 }
1024
1025 // Maps signed condition to unsigned condition.
1026 inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
1027 switch (cond) {
1028 case kCondEQ: return eq;
1029 case kCondNE: return ne;
1030 // Signed to unsigned.
1031 case kCondLT: return lo;
1032 case kCondLE: return ls;
1033 case kCondGT: return hi;
1034 case kCondGE: return hs;
1035 // Unsigned conditions remain unchanged.
1036 case kCondB: return lo;
1037 case kCondBE: return ls;
1038 case kCondA: return hi;
1039 case kCondAE: return hs;
1040 }
1041 LOG(FATAL) << "Unreachable";
1042 UNREACHABLE();
1043 }
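// For illustration: when a 64-bit comparison is decomposed into word-sized compares, the sign
// lives entirely in the high words, so a signed condition applied to the low words must be
// mapped to its unsigned counterpart (e.g. kCondLT becomes 'lo').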
1044
1045 inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
1046 // The ARM condition codes can express all the necessary branches, see the
1047 // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
1048 // There is no dex instruction or HIR that would need the missing conditions
1049 // "equal or unordered" or "not equal".
1050 switch (cond) {
1051 case kCondEQ: return eq;
1052 case kCondNE: return ne /* unordered */;
1053 case kCondLT: return gt_bias ? cc : lt /* unordered */;
1054 case kCondLE: return gt_bias ? ls : le /* unordered */;
1055 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
1056 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
1057 default:
1058 LOG(FATAL) << "UNREACHABLE";
1059 UNREACHABLE();
1060 }
1061 }
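// For illustration: a floating-point compare against a NaN operand yields the "unordered" flag
// state NZCV == 0b0011, so for kCondLT the gt_bias variant uses 'cc' (false on NaN) while the
// non-biased variant uses 'lt' (true on NaN), matching the /* unordered */ annotations above.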
1062
1063 inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
1064 switch (op_kind) {
1065 case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
1066 case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
1067 case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
1068 default:
1069 LOG(FATAL) << "Unexpected op kind " << op_kind;
1070 UNREACHABLE();
1071 }
1072 }
1073
1074 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
1075 stream << vixl32::Register(reg);
1076 }
1077
1078 void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1079 stream << vixl32::SRegister(reg);
1080 }
1081
1082 const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
1083 return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
1084 }
1085
1086 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
1087 uint32_t mask = 0;
1088 for (uint32_t i = regs.GetFirstSRegister().GetCode();
1089 i <= regs.GetLastSRegister().GetCode();
1090 ++i) {
1091 mask |= (1 << i);
1092 }
1093 return mask;
1094 }
1095
1096 // Saves the register in the stack. Returns the size taken on stack.
1097 size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
1098 [[maybe_unused]] uint32_t reg_id) {
1099 TODO_VIXL32(FATAL);
1100 UNREACHABLE();
1101 }
1102
1103 // Restores the register from the stack. Returns the size taken on stack.
1104 size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
1105 [[maybe_unused]] uint32_t reg_id) {
1106 TODO_VIXL32(FATAL);
1107 UNREACHABLE();
1108 }
1109
1110 size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
1111 [[maybe_unused]] uint32_t reg_id) {
1112 TODO_VIXL32(FATAL);
1113 UNREACHABLE();
1114 }
1115
1116 size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
1117 [[maybe_unused]] uint32_t reg_id) {
1118 TODO_VIXL32(FATAL);
1119 UNREACHABLE();
1120 }
1121
1122 static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
1123 vixl32::Register out,
1124 vixl32::Register first,
1125 const Operand& second,
1126 CodeGeneratorARMVIXL* codegen) {
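// When the second operand is the constant 0 the operation degenerates: x & 0 == 0, while
// x | 0, x ^ 0, x + 0 and x - 0 are all x, so a single MOV (of 0 or of `first`) suffices.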
1127 if (second.IsImmediate() && second.GetImmediate() == 0) {
1128 const Operand in = kind == HInstruction::kAnd
1129 ? Operand(0)
1130 : Operand(first);
1131
1132 __ Mov(out, in);
1133 } else {
1134 switch (kind) {
1135 case HInstruction::kAdd:
1136 __ Add(out, first, second);
1137 break;
1138 case HInstruction::kAnd:
1139 __ And(out, first, second);
1140 break;
1141 case HInstruction::kOr:
1142 __ Orr(out, first, second);
1143 break;
1144 case HInstruction::kSub:
1145 __ Sub(out, first, second);
1146 break;
1147 case HInstruction::kXor:
1148 __ Eor(out, first, second);
1149 break;
1150 default:
1151 LOG(FATAL) << "Unexpected instruction kind: " << kind;
1152 UNREACHABLE();
1153 }
1154 }
1155 }
1156
1157 static void GenerateDataProc(HInstruction::InstructionKind kind,
1158 const Location& out,
1159 const Location& first,
1160 const Operand& second_lo,
1161 const Operand& second_hi,
1162 CodeGeneratorARMVIXL* codegen) {
1163 const vixl32::Register first_hi = HighRegisterFrom(first);
1164 const vixl32::Register first_lo = LowRegisterFrom(first);
1165 const vixl32::Register out_hi = HighRegisterFrom(out);
1166 const vixl32::Register out_lo = LowRegisterFrom(out);
1167
1168 if (kind == HInstruction::kAdd) {
1169 __ Adds(out_lo, first_lo, second_lo);
1170 __ Adc(out_hi, first_hi, second_hi);
1171 } else if (kind == HInstruction::kSub) {
1172 __ Subs(out_lo, first_lo, second_lo);
1173 __ Sbc(out_hi, first_hi, second_hi);
1174 } else {
1175 GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
1176 GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
1177 }
1178 }
1179
1180 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
1181 return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
1182 }
1183
1184 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
1185 CodeGeneratorARMVIXL* codegen) {
1186 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
1187 DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
1188
1189 const LocationSummary* const locations = instruction->GetLocations();
1190 const uint32_t shift_value = instruction->GetShiftAmount();
1191 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
1192 const Location first = locations->InAt(0);
1193 const Location second = locations->InAt(1);
1194 const Location out = locations->Out();
1195 const vixl32::Register first_hi = HighRegisterFrom(first);
1196 const vixl32::Register first_lo = LowRegisterFrom(first);
1197 const vixl32::Register out_hi = HighRegisterFrom(out);
1198 const vixl32::Register out_lo = LowRegisterFrom(out);
1199 const vixl32::Register second_hi = HighRegisterFrom(second);
1200 const vixl32::Register second_lo = LowRegisterFrom(second);
1201 const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
1202
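// Illustration (hypothetical values): for kind == kOr, an LSL by 40 of the 64-bit `second`
// leaves the low result word equal to first_lo (OR with 0) and computes the high word as
// first_hi | (second_lo << 8), which is what the shift_value >= 32 path below emits.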
1203 if (shift_value >= 32) {
1204 if (shift == ShiftType::LSL) {
1205 GenerateDataProcInstruction(kind,
1206 out_hi,
1207 first_hi,
1208 Operand(second_lo, ShiftType::LSL, shift_value - 32),
1209 codegen);
1210 GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1211 } else if (shift == ShiftType::ASR) {
1212 GenerateDataProc(kind,
1213 out,
1214 first,
1215 GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1216 Operand(second_hi, ShiftType::ASR, 31),
1217 codegen);
1218 } else {
1219 DCHECK_EQ(shift, ShiftType::LSR);
1220 GenerateDataProc(kind,
1221 out,
1222 first,
1223 GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1224 0,
1225 codegen);
1226 }
1227 } else {
1228 DCHECK_GT(shift_value, 1U);
1229 DCHECK_LT(shift_value, 32U);
1230
1231 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1232
1233 if (shift == ShiftType::LSL) {
1234 // We are not doing this for HInstruction::kAdd because the output will require
1235 // Location::kOutputOverlap; not applicable to other cases.
1236 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1237 GenerateDataProcInstruction(kind,
1238 out_hi,
1239 first_hi,
1240 Operand(second_hi, ShiftType::LSL, shift_value),
1241 codegen);
1242 GenerateDataProcInstruction(kind,
1243 out_hi,
1244 out_hi,
1245 Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1246 codegen);
1247 GenerateDataProcInstruction(kind,
1248 out_lo,
1249 first_lo,
1250 Operand(second_lo, ShiftType::LSL, shift_value),
1251 codegen);
1252 } else {
1253 const vixl32::Register temp = temps.Acquire();
1254
1255 __ Lsl(temp, second_hi, shift_value);
1256 __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1257 GenerateDataProc(kind,
1258 out,
1259 first,
1260 Operand(second_lo, ShiftType::LSL, shift_value),
1261 temp,
1262 codegen);
1263 }
1264 } else {
1265 DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1266
1267 // We are not doing this for HInstruction::kAdd because the output will require
1268 // Location::kOutputOverlap; not applicable to other cases.
1269 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1270 GenerateDataProcInstruction(kind,
1271 out_lo,
1272 first_lo,
1273 Operand(second_lo, ShiftType::LSR, shift_value),
1274 codegen);
1275 GenerateDataProcInstruction(kind,
1276 out_lo,
1277 out_lo,
1278 Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1279 codegen);
1280 GenerateDataProcInstruction(kind,
1281 out_hi,
1282 first_hi,
1283 Operand(second_hi, shift, shift_value),
1284 codegen);
1285 } else {
1286 const vixl32::Register temp = temps.Acquire();
1287
1288 __ Lsr(temp, second_lo, shift_value);
1289 __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1290 GenerateDataProc(kind,
1291 out,
1292 first,
1293 temp,
1294 Operand(second_hi, shift, shift_value),
1295 codegen);
1296 }
1297 }
1298 }
1299 }
1300
1301 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1302 const Location rhs_loc = instruction->GetLocations()->InAt(1);
1303 if (rhs_loc.IsConstant()) {
1304 // 0.0 is the only immediate that can be encoded directly in
1305 // a VCMP instruction.
1306 //
1307 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1308 // specify that in a floating-point comparison, positive zero
1309 // and negative zero are considered equal, so we can use the
1310 // literal 0.0 for both cases here.
1311 //
1312 // Note however that some methods (Float.equal, Float.compare,
1313 // Float.compareTo, Double.equal, Double.compare,
1314 // Double.compareTo, Math.max, Math.min, StrictMath.max,
1315 // StrictMath.min) consider 0.0 to be (strictly) greater than
1316 // -0.0. So if we ever translate calls to these methods into a
1317 // HCompare instruction, we must handle the -0.0 case with
1318 // care here.
1319 DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1320
1321 const DataType::Type type = instruction->InputAt(0)->GetType();
1322
1323 if (type == DataType::Type::kFloat32) {
1324 __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1325 } else {
1326 DCHECK_EQ(type, DataType::Type::kFloat64);
1327 __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1328 }
1329 } else {
1330 __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1331 }
1332 }
1333
1334 static int64_t AdjustConstantForCondition(int64_t value,
1335 IfCondition* condition,
1336 IfCondition* opposite) {
1337 if (value == 1) {
1338 if (*condition == kCondB) {
1339 value = 0;
1340 *condition = kCondEQ;
1341 *opposite = kCondNE;
1342 } else if (*condition == kCondAE) {
1343 value = 0;
1344 *condition = kCondNE;
1345 *opposite = kCondEQ;
1346 }
1347 } else if (value == -1) {
1348 if (*condition == kCondGT) {
1349 value = 0;
1350 *condition = kCondGE;
1351 *opposite = kCondLT;
1352 } else if (*condition == kCondLE) {
1353 value = 0;
1354 *condition = kCondLT;
1355 *opposite = kCondGE;
1356 }
1357 }
1358
1359 return value;
1360 }
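// For illustration: an unsigned 'x < 1' (kCondB) is rewritten as 'x == 0' and a signed
// 'x > -1' (kCondGT) as 'x >= 0', so the zero-comparison fast paths below can be used.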
1361
1362 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1363 HCondition* condition,
1364 bool invert,
1365 CodeGeneratorARMVIXL* codegen) {
1366 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1367
1368 const LocationSummary* const locations = condition->GetLocations();
1369 IfCondition cond = condition->GetCondition();
1370 IfCondition opposite = condition->GetOppositeCondition();
1371
1372 if (invert) {
1373 std::swap(cond, opposite);
1374 }
1375
1376 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1377 const Location left = locations->InAt(0);
1378 const Location right = locations->InAt(1);
1379
1380 DCHECK(right.IsConstant());
1381
1382 const vixl32::Register left_high = HighRegisterFrom(left);
1383 const vixl32::Register left_low = LowRegisterFrom(left);
1384 int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1385 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1386
1387 // Comparisons against 0 are common enough to deserve special attention.
1388 if (value == 0) {
1389 switch (cond) {
1390 case kCondNE:
1391 // x > 0 iff x != 0 when the comparison is unsigned.
1392 case kCondA:
1393 ret = std::make_pair(ne, eq);
1394 FALLTHROUGH_INTENDED;
1395 case kCondEQ:
1396 // x <= 0 iff x == 0 when the comparison is unsigned.
1397 case kCondBE:
1398 __ Orrs(temps.Acquire(), left_low, left_high);
1399 return ret;
1400 case kCondLT:
1401 case kCondGE:
1402 __ Cmp(left_high, 0);
1403 return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1404 // Trivially true or false.
1405 case kCondB:
1406 ret = std::make_pair(ne, eq);
1407 FALLTHROUGH_INTENDED;
1408 case kCondAE:
1409 __ Cmp(left_low, left_low);
1410 return ret;
1411 default:
1412 break;
1413 }
1414 }
1415
1416 switch (cond) {
1417 case kCondEQ:
1418 case kCondNE:
1419 case kCondB:
1420 case kCondBE:
1421 case kCondA:
1422 case kCondAE: {
1423 const uint32_t value_low = Low32Bits(value);
1424 Operand operand_low(value_low);
1425
1426 __ Cmp(left_high, High32Bits(value));
1427
1428 // Since ARMv8 deprecates IT blocks containing anything other than a single 16-bit
1429 // instruction, we must ensure that the operands corresponding to the least significant
1430 // halves of the inputs fit into a 16-bit CMP encoding.
1431 if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1432 operand_low = Operand(temps.Acquire());
1433 __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1434 }
1435
1436 // We use the scope because of the IT block that follows.
1437 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1438 2 * vixl32::k16BitT32InstructionSizeInBytes,
1439 CodeBufferCheckScope::kExactSize);
1440
1441 __ it(eq);
1442 __ cmp(eq, left_low, operand_low);
1443 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1444 break;
1445 }
1446 case kCondLE:
1447 case kCondGT:
1448 // Trivially true or false.
1449 if (value == std::numeric_limits<int64_t>::max()) {
1450 __ Cmp(left_low, left_low);
1451 ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1452 break;
1453 }
1454
1455 if (cond == kCondLE) {
1456 DCHECK_EQ(opposite, kCondGT);
1457 cond = kCondLT;
1458 opposite = kCondGE;
1459 } else {
1460 DCHECK_EQ(cond, kCondGT);
1461 DCHECK_EQ(opposite, kCondLE);
1462 cond = kCondGE;
1463 opposite = kCondLT;
1464 }
1465
1466 value++;
1467 FALLTHROUGH_INTENDED;
1468 case kCondGE:
1469 case kCondLT: {
1470 __ Cmp(left_low, Low32Bits(value));
1471 __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1472 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1473 break;
1474 }
1475 default:
1476 LOG(FATAL) << "Unreachable";
1477 UNREACHABLE();
1478 }
1479
1480 return ret;
1481 }
1482
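// Register-register variant of the 64-bit test above. LE/GT are handled by swapping the
// operands and testing GE/LT instead, so only the CMP + SBCS sequence is needed for the
// signed orderings.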
1483 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1484 HCondition* condition,
1485 bool invert,
1486 CodeGeneratorARMVIXL* codegen) {
1487 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1488
1489 const LocationSummary* const locations = condition->GetLocations();
1490 IfCondition cond = condition->GetCondition();
1491 IfCondition opposite = condition->GetOppositeCondition();
1492
1493 if (invert) {
1494 std::swap(cond, opposite);
1495 }
1496
1497 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1498 Location left = locations->InAt(0);
1499 Location right = locations->InAt(1);
1500
1501 DCHECK(right.IsRegisterPair());
1502
1503 switch (cond) {
1504 case kCondEQ:
1505 case kCondNE:
1506 case kCondB:
1507 case kCondBE:
1508 case kCondA:
1509 case kCondAE: {
1510 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1511
1512 // We use the scope because of the IT block that follows.
1513 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1514 2 * vixl32::k16BitT32InstructionSizeInBytes,
1515 CodeBufferCheckScope::kExactSize);
1516
1517 __ it(eq);
1518 __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1519 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1520 break;
1521 }
1522 case kCondLE:
1523 case kCondGT:
1524 if (cond == kCondLE) {
1525 DCHECK_EQ(opposite, kCondGT);
1526 cond = kCondGE;
1527 opposite = kCondLT;
1528 } else {
1529 DCHECK_EQ(cond, kCondGT);
1530 DCHECK_EQ(opposite, kCondLE);
1531 cond = kCondLT;
1532 opposite = kCondGE;
1533 }
1534
1535 std::swap(left, right);
1536 FALLTHROUGH_INTENDED;
1537 case kCondGE:
1538 case kCondLT: {
1539 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1540
1541 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1542 __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1543 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1544 break;
1545 }
1546 default:
1547 LOG(FATAL) << "Unreachable";
1548 UNREACHABLE();
1549 }
1550
1551 return ret;
1552 }
1553
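// Emits the flag-setting code for `condition` (integral, reference, floating-point or 64-bit
// inputs) and returns the pair of vixl32 condition codes for the condition and its opposite.
// Passing `invert` swaps the condition and its opposite before generating code.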
1554 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1555 bool invert,
1556 CodeGeneratorARMVIXL* codegen) {
1557 const DataType::Type type = condition->GetLeft()->GetType();
1558 IfCondition cond = condition->GetCondition();
1559 IfCondition opposite = condition->GetOppositeCondition();
1560 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1561
1562 if (invert) {
1563 std::swap(cond, opposite);
1564 }
1565
1566 if (type == DataType::Type::kInt64) {
1567 ret = condition->GetLocations()->InAt(1).IsConstant()
1568 ? GenerateLongTestConstant(condition, invert, codegen)
1569 : GenerateLongTest(condition, invert, codegen);
1570 } else if (DataType::IsFloatingPointType(type)) {
1571 GenerateVcmp(condition, codegen);
1572 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1573 ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1574 ARMFPCondition(opposite, condition->IsGtBias()));
1575 } else {
1576 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1577 __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1578 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1579 }
1580
1581 return ret;
1582 }
1583
1584 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1585 const vixl32::Register out = OutputRegister(cond);
1586 const auto condition = GenerateTest(cond, false, codegen);
1587
1588 __ Mov(LeaveFlags, out, 0);
1589
1590 if (out.IsLow()) {
1591 // We use the scope because of the IT block that follows.
1592 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1593 2 * vixl32::k16BitT32InstructionSizeInBytes,
1594 CodeBufferCheckScope::kExactSize);
1595
1596 __ it(condition.first);
1597 __ mov(condition.first, out, 1);
1598 } else {
1599 vixl32::Label done_label;
1600 vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1601
1602 __ B(condition.second, final_label, /* is_far_target= */ false);
1603 __ Mov(out, 1);
1604
1605 if (done_label.IsReferenced()) {
1606 __ Bind(&done_label);
1607 }
1608 }
1609 }
1610
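// Materializes a 64-bit equality test as SUB(low) + SUB(high) + ORR: both subtractions are
// zero exactly when the operands are equal, so the ORR of the two results can be compared
// against zero without branching.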
1611 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1612 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1613
1614 const LocationSummary* const locations = cond->GetLocations();
1615 IfCondition condition = cond->GetCondition();
1616 const vixl32::Register out = OutputRegister(cond);
1617 const Location left = locations->InAt(0);
1618 const Location right = locations->InAt(1);
1619 vixl32::Register left_high = HighRegisterFrom(left);
1620 vixl32::Register left_low = LowRegisterFrom(left);
1621 vixl32::Register temp;
1622 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1623
1624 if (right.IsConstant()) {
1625 IfCondition opposite = cond->GetOppositeCondition();
1626 const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1627 &condition,
1628 &opposite);
1629 Operand right_high = High32Bits(value);
1630 Operand right_low = Low32Bits(value);
1631
1632 // The output uses Location::kNoOutputOverlap.
1633 if (out.Is(left_high)) {
1634 std::swap(left_low, left_high);
1635 std::swap(right_low, right_high);
1636 }
1637
1638 __ Sub(out, left_low, right_low);
1639 temp = temps.Acquire();
1640 __ Sub(temp, left_high, right_high);
1641 } else {
1642 DCHECK(right.IsRegisterPair());
1643 temp = temps.Acquire();
1644 __ Sub(temp, left_high, HighRegisterFrom(right));
1645 __ Sub(out, left_low, LowRegisterFrom(right));
1646 }
1647
1648 // Need to check after calling AdjustConstantForCondition().
1649 DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1650
1651 if (condition == kCondNE && out.IsLow()) {
1652 __ Orrs(out, out, temp);
1653
1654 // We use the scope because of the IT block that follows.
1655 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1656 2 * vixl32::k16BitT32InstructionSizeInBytes,
1657 CodeBufferCheckScope::kExactSize);
1658
1659 __ it(ne);
1660 __ mov(ne, out, 1);
1661 } else {
1662 __ Orr(out, out, temp);
1663 codegen->GenerateConditionWithZero(condition, out, out, temp);
1664 }
1665 }
1666
1667 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1668 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1669
1670 const LocationSummary* const locations = cond->GetLocations();
1671 IfCondition condition = cond->GetCondition();
1672 const vixl32::Register out = OutputRegister(cond);
1673 const Location left = locations->InAt(0);
1674 const Location right = locations->InAt(1);
1675
1676 if (right.IsConstant()) {
1677 IfCondition opposite = cond->GetOppositeCondition();
1678
1679 // Comparisons against 0 are common enough to deserve special attention.
1680 if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1681 switch (condition) {
1682 case kCondNE:
1683 case kCondA:
1684 if (out.IsLow()) {
1685 // We only care if both input registers are 0 or not.
1686 __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1687
1688 // We use the scope because of the IT block that follows.
1689 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1690 2 * vixl32::k16BitT32InstructionSizeInBytes,
1691 CodeBufferCheckScope::kExactSize);
1692
1693 __ it(ne);
1694 __ mov(ne, out, 1);
1695 return;
1696 }
1697
1698 FALLTHROUGH_INTENDED;
1699 case kCondEQ:
1700 case kCondBE:
1701 // We only care if both input registers are 0 or not.
1702 __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1703 codegen->GenerateConditionWithZero(condition, out, out);
1704 return;
1705 case kCondLT:
1706 case kCondGE:
1707 // We only care about the sign bit.
1708 FALLTHROUGH_INTENDED;
1709 case kCondAE:
1710 case kCondB:
1711 codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1712 return;
1713 case kCondLE:
1714 case kCondGT:
1715 default:
1716 break;
1717 }
1718 }
1719 }
1720
1721 // If `out` is a low register, then the GenerateConditionGeneric()
1722 // function generates a shorter code sequence that is still branchless.
1723 if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1724 GenerateEqualLong(cond, codegen);
1725 return;
1726 }
1727
1728 GenerateConditionGeneric(cond, codegen);
1729 }
1730
1731 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1732 CodeGeneratorARMVIXL* codegen) {
1733 const DataType::Type type = cond->GetLeft()->GetType();
1734
1735 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1736
1737 if (type == DataType::Type::kInt64) {
1738 GenerateConditionLong(cond, codegen);
1739 return;
1740 }
1741
1742 IfCondition condition = cond->GetCondition();
1743 vixl32::Register in = InputRegisterAt(cond, 0);
1744 const vixl32::Register out = OutputRegister(cond);
1745 const Location right = cond->GetLocations()->InAt(1);
1746 int64_t value;
1747
1748 if (right.IsConstant()) {
1749 IfCondition opposite = cond->GetOppositeCondition();
1750
1751 value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1752
1753 // Comparisons against 0 are common enough to deserve special attention.
1754 if (value == 0) {
1755 switch (condition) {
1756 case kCondNE:
1757 case kCondA:
1758 if (out.IsLow() && out.Is(in)) {
1759 __ Cmp(out, 0);
1760
1761 // We use the scope because of the IT block that follows.
1762 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1763 2 * vixl32::k16BitT32InstructionSizeInBytes,
1764 CodeBufferCheckScope::kExactSize);
1765
1766 __ it(ne);
1767 __ mov(ne, out, 1);
1768 return;
1769 }
1770
1771 FALLTHROUGH_INTENDED;
1772 case kCondEQ:
1773 case kCondBE:
1774 case kCondLT:
1775 case kCondGE:
1776 case kCondAE:
1777 case kCondB:
1778 codegen->GenerateConditionWithZero(condition, out, in);
1779 return;
1780 case kCondLE:
1781 case kCondGT:
1782 default:
1783 break;
1784 }
1785 }
1786 }
1787
1788 if (condition == kCondEQ || condition == kCondNE) {
1789 Operand operand(0);
1790
1791 if (right.IsConstant()) {
1792 operand = Operand::From(value);
1793 } else if (out.Is(RegisterFrom(right))) {
1794 // Avoid 32-bit instructions if possible.
1795 operand = InputOperandAt(cond, 0);
1796 in = RegisterFrom(right);
1797 } else {
1798 operand = InputOperandAt(cond, 1);
1799 }
1800
1801 if (condition == kCondNE && out.IsLow()) {
1802 __ Subs(out, in, operand);
1803
1804 // We use the scope because of the IT block that follows.
1805 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1806 2 * vixl32::k16BitT32InstructionSizeInBytes,
1807 CodeBufferCheckScope::kExactSize);
1808
1809 __ it(ne);
1810 __ mov(ne, out, 1);
1811 } else {
1812 __ Sub(out, in, operand);
1813 codegen->GenerateConditionWithZero(condition, out, out);
1814 }
1815
1816 return;
1817 }
1818
1819 GenerateConditionGeneric(cond, codegen);
1820 }
1821
1822 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1823 const DataType::Type type = constant->GetType();
1824 bool ret = false;
1825
1826 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1827
1828 if (type == DataType::Type::kInt64) {
1829 const uint64_t value = Uint64ConstantFrom(constant);
1830
1831 ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1832 } else {
1833 ret = IsUint<8>(Int32ConstantFrom(constant));
1834 }
1835
1836 return ret;
1837 }
1838
1839 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1840 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1841
1842 if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1843 return Location::ConstantLocation(constant);
1844 }
1845
1846 return Location::RequiresRegister();
1847 }
1848
1849 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1850 // Since ARMv8 deprecates IT blocks containing anything other than a single 16-bit
1851 // instruction, we check that we are not dealing with floating-point output (there is
1852 // no 16-bit VMOV encoding).
1853 if (!out.IsRegister() && !out.IsRegisterPair()) {
1854 return false;
1855 }
1856
1857 // For constants, we also check that the output is in one or two low registers,
1858 // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1859 // MOV encoding can be used.
1860 if (src.IsConstant()) {
1861 if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1862 return false;
1863 }
1864
1865 if (out.IsRegister()) {
1866 if (!RegisterFrom(out).IsLow()) {
1867 return false;
1868 }
1869 } else {
1870 DCHECK(out.IsRegisterPair());
1871
1872 if (!HighRegisterFrom(out).IsLow()) {
1873 return false;
1874 }
1875 }
1876 }
1877
1878 return true;
1879 }
1880
1881 #undef __
1882
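// Returns the label that code materializing `instruction` should branch to. When the
// instruction is immediately followed by an HGoto that is not a back edge with a suspend
// check, the branch is redirected straight to the goto's successor to avoid emitting a
// branch to a branch.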
1883 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1884 vixl32::Label* final_label) {
1885 DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1886 DCHECK_IMPLIES(instruction->IsInvoke(), !instruction->GetLocations()->CanCall());
1887
1888 const HBasicBlock* const block = instruction->GetBlock();
1889 const HLoopInformation* const info = block->GetLoopInformation();
1890 HInstruction* const next = instruction->GetNext();
1891
1892 // Avoid a branch to a branch.
1893 if (next->IsGoto() && (info == nullptr ||
1894 !info->IsBackEdge(*block) ||
1895 !info->HasSuspendCheck())) {
1896 final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1897 }
1898
1899 return final_label;
1900 }
1901
1902 namespace detail {
1903
1904 // Mark which intrinsics we don't have handcrafted code for.
1905 template <Intrinsics T>
1906 struct IsUnimplemented {
1907 bool is_unimplemented = false;
1908 };
1909
1910 #define TRUE_OVERRIDE(Name) \
1911 template <> \
1912 struct IsUnimplemented<Intrinsics::k##Name> { \
1913 bool is_unimplemented = true; \
1914 };
1915 UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
1916 #undef TRUE_OVERRIDE
1917
1918 static constexpr bool kIsIntrinsicUnimplemented[] = {
1919 false, // kNone
1920 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1921 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1922 ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1923 #undef IS_UNIMPLEMENTED
1924 };
1925
1926 } // namespace detail
1927
1928 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1929 const CompilerOptions& compiler_options,
1930 OptimizingCompilerStats* stats)
1931 : CodeGenerator(graph,
1932 kNumberOfCoreRegisters,
1933 kNumberOfSRegisters,
1934 kNumberOfRegisterPairs,
1935 kCoreCalleeSaves.GetList(),
1936 ComputeSRegisterListMask(kFpuCalleeSaves),
1937 compiler_options,
1938 stats,
1939 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1940 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1941 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1942 location_builder_(graph, this),
1943 instruction_visitor_(graph, this),
1944 move_resolver_(graph->GetAllocator(), this),
1945 assembler_(graph->GetAllocator()),
1946 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1947 app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1948 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1949 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1950 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1951 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1952 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1953 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1954 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1955 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1956 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1957 call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1958 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1959 uint32_literals_(std::less<uint32_t>(),
1960 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1961 jit_string_patches_(StringReferenceValueComparator(),
1962 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1963 jit_class_patches_(TypeReferenceValueComparator(),
1964 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1965 jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1966 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1967 // Always save the LR register to mimic Quick.
1968 AddAllocatedRegister(Location::RegisterLocation(LR));
1969 // Give D30 and D31 to VIXL as scratch registers. The register allocator only works on
1970 // S0-S31, which alias to D0-D15.
1971 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1972 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1973 }
1974
1975 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1976 uint32_t num_entries = switch_instr_->GetNumEntries();
1977 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1978
1979 // We are about to use the assembler to place literals directly. Make sure there is enough
1980 // room in the underlying code buffer and that we have generated a jump table of the right
1981 // size, using codegen->GetVIXLAssembler()->GetBuffer().Align();
1982 ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1983 num_entries * sizeof(int32_t),
1984 CodeBufferCheckScope::kMaximumSize);
1985 // TODO(VIXL): Check that using lower case bind is fine here.
1986 codegen->GetVIXLAssembler()->bind(&table_start_);
1987 for (uint32_t i = 0; i < num_entries; i++) {
1988 codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1989 }
1990 }
1991
1992 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1993 uint32_t num_entries = switch_instr_->GetNumEntries();
1994 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1995
1996 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1997 for (uint32_t i = 0; i < num_entries; i++) {
1998 vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1999 DCHECK(target_label->IsBound());
2000 int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
2001 // When doing a BX to an address we need the lowest bit set to 1 to stay in T32 state.
2002 if (codegen->GetVIXLAssembler()->IsUsingT32()) {
2003 jump_offset++;
2004 }
2005 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
2006 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
2007
2008 bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
2009 }
2010 }
2011
2012 void CodeGeneratorARMVIXL::FixJumpTables() {
2013 for (auto&& jump_table : jump_tables_) {
2014 jump_table->FixTable(this);
2015 }
2016 }
2017
2018 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
2019
2020 void CodeGeneratorARMVIXL::Finalize() {
2021 FixJumpTables();
2022
2023 // Emit JIT baker read barrier slow paths.
2024 DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
2025 for (auto& entry : jit_baker_read_barrier_slow_paths_) {
2026 uint32_t encoded_data = entry.first;
2027 vixl::aarch32::Label* slow_path_entry = &entry.second.label;
2028 __ Bind(slow_path_entry);
2029 CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
2030 }
2031
2032 GetAssembler()->FinalizeCode();
2033 CodeGenerator::Finalize();
2034
2035 // Verify Baker read barrier linker patches.
2036 if (kIsDebugBuild) {
2037 ArrayRef<const uint8_t> code(GetCode());
2038 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
2039 DCHECK(info.label.IsBound());
2040 uint32_t literal_offset = info.label.GetLocation();
2041 DCHECK_ALIGNED(literal_offset, 2u);
2042
2043 auto GetInsn16 = [&code](uint32_t offset) {
2044 DCHECK_ALIGNED(offset, 2u);
2045 return (static_cast<uint32_t>(code[offset + 0]) << 0) +
2046 (static_cast<uint32_t>(code[offset + 1]) << 8);
2047 };
2048 auto GetInsn32 = [=](uint32_t offset) {
2049 return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
2050 };
2051
2052 uint32_t encoded_data = info.custom_data;
2053 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
2054 // Check that the next instruction matches the expected LDR.
2055 switch (kind) {
2056 case BakerReadBarrierKind::kField: {
2057 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2058 if (width == BakerReadBarrierWidth::kWide) {
2059 DCHECK_GE(code.size() - literal_offset, 8u);
2060 uint32_t next_insn = GetInsn32(literal_offset + 4u);
2061 // LDR (immediate), encoding T3, with correct base_reg.
2062 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
2063 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2064 CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
2065 } else {
2066 DCHECK_GE(code.size() - literal_offset, 6u);
2067 uint32_t next_insn = GetInsn16(literal_offset + 4u);
2068 // LDR (immediate), encoding T1, with correct base_reg.
2069 CheckValidReg(next_insn & 0x7u); // Check destination register.
2070 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2071 CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2072 }
2073 break;
2074 }
2075 case BakerReadBarrierKind::kArray: {
2076 DCHECK_GE(code.size() - literal_offset, 8u);
2077 uint32_t next_insn = GetInsn32(literal_offset + 4u);
2078 // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]).
2079 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
2080 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2081 CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2082 CheckValidReg(next_insn & 0xf); // Check index register
2083 break;
2084 }
2085 case BakerReadBarrierKind::kGcRoot: {
2086 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2087 if (width == BakerReadBarrierWidth::kWide) {
2088 DCHECK_GE(literal_offset, 4u);
2089 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2090 // LDR (immediate), encoding T3, with correct root_reg.
2091 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2092 CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2093 } else {
2094 DCHECK_GE(literal_offset, 2u);
2095 uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2096 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2097 // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2098 // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2099 if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2100 CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2101 }
2102 }
2103 break;
2104 }
2105 case BakerReadBarrierKind::kIntrinsicCas: {
2106 DCHECK_GE(literal_offset, 4u);
2107 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2108 // MOV (register), encoding T3, with correct root_reg.
2109 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2110 DCHECK_GE(root_reg, 8u); // Used only for high registers.
2111 CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2112 break;
2113 }
2114 default:
2115 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2116 UNREACHABLE();
2117 }
2118 }
2119 }
2120 }
2121
2122 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2123 // Stack register, LR and PC are always reserved.
2124 blocked_core_registers_[SP] = true;
2125 blocked_core_registers_[LR] = true;
2126 blocked_core_registers_[PC] = true;
2127
2128 // TODO: We don't need to reserve marking-register for userfaultfd GC. But
2129 // that would require some work in the assembler code as the right GC is
2130 // chosen at load-time and not compile time.
2131 if (kReserveMarkingRegister) {
2132 // Reserve marking register.
2133 blocked_core_registers_[MR] = true;
2134 }
2135
2136 // Reserve thread register.
2137 blocked_core_registers_[TR] = true;
2138
2139 // Reserve temp register.
2140 blocked_core_registers_[IP] = true;
2141
2142 if (GetGraph()->IsDebuggable()) {
2143 // Stubs do not save callee-save floating point registers. If the graph
2144 // is debuggable, we need to deal with these registers differently. For
2145 // now, just block them.
2146 for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2147 i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2148 ++i) {
2149 blocked_fpu_registers_[i] = true;
2150 }
2151 }
2152 }
2153
2154 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2155 CodeGeneratorARMVIXL* codegen)
2156 : InstructionCodeGenerator(graph, codegen),
2157 assembler_(codegen->GetAssembler()),
2158 codegen_(codegen) {}
2159
2160 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2161 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2162 DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2163 << "At least the return address register must be saved";
2164 // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2165 DCHECK(GetVIXLAssembler()->IsUsingT32());
2166 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2167 // We use vpush and vpop for saving and restoring floating point registers, which take
2168 // a SRegister and the number of registers to save/restore after that SRegister. We
2169 // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
2170 // but in the range.
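// For example, if only S16 and S19 were allocated as callee-save spills, the mask is widened
// to cover S16-S19 so that a single VPUSH/VPOP handles the whole contiguous range.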
2171 if (fpu_spill_mask_ != 0) {
2172 uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2173 uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2174 for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2175 fpu_spill_mask_ |= (1 << i);
2176 }
2177 }
2178 }
2179
2180 void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
2181 LocationSummary* locations = new (GetGraph()->GetAllocator())
2182 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2183 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
2184 // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2185 // compute the address to store the timestamp counter.
2186 locations->AddRegisterTemps(3);
2187 }
2188
2189 void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
2190 LocationSummary* locations = instruction->GetLocations();
2191 vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
2192 vixl32::Register value = RegisterFrom(locations->GetTemp(1));
2193 vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
2194
2195 SlowPathCodeARMVIXL* slow_path =
2196 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
2197 codegen_->AddSlowPath(slow_path);
2198
2199 if (instruction->IsMethodExitHook()) {
2200 // Check whether we are required to check if the caller needs a deoptimization. Strictly
2201 // speaking, checking the CheckCallerForDeopt bit would suffice, but it is faster to test for
2202 // any non-zero value. The kCHA bit isn't used in debuggable runtimes, since CHA optimization
2203 // is disabled there, and the other bit is only set when this method itself requires a
2204 // deoptimization due to redefinition. So it is safe to just check for a non-zero value here.
2205 GetAssembler()->LoadFromOffset(
2206 kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2207 __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
2208 }
2209
2210 MemberOffset offset = instruction->IsMethodExitHook() ?
2211 instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
2212 instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
2213 uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
2214 __ Mov(addr, address + offset.Int32Value());
2215 __ Ldrb(value, MemOperand(addr, 0));
2216 __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
2217 // Check if there are any trace method entry / exit listeners. If not, continue.
2218 __ B(lt, slow_path->GetExitLabel());
2219 // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
2220 // If yes, just take the slow path.
2221 __ B(gt, slow_path->GetEntryLabel());
2222
2223 // Check if there is space in the buffer to store a new entry; if not, take the slow path.
2224 uint32_t trace_buffer_curr_entry_offset =
2225 Thread::TraceBufferCurrPtrOffset<kArmPointerSize>().Int32Value();
2226 vixl32::Register curr_entry = value;
2227 vixl32::Register init_entry = addr;
2228 __ Ldr(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2229 __ Subs(curr_entry, curr_entry, static_cast<uint32_t>(kNumEntriesForWallClock * sizeof(void*)));
2230 __ Ldr(init_entry, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
2231 __ Cmp(curr_entry, init_entry);
2232 __ B(lt, slow_path->GetEntryLabel());
2233
2234 // Update the index in the `Thread`.
2235 __ Str(curr_entry, MemOperand(tr, trace_buffer_curr_entry_offset));
2236
2237 // Record method pointer and trace action.
2238 __ Ldr(tmp, MemOperand(sp, 0));
2239 // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
2240 // so there is no need to set the bits since they are already 0.
2241 if (instruction->IsMethodExitHook()) {
2242 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
2243 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
2244 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
2245 __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
2246 }
2247 __ Str(tmp, MemOperand(curr_entry, kMethodOffsetInBytes));
2248
2249 vixl32::Register tmp1 = init_entry;
2250 // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
2251 __ Mrrc(/* lower 32-bit */ tmp,
2252 /* higher 32-bit */ tmp1,
2253 /* coproc= */ 15,
2254 /* opc1= */ 1,
2255 /* crm= */ 14);
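// This MRRC (coproc 15, opc1 1, CRm c14) should read the 64-bit virtual counter CNTVCT of the
// generic timer, which serves as the timestamp recorded in the trace entry.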
2256 static_assert(kHighTimestampOffsetInBytes ==
2257 kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
2258 __ Strd(tmp, tmp1, MemOperand(curr_entry, kTimestampOffsetInBytes));
2259 __ Bind(slow_path->GetExitLabel());
2260 }
2261
2262 void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
2263 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2264 DCHECK(codegen_->RequiresCurrentMethod());
2265 GenerateMethodEntryExitHook(instruction);
2266 }
2267
2268 void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
2269 LocationSummary* locations = new (GetGraph()->GetAllocator())
2270 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2271 // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2272 // compute the address to store the timestamp counter.
2273 locations->AddRegisterTemps(3);
2274 }
2275
2276 void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
2277 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2278 DCHECK(codegen_->RequiresCurrentMethod());
2279 GenerateMethodEntryExitHook(instruction);
2280 }
2281
2282 void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
2283 bool is_frame_entry) {
2284 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2285 UseScratchRegisterScope temps(GetVIXLAssembler());
2286 vixl32::Register temp = temps.Acquire();
2287 static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2288 if (!is_frame_entry) {
2289 __ Push(vixl32::Register(kMethodRegister));
2290 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2291 GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2292 }
2293 // Load with zero extend to clear the high bits for integer overflow check.
2294 __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2295 vixl::aarch32::Label done;
2296 DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
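// The counter saturates at zero; skip the decrement in that case so the 16-bit value does not
// wrap back around to 0xFFFF.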
2297 __ CompareAndBranchIfZero(temp, &done, /* is_far_target= */ false);
2298 __ Add(temp, temp, -1);
2299 __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2300 __ Bind(&done);
2301 if (!is_frame_entry) {
2302 __ Pop(vixl32::Register(kMethodRegister));
2303 GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2304 }
2305 }
2306
2307 if (GetGraph()->IsCompilingBaseline() &&
2308 GetGraph()->IsUsefulOptimizing() &&
2309 !Runtime::Current()->IsAotCompiler()) {
2310 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2311 DCHECK(info != nullptr);
2312 DCHECK(!HasEmptyFrame());
2313 uint32_t address = reinterpret_cast32<uint32_t>(info);
2314 UseScratchRegisterScope temps(GetVIXLAssembler());
2315 vixl32::Register tmp = temps.Acquire();
2316 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(
2317 suspend_check, /* profiling_info= */ lr);
2318 AddSlowPath(slow_path);
2319 __ Mov(lr, address);
2320 __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2321 __ Adds(tmp, tmp, -1);
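// ADDS of -1 leaves the carry flag clear only if the counter was zero, i.e. the baseline
// hotness threshold has been reached; in that case take the slow path that requests
// optimized compilation.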
2322 __ B(cc, slow_path->GetEntryLabel());
2323 __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2324 __ Bind(slow_path->GetExitLabel());
2325 }
2326 }
2327
2328 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2329 bool skip_overflow_check =
2330 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2331 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2332
2333 // Check if we need to generate the clinit check. We will jump to the
2334 // resolution stub if the class is not initialized and the executing thread is
2335 // not the thread initializing it.
2336 // We do this before constructing the frame to get the correct stack trace if
2337 // an exception is thrown.
2338 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
2339 UseScratchRegisterScope temps(GetVIXLAssembler());
2340 vixl32::Label resolution;
2341 vixl32::Label memory_barrier;
2342
2343 // Check if we're visibly initialized.
2344
2345 vixl32::Register temp1 = temps.Acquire();
2346 // Use r4 as other temporary register.
2347 DCHECK(!blocked_core_registers_[R4]);
2348 DCHECK(!kCoreCalleeSaves.Includes(r4));
2349 vixl32::Register temp2 = r4;
2350 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2351 DCHECK(!reg.Is(r4));
2352 }
2353
2354 // We don't emit a read barrier here to save on code size. We rely on the
2355 // resolution trampoline to do a suspend check before re-entering this code.
2356 __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
2357 __ Ldrb(temp2, MemOperand(temp1, kClassStatusByteOffset));
2358 __ Cmp(temp2, kShiftedVisiblyInitializedValue);
2359 __ B(cs, &frame_entry_label_);
2360
2361 // Check if we're initialized and jump to code that does a memory barrier if
2362 // so.
2363 __ Cmp(temp2, kShiftedInitializedValue);
2364 __ B(cs, &memory_barrier);
2365
2366 // Check if we're initializing and the thread initializing is the one
2367 // executing the code.
2368 __ Cmp(temp2, kShiftedInitializingValue);
2369 __ B(lo, &resolution);
2370
2371 __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
2372 __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
2373 __ Cmp(temp1, temp2);
2374 __ B(eq, &frame_entry_label_);
2375 __ Bind(&resolution);
2376
2377 // Jump to the resolution stub.
2378 ThreadOffset32 entrypoint_offset =
2379 GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
2380 __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
2381 __ Bx(temp1);
2382
2383 __ Bind(&memory_barrier);
2384 GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2385 }
2386
2387 __ Bind(&frame_entry_label_);
2388
2389 if (HasEmptyFrame()) {
2390 // Ensure that the CFI opcode list is not empty.
2391 GetAssembler()->cfi().Nop();
2392 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2393 return;
2394 }
2395
2396 // Make sure the frame size isn't unreasonably large.
2397 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
2398
2399 if (!skip_overflow_check) {
2400 // Using r4 instead of IP saves 2 bytes.
2401 UseScratchRegisterScope temps(GetVIXLAssembler());
2402 vixl32::Register temp;
2403 // TODO: Remove this check when R4 is made a callee-save register
2404 // in ART compiled code (b/72801708). Currently we need to make
2405 // sure r4 is not blocked, e.g. by the special-purpose
2406 // TestCodeGeneratorARMVIXL; we also assert that r4 is available
2407 // here.
2408 if (!blocked_core_registers_[R4]) {
2409 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2410 DCHECK(!reg.Is(r4));
2411 }
2412 DCHECK(!kCoreCalleeSaves.Includes(r4));
2413 temp = r4;
2414 } else {
2415 temp = temps.Acquire();
2416 }
2417 __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2418 // The load must immediately precede RecordPcInfo.
2419 ExactAssemblyScope aas(GetVIXLAssembler(),
2420 vixl32::kMaxInstructionSizeInBytes,
2421 CodeBufferCheckScope::kMaximumSize);
2422 __ ldr(temp, MemOperand(temp));
2423 RecordPcInfo(nullptr, 0);
2424 }
2425
2426 uint32_t frame_size = GetFrameSize();
2427 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2428 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2429 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2430 core_spills_offset <= 3u * kArmWordSize) {
2431 // Do a single PUSH for core registers including the method and up to two
2432 // filler registers. Then store the single FP spill if any.
2433 // (The worst case is when the method is not required and we actually
2434 // store 3 extra registers but they are stored in the same properly
2435 // aligned 16-byte chunk where we're already writing anyway.)
2436 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2437 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2438 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
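// For example, a 12-byte `core_spills_offset` yields extra_regs = {r0, r1, r2}: the method
// register plus two fillers, all saved by the same PUSH as the callee-save registers.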
2439 __ Push(RegisterList(core_spill_mask_ | extra_regs));
2440 GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2441 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2442 core_spills_offset,
2443 core_spill_mask_,
2444 kArmWordSize);
2445 if (fpu_spill_mask_ != 0u) {
2446 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2447 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2448 GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2449 GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2450 }
2451 } else {
2452 __ Push(RegisterList(core_spill_mask_));
2453 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2454 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2455 /*offset=*/ 0,
2456 core_spill_mask_,
2457 kArmWordSize);
2458 if (fpu_spill_mask_ != 0) {
2459 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2460
2461 // Check that list is contiguous.
2462 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2463
2464 __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2465 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2466 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2467 /*offset=*/ 0,
2468 fpu_spill_mask_,
2469 kArmWordSize);
2470 }
2471
2472 // Adjust SP and save the current method if we need it. Note that we do
2473 // not save the method in HCurrentMethod, as the instruction might have
2474 // been removed in the SSA graph.
2475 if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2476 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2477 __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2478 GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2479 } else {
2480 IncreaseFrame(fp_spills_offset);
2481 if (RequiresCurrentMethod()) {
2482 GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2483 }
2484 }
2485 }
2486
2487 if (GetGraph()->HasShouldDeoptimizeFlag()) {
2488 UseScratchRegisterScope temps(GetVIXLAssembler());
2489 vixl32::Register temp = temps.Acquire();
2490 // Initialize should_deoptimize flag to 0.
2491 __ Mov(temp, 0);
2492 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2493 }
2494
2495 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2496 MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2497 }
2498
2499 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2500 if (HasEmptyFrame()) {
2501 __ Bx(lr);
2502 return;
2503 }
2504
2505 // Pop LR into PC to return.
2506 DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2507 uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
2508
2509 uint32_t frame_size = GetFrameSize();
2510 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2511 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2512 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2513 // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2514 core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2515 // Load the FP spill if any and then do a single POP including the method
2516 // and up to two filler registers. If we have no FP spills, this also has
2517 // the advantage that we do not need to emit CFI directives.
2518 if (fpu_spill_mask_ != 0u) {
2519 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2520 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2521 GetAssembler()->cfi().RememberState();
2522 GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2523 GetAssembler()->cfi().Restore(DWARFReg(sreg));
2524 }
2525 // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2526 // never hold the return value.
2527 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2528 DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2529 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2530 __ Pop(RegisterList(pop_mask | extra_regs));
2531 if (fpu_spill_mask_ != 0u) {
2532 GetAssembler()->cfi().RestoreState();
2533 }
2534 } else {
2535 GetAssembler()->cfi().RememberState();
2536 DecreaseFrame(fp_spills_offset);
2537 if (fpu_spill_mask_ != 0) {
2538 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2539
2540 // Check that list is contiguous.
2541 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2542
2543 __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2544 GetAssembler()->cfi().AdjustCFAOffset(
2545 -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2546 GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2547 }
2548 __ Pop(RegisterList(pop_mask));
2549 GetAssembler()->cfi().RestoreState();
2550 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2551 }
2552 }
2553
2554 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2555 __ Bind(GetLabelOf(block));
2556 }
2557
2558 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2559 switch (type) {
2560 case DataType::Type::kReference:
2561 case DataType::Type::kBool:
2562 case DataType::Type::kUint8:
2563 case DataType::Type::kInt8:
2564 case DataType::Type::kUint16:
2565 case DataType::Type::kInt16:
2566 case DataType::Type::kInt32: {
2567 uint32_t index = gp_index_++;
2568 uint32_t stack_index = stack_index_++;
2569 if (index < calling_convention.GetNumberOfRegisters()) {
2570 return LocationFrom(calling_convention.GetRegisterAt(index));
2571 } else {
2572 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2573 }
2574 }
2575
2576 case DataType::Type::kInt64: {
2577 uint32_t index = gp_index_;
2578 uint32_t stack_index = stack_index_;
2579 gp_index_ += 2;
2580 stack_index_ += 2;
2581 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2582 if (calling_convention.GetRegisterAt(index).Is(r1)) {
2583 // Skip R1, and use R2_R3 instead.
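// (A register pair must start at an even register, so e.g. a long passed as the first
// argument goes in R2/R3 and R1 stays unused.)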
2584 gp_index_++;
2585 index++;
2586 }
2587 }
2588 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2589 DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2590 calling_convention.GetRegisterAt(index + 1).GetCode());
2591
2592 return LocationFrom(calling_convention.GetRegisterAt(index),
2593 calling_convention.GetRegisterAt(index + 1));
2594 } else {
2595 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2596 }
2597 }
2598
2599 case DataType::Type::kFloat32: {
2600 uint32_t stack_index = stack_index_++;
2601 if (float_index_ % 2 == 0) {
2602 float_index_ = std::max(double_index_, float_index_);
2603 }
2604 if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2605 return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2606 } else {
2607 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2608 }
2609 }
2610
2611 case DataType::Type::kFloat64: {
2612 double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2613 uint32_t stack_index = stack_index_;
2614 stack_index_ += 2;
2615 if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2616 uint32_t index = double_index_;
2617 double_index_ += 2;
2618 Location result = LocationFrom(
2619 calling_convention.GetFpuRegisterAt(index),
2620 calling_convention.GetFpuRegisterAt(index + 1));
2621 DCHECK(ExpectedPairLayout(result));
2622 return result;
2623 } else {
2624 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2625 }
2626 }
2627
2628 case DataType::Type::kUint32:
2629 case DataType::Type::kUint64:
2630 case DataType::Type::kVoid:
2631 LOG(FATAL) << "Unexpected parameter type " << type;
2632 UNREACHABLE();
2633 }
2634 return Location::NoLocation();
2635 }
2636
2637 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2638 switch (type) {
2639 case DataType::Type::kReference:
2640 case DataType::Type::kBool:
2641 case DataType::Type::kUint8:
2642 case DataType::Type::kInt8:
2643 case DataType::Type::kUint16:
2644 case DataType::Type::kInt16:
2645 case DataType::Type::kUint32:
2646 case DataType::Type::kInt32: {
2647 return LocationFrom(r0);
2648 }
2649
2650 case DataType::Type::kFloat32: {
2651 return LocationFrom(s0);
2652 }
2653
2654 case DataType::Type::kUint64:
2655 case DataType::Type::kInt64: {
2656 return LocationFrom(r0, r1);
2657 }
2658
2659 case DataType::Type::kFloat64: {
2660 return LocationFrom(s0, s1);
2661 }
2662
2663 case DataType::Type::kVoid:
2664 return Location::NoLocation();
2665 }
2666
2667 UNREACHABLE();
2668 }
2669
2670 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2671 return LocationFrom(kMethodRegister);
2672 }
2673
2674 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2675 DCHECK_NE(type, DataType::Type::kReference);
2676
2677 // Native ABI uses the same registers as managed, except that the method register r0
2678 // is a normal argument.
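// For example, a 64-bit argument starting at an even GPR index is passed in r0/r1 or r2/r3;
// one starting at an odd index is bumped to the next pair, and once registers are exhausted
// 64-bit arguments take 8-byte aligned stack slots.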
2679 Location location = Location::NoLocation();
2680 if (DataType::Is64BitType(type)) {
2681 gpr_index_ = RoundUp(gpr_index_, 2u);
2682 stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2683 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2684 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2685 kParameterCoreRegistersVIXL[gpr_index_]);
2686 gpr_index_ += 2u;
2687 }
2688 } else {
2689 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2690 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2691 ++gpr_index_;
2692 }
2693 }
2694 if (location.IsInvalid()) {
2695 if (DataType::Is64BitType(type)) {
2696 location = Location::DoubleStackSlot(stack_offset_);
2697 stack_offset_ += 2 * kFramePointerSize;
2698 } else {
2699 location = Location::StackSlot(stack_offset_);
2700 stack_offset_ += kFramePointerSize;
2701 }
2702
2703 if (for_register_allocation_) {
2704 location = Location::Any();
2705 }
2706 }
2707 return location;
2708 }
2709
2710 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2711 const {
2712 // We perform conversion to the managed ABI return register after the call if needed.
2713 InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2714 return dex_calling_convention.GetReturnLocation(type);
2715 }
2716
2717 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2718 // Pass the method in the hidden argument R4.
2719 return Location::RegisterLocation(R4);
2720 }
2721
2722 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2723 if (source.Equals(destination)) {
2724 return;
2725 }
2726 if (destination.IsRegister()) {
2727 if (source.IsRegister()) {
2728 __ Mov(RegisterFrom(destination), RegisterFrom(source));
2729 } else if (source.IsFpuRegister()) {
2730 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2731 } else {
2732 GetAssembler()->LoadFromOffset(kLoadWord,
2733 RegisterFrom(destination),
2734 sp,
2735 source.GetStackIndex());
2736 }
2737 } else if (destination.IsFpuRegister()) {
2738 if (source.IsRegister()) {
2739 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2740 } else if (source.IsFpuRegister()) {
2741 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2742 } else {
2743 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2744 }
2745 } else {
2746 DCHECK(destination.IsStackSlot()) << destination;
2747 if (source.IsRegister()) {
2748 GetAssembler()->StoreToOffset(kStoreWord,
2749 RegisterFrom(source),
2750 sp,
2751 destination.GetStackIndex());
2752 } else if (source.IsFpuRegister()) {
2753 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2754 } else {
2755 DCHECK(source.IsStackSlot()) << source;
2756 UseScratchRegisterScope temps(GetVIXLAssembler());
2757 vixl32::Register temp = temps.Acquire();
2758 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2759 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2760 }
2761 }
2762 }
2763
2764 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2765 DCHECK(location.IsRegister());
2766 __ Mov(RegisterFrom(location), value);
2767 }
2768
2769 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2770 // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2771 // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2772 HParallelMove move(GetGraph()->GetAllocator());
2773 move.AddMove(src, dst, dst_type, nullptr);
2774 GetMoveResolver()->EmitNativeCode(&move);
2775 }
2776
2777 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2778 if (location.IsRegister()) {
2779 locations->AddTemp(location);
2780 } else if (location.IsRegisterPair()) {
2781 locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2782 locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2783 } else {
2784 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2785 }
2786 }
2787
2788 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2789 HInstruction* instruction,
2790 uint32_t dex_pc,
2791 SlowPathCode* slow_path) {
2792 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2793
2794 ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2795 // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2796 // entire oat file. The trampoline adds an extra branch, so we avoid it on the main path, which we do not want to slow down.
2797 // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2798 if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2799 __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2800 // Ensure the pc position is recorded immediately after the `blx` instruction.
2801 // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
2802 ExactAssemblyScope aas(GetVIXLAssembler(),
2803 vixl32::k16BitT32InstructionSizeInBytes,
2804 CodeBufferCheckScope::kExactSize);
2805 __ blx(lr);
2806 if (EntrypointRequiresStackMap(entrypoint)) {
2807 RecordPcInfo(instruction, dex_pc, slow_path);
2808 }
2809 } else {
2810 // Ensure the pc position is recorded immediately after the `bl` instruction.
2811 ExactAssemblyScope aas(GetVIXLAssembler(),
2812 vixl32::k32BitT32InstructionSizeInBytes,
2813 CodeBufferCheckScope::kExactSize);
2814 EmitEntrypointThunkCall(entrypoint_offset);
2815 if (EntrypointRequiresStackMap(entrypoint)) {
2816 RecordPcInfo(instruction, dex_pc, slow_path);
2817 }
2818 }
2819 }
2820
2821 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2822 HInstruction* instruction,
2823 SlowPathCode* slow_path) {
2824 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2825 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2826 __ Blx(lr);
2827 }
2828
2829 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2830 if (successor->IsExitBlock()) {
2831 DCHECK(got->GetPrevious()->AlwaysThrows());
2832 return; // no code needed
2833 }
2834
2835 HBasicBlock* block = got->GetBlock();
2836 HInstruction* previous = got->GetPrevious();
2837 HLoopInformation* info = block->GetLoopInformation();
2838
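// On a back edge that has a suspend check, bump the hotness counter and emit the
// suspend check; GenerateSuspendCheck also takes care of reaching `successor`.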
2839 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2840 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2841 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2842 return;
2843 }
2844 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2845 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2846 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2847 }
2848 if (!codegen_->GoesToNextBlock(block, successor)) {
2849 __ B(codegen_->GetLabelOf(successor));
2850 }
2851 }
2852
2853 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2854 got->SetLocations(nullptr);
2855 }
2856
2857 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2858 HandleGoto(got, got->GetSuccessor());
2859 }
2860
2861 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2862 try_boundary->SetLocations(nullptr);
2863 }
2864
2865 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2866 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2867 if (!successor->IsExitBlock()) {
2868 HandleGoto(try_boundary, successor);
2869 }
2870 }
2871
2872 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2873 exit->SetLocations(nullptr);
2874 }
2875
2876 void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
2877
2878 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2879 vixl32::Label* true_target,
2880 vixl32::Label* false_target,
2881 bool is_far_target) {
2882 if (true_target == false_target) {
2883 DCHECK(true_target != nullptr);
2884 __ B(true_target);
2885 return;
2886 }
2887
2888 vixl32::Label* non_fallthrough_target;
2889 bool invert;
2890 bool emit_both_branches;
2891
2892 if (true_target == nullptr) {
2893 // The true target is fallthrough.
2894 DCHECK(false_target != nullptr);
2895 non_fallthrough_target = false_target;
2896 invert = true;
2897 emit_both_branches = false;
2898 } else {
2899 non_fallthrough_target = true_target;
2900 invert = false;
2901 // Either the false target is fallthrough, or there is no fallthrough
2902 // and both branches must be emitted.
2903 emit_both_branches = (false_target != nullptr);
2904 }
2905
2906 const auto cond = GenerateTest(condition, invert, codegen_);
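// GenerateTest emits the comparison that sets the flags and returns the condition
// to branch on, with `invert` already taken into account.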
2907
2908 __ B(cond.first, non_fallthrough_target, is_far_target);
2909
2910 if (emit_both_branches) {
2911 // No target falls through, so we need to branch.
2912 __ B(false_target);
2913 }
2914 }
2915
2916 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2917 size_t condition_input_index,
2918 vixl32::Label* true_target,
2919 vixl32::Label* false_target,
2920 bool far_target) {
2921 HInstruction* cond = instruction->InputAt(condition_input_index);
2922
2923 if (true_target == nullptr && false_target == nullptr) {
2924 // Nothing to do. The code always falls through.
2925 return;
2926 } else if (cond->IsIntConstant()) {
2927 // Constant condition, statically compared against "true" (integer value 1).
2928 if (cond->AsIntConstant()->IsTrue()) {
2929 if (true_target != nullptr) {
2930 __ B(true_target);
2931 }
2932 } else {
2933 DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2934 if (false_target != nullptr) {
2935 __ B(false_target);
2936 }
2937 }
2938 return;
2939 }
2940
2941 // The following code generates these patterns:
2942 // (1) true_target == nullptr && false_target != nullptr
2943 // - opposite condition true => branch to false_target
2944 // (2) true_target != nullptr && false_target == nullptr
2945 // - condition true => branch to true_target
2946 // (3) true_target != nullptr && false_target != nullptr
2947 // - condition true => branch to true_target
2948 // - branch to false_target
2949 if (IsBooleanValueOrMaterializedCondition(cond)) {
2950 // Condition has been materialized, compare the output to 0.
2951 if (kIsDebugBuild) {
2952 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2953 DCHECK(cond_val.IsRegister());
2954 }
2955 if (true_target == nullptr) {
2956 __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2957 false_target,
2958 far_target);
2959 } else {
2960 __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2961 true_target,
2962 far_target);
2963 }
2964 } else {
2965 // Condition has not been materialized. Use its inputs as the comparison and
2966 // its condition as the branch condition.
2967 HCondition* condition = cond->AsCondition();
2968
2969 // If this is a long or FP comparison that has been folded into
2970 // the HCondition, generate the comparison directly.
2971 DataType::Type type = condition->InputAt(0)->GetType();
2972 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2973 GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2974 return;
2975 }
2976
2977 vixl32::Label* non_fallthrough_target;
2978 vixl32::Condition arm_cond = vixl32::Condition::None();
2979 const vixl32::Register left = InputRegisterAt(cond, 0);
2980 const Operand right = InputOperandAt(cond, 1);
2981
2982 if (true_target == nullptr) {
2983 arm_cond = ARMCondition(condition->GetOppositeCondition());
2984 non_fallthrough_target = false_target;
2985 } else {
2986 arm_cond = ARMCondition(condition->GetCondition());
2987 non_fallthrough_target = true_target;
2988 }
2989
2990 if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2991 if (arm_cond.Is(eq)) {
2992 __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2993 } else {
2994 DCHECK(arm_cond.Is(ne));
2995 __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2996 }
2997 } else {
2998 __ Cmp(left, right);
2999 __ B(arm_cond, non_fallthrough_target, far_target);
3000 }
3001 }
3002
3003 // If neither branch falls through (case 3), the conditional branch to `true_target`
3004 // was already emitted (case 2) and we need to emit a jump to `false_target`.
3005 if (true_target != nullptr && false_target != nullptr) {
3006 __ B(false_target);
3007 }
3008 }
3009
3010 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
3011 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3012 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3013 locations->SetInAt(0, Location::RequiresRegister());
3014 if (GetGraph()->IsCompilingBaseline() &&
3015 codegen_->GetCompilerOptions().ProfileBranches() &&
3016 !Runtime::Current()->IsAotCompiler()) {
3017 locations->AddTemp(Location::RequiresRegister());
3018 }
3019 }
3020 }
3021
3022 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
3023 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3024 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3025 vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
3026 nullptr : codegen_->GetLabelOf(true_successor);
3027 vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
3028 nullptr : codegen_->GetLabelOf(false_successor);
3029 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3030 if (GetGraph()->IsCompilingBaseline() &&
3031 codegen_->GetCompilerOptions().ProfileBranches() &&
3032 !Runtime::Current()->IsAotCompiler()) {
3033 DCHECK(if_instr->InputAt(0)->IsCondition());
3034 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3035 DCHECK(info != nullptr);
3036 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3037 // Currently, not all If branches are profiled.
3038 if (cache != nullptr) {
3039 uint32_t address =
3040 reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
3041 static_assert(
3042 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3043 "Unexpected offsets for BranchCache");
3044 vixl32::Label done;
3045 UseScratchRegisterScope temps(GetVIXLAssembler());
3046 vixl32::Register temp = temps.Acquire();
3047 vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
3048 vixl32::Register condition = InputRegisterAt(if_instr, 0);
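// The condition register holds 0 or 1, so indexing with `LSL, 1` selects the false
// or true 16-bit counter. The increment saturates: if the halfword wraps to zero,
// the store below is skipped and the counter stays at 0xFFFF.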
3049 __ Mov(temp, address);
3050 __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3051 __ Adds(counter, counter, 1);
3052 __ Uxth(counter, counter);
3053 __ CompareAndBranchIfZero(counter, &done);
3054 __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3055 __ Bind(&done);
3056 }
3057 }
3058 }
3059 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3060 }
3061
3062 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3063 LocationSummary* locations = new (GetGraph()->GetAllocator())
3064 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3065 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3066 RegisterSet caller_saves = RegisterSet::Empty();
3067 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
3068 locations->SetCustomSlowPathCallerSaves(caller_saves);
3069 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3070 locations->SetInAt(0, Location::RequiresRegister());
3071 }
3072 }
3073
3074 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3075 SlowPathCodeARMVIXL* slow_path =
3076 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
3077 GenerateTestAndBranch(deoptimize,
3078 /* condition_input_index= */ 0,
3079 slow_path->GetEntryLabel(),
3080 /* false_target= */ nullptr);
3081 }
3082
3083 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3084 LocationSummary* locations = new (GetGraph()->GetAllocator())
3085 LocationSummary(flag, LocationSummary::kNoCall);
3086 locations->SetOut(Location::RequiresRegister());
3087 }
3088
3089 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3090 GetAssembler()->LoadFromOffset(kLoadWord,
3091 OutputRegister(flag),
3092 sp,
3093 codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
3094 }
3095
3096 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
3097 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3098 const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
3099
3100 if (is_floating_point) {
3101 locations->SetInAt(0, Location::RequiresFpuRegister());
3102 locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
3103 } else {
3104 locations->SetInAt(0, Location::RequiresRegister());
3105 locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
3106 }
3107
3108 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3109 locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
3110 // The code generator handles overlap with the values, but not with the condition.
3111 locations->SetOut(Location::SameAsFirstInput());
3112 } else if (is_floating_point) {
3113 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3114 } else {
3115 if (!locations->InAt(1).IsConstant()) {
3116 locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
3117 }
3118
3119 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3120 }
3121 }
3122
3123 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
3124 HInstruction* const condition = select->GetCondition();
3125 const LocationSummary* const locations = select->GetLocations();
3126 const DataType::Type type = select->GetType();
3127 const Location first = locations->InAt(0);
3128 const Location out = locations->Out();
3129 const Location second = locations->InAt(1);
3130
3131 // In the unlucky case where the output of this instruction overlaps
3132 // with an input of an "emitted-at-use-site" condition, and the output
3133 // is not one of this instruction's inputs, we need to fall back to
3134 // branches instead of conditional ARM instructions.
3135 bool output_overlaps_with_condition_inputs =
3136 !IsBooleanValueOrMaterializedCondition(condition) &&
3137 !out.Equals(first) &&
3138 !out.Equals(second) &&
3139 (condition->GetLocations()->InAt(0).Equals(out) ||
3140 condition->GetLocations()->InAt(1).Equals(out));
3141 DCHECK_IMPLIES(output_overlaps_with_condition_inputs, condition->IsCondition());
3142 Location src;
3143
3144 if (condition->IsIntConstant()) {
3145 if (condition->AsIntConstant()->IsFalse()) {
3146 src = first;
3147 } else {
3148 src = second;
3149 }
3150
3151 codegen_->MoveLocation(out, src, type);
3152 return;
3153 }
3154
3155 if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
3156 bool invert = false;
3157
3158 if (out.Equals(second)) {
3159 src = first;
3160 invert = true;
3161 } else if (out.Equals(first)) {
3162 src = second;
3163 } else if (second.IsConstant()) {
3164 DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
3165 src = second;
3166 } else if (first.IsConstant()) {
3167 DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
3168 src = first;
3169 invert = true;
3170 } else {
3171 src = second;
3172 }
3173
3174 if (CanGenerateConditionalMove(out, src)) {
3175 if (!out.Equals(first) && !out.Equals(second)) {
3176 codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
3177 }
3178
3179 std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
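// `cond.first` is the condition under which `src` is moved into `out`; the
// (eq, ne) initializer is always overwritten below.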
3180
3181 if (IsBooleanValueOrMaterializedCondition(condition)) {
3182 __ Cmp(InputRegisterAt(select, 2), 0);
3183 cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
3184 } else {
3185 cond = GenerateTest(condition->AsCondition(), invert, codegen_);
3186 }
3187
3188 const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
3189 // We use the scope because of the IT block that follows.
3190 ExactAssemblyScope guard(GetVIXLAssembler(),
3191 instr_count * vixl32::k16BitT32InstructionSizeInBytes,
3192 CodeBufferCheckScope::kExactSize);
3193
3194 if (out.IsRegister()) {
3195 __ it(cond.first);
3196 __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
3197 } else {
3198 DCHECK(out.IsRegisterPair());
3199
3200 Operand operand_high(0);
3201 Operand operand_low(0);
3202
3203 if (src.IsConstant()) {
3204 const int64_t value = Int64ConstantFrom(src);
3205
3206 operand_high = High32Bits(value);
3207 operand_low = Low32Bits(value);
3208 } else {
3209 DCHECK(src.IsRegisterPair());
3210 operand_high = HighRegisterFrom(src);
3211 operand_low = LowRegisterFrom(src);
3212 }
3213
3214 __ it(cond.first);
3215 __ mov(cond.first, LowRegisterFrom(out), operand_low);
3216 __ it(cond.first);
3217 __ mov(cond.first, HighRegisterFrom(out), operand_high);
3218 }
3219
3220 return;
3221 }
3222 }
3223
3224 vixl32::Label* false_target = nullptr;
3225 vixl32::Label* true_target = nullptr;
3226 vixl32::Label select_end;
3227 vixl32::Label other_case;
3228 vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
3229
3230 if (out.Equals(second)) {
3231 true_target = target;
3232 src = first;
3233 } else {
3234 false_target = target;
3235 src = second;
3236
3237 if (!out.Equals(first)) {
3238 if (output_overlaps_with_condition_inputs) {
3239 false_target = &other_case;
3240 } else {
3241 codegen_->MoveLocation(out, first, type);
3242 }
3243 }
3244 }
3245
3246 GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3247 codegen_->MoveLocation(out, src, type);
3248 if (output_overlaps_with_condition_inputs) {
3249 __ B(target);
3250 __ Bind(&other_case);
3251 codegen_->MoveLocation(out, first, type);
3252 }
3253
3254 if (select_end.IsReferenced()) {
3255 __ Bind(&select_end);
3256 }
3257 }
3258
3259 void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
3260 new (GetGraph()->GetAllocator()) LocationSummary(nop);
3261 }
3262
3263 void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
3264 // The environment recording already happened in CodeGenerator::Compile.
3265 }
3266
3267 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3268 __ Claim(adjustment);
3269 GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3270 }
3271
3272 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3273 __ Drop(adjustment);
3274 GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3275 }
3276
3277 void CodeGeneratorARMVIXL::GenerateNop() {
3278 __ Nop();
3279 }
3280
3281 // `temp` is an extra temporary register that is used for some conditions;
3282 // callers may not specify it, in which case the method will use a scratch
3283 // register instead.
3284 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3285 vixl32::Register out,
3286 vixl32::Register in,
3287 vixl32::Register temp) {
3288 switch (condition) {
3289 case kCondEQ:
3290 // x <= 0 iff x == 0 when the comparison is unsigned.
3291 case kCondBE:
3292 if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3293 temp = out;
3294 }
3295
3296 // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3297 // different as well.
3298 if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3299 // temp = - in; only 0 sets the carry flag.
3300 __ Rsbs(temp, in, 0);
3301
3302 if (out.Is(in)) {
3303 std::swap(in, temp);
3304 }
3305
3306 // out = - in + in + carry = carry
3307 __ Adc(out, temp, in);
3308 } else {
3309 // If `in` is 0, then it has 32 leading zeros; otherwise it has fewer.
3310 __ Clz(out, in);
3311 // Any number less than 32 logically shifted right by 5 bits results in 0;
3312 // the same operation on 32 yields 1.
3313 __ Lsr(out, out, 5);
3314 }
3315
3316 break;
3317 case kCondNE:
3318 // x > 0 iff x != 0 when the comparison is unsigned.
3319 case kCondA: {
3320 UseScratchRegisterScope temps(GetVIXLAssembler());
3321
3322 if (out.Is(in)) {
3323 if (!temp.IsValid() || in.Is(temp)) {
3324 temp = temps.Acquire();
3325 }
3326 } else if (!temp.IsValid() || !temp.IsLow()) {
3327 temp = out;
3328 }
3329
3330 // temp = in - 1; only 0 does not set the carry flag.
3331 __ Subs(temp, in, 1);
3332 // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3333 __ Sbc(out, in, temp);
3334 break;
3335 }
3336 case kCondGE:
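// x >= 0 iff the sign bit of ~x is 1, so invert `in` and fall through to the
// sign-bit test used for kCondLT.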
3337 __ Mvn(out, in);
3338 in = out;
3339 FALLTHROUGH_INTENDED;
3340 case kCondLT:
3341 // We only care about the sign bit.
3342 __ Lsr(out, in, 31);
3343 break;
3344 case kCondAE:
3345 // Trivially true.
3346 __ Mov(out, 1);
3347 break;
3348 case kCondB:
3349 // Trivially false.
3350 __ Mov(out, 0);
3351 break;
3352 default:
3353 LOG(FATAL) << "Unexpected condition " << condition;
3354 UNREACHABLE();
3355 }
3356 }
3357
3358 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3359 LocationSummary* locations =
3360 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3361 const DataType::Type type = cond->InputAt(0)->GetType();
3362 if (DataType::IsFloatingPointType(type)) {
3363 locations->SetInAt(0, Location::RequiresFpuRegister());
3364 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3365 } else {
3366 locations->SetInAt(0, Location::RequiresRegister());
3367 locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3368 }
3369 if (!cond->IsEmittedAtUseSite()) {
3370 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3371 }
3372 }
3373
3374 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3375 if (cond->IsEmittedAtUseSite()) {
3376 return;
3377 }
3378
3379 const DataType::Type type = cond->GetLeft()->GetType();
3380
3381 if (DataType::IsFloatingPointType(type)) {
3382 GenerateConditionGeneric(cond, codegen_);
3383 return;
3384 }
3385
3386 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3387
3388 const IfCondition condition = cond->GetCondition();
3389
3390 // A condition with only one boolean input, or with two boolean inputs that are neither an
3391 // equality nor an inequality, results from transformations done by the instruction simplifier
3392 // and is handled as a regular condition with integral inputs.
3393 if (type == DataType::Type::kBool &&
3394 cond->GetRight()->GetType() == DataType::Type::kBool &&
3395 (condition == kCondEQ || condition == kCondNE)) {
3396 vixl32::Register left = InputRegisterAt(cond, 0);
3397 const vixl32::Register out = OutputRegister(cond);
3398 const Location right_loc = cond->GetLocations()->InAt(1);
3399
3400 // The constant case is handled by the instruction simplifier.
3401 DCHECK(!right_loc.IsConstant());
3402
3403 vixl32::Register right = RegisterFrom(right_loc);
3404
3405 // Avoid 32-bit instructions if possible.
3406 if (out.Is(right)) {
3407 std::swap(left, right);
3408 }
3409
3410 __ Eor(out, left, right);
3411
3412 if (condition == kCondEQ) {
3413 __ Eor(out, out, 1);
3414 }
3415
3416 return;
3417 }
3418
3419 GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3420 }
3421
3422 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3423 HandleCondition(comp);
3424 }
3425
3426 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3427 HandleCondition(comp);
3428 }
3429
3430 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3431 HandleCondition(comp);
3432 }
3433
3434 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3435 HandleCondition(comp);
3436 }
3437
3438 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3439 HandleCondition(comp);
3440 }
3441
3442 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3443 HandleCondition(comp);
3444 }
3445
3446 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3447 HandleCondition(comp);
3448 }
3449
3450 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3451 HandleCondition(comp);
3452 }
3453
3454 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3455 HandleCondition(comp);
3456 }
3457
3458 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3459 HandleCondition(comp);
3460 }
3461
3462 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3463 HandleCondition(comp);
3464 }
3465
3466 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3467 HandleCondition(comp);
3468 }
3469
3470 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3471 HandleCondition(comp);
3472 }
3473
3474 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3475 HandleCondition(comp);
3476 }
3477
3478 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3479 HandleCondition(comp);
3480 }
3481
3482 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3483 HandleCondition(comp);
3484 }
3485
3486 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3487 HandleCondition(comp);
3488 }
3489
3490 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3491 HandleCondition(comp);
3492 }
3493
3494 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3495 HandleCondition(comp);
3496 }
3497
3498 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3499 HandleCondition(comp);
3500 }
3501
3502 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3503 LocationSummary* locations =
3504 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3505 locations->SetOut(Location::ConstantLocation(constant));
3506 }
3507
3508 void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
3509 // Will be generated at use site.
3510 }
3511
3512 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3513 LocationSummary* locations =
3514 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3515 locations->SetOut(Location::ConstantLocation(constant));
3516 }
3517
3518 void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
3519 // Will be generated at use site.
3520 }
3521
3522 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3523 LocationSummary* locations =
3524 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3525 locations->SetOut(Location::ConstantLocation(constant));
3526 }
3527
3528 void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
3529 // Will be generated at use site.
3530 }
3531
3532 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3533 LocationSummary* locations =
3534 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3535 locations->SetOut(Location::ConstantLocation(constant));
3536 }
3537
3538 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3539 [[maybe_unused]] HFloatConstant* constant) {
3540 // Will be generated at use site.
3541 }
3542
3543 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3544 LocationSummary* locations =
3545 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3546 locations->SetOut(Location::ConstantLocation(constant));
3547 }
3548
3549 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3550 [[maybe_unused]] HDoubleConstant* constant) {
3551 // Will be generated at use site.
3552 }
3553
3554 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3555 constructor_fence->SetLocations(nullptr);
3556 }
3557
3558 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3559 [[maybe_unused]] HConstructorFence* constructor_fence) {
3560 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3561 }
3562
3563 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3564 memory_barrier->SetLocations(nullptr);
3565 }
3566
3567 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3568 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3569 }
3570
3571 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3572 ret->SetLocations(nullptr);
3573 }
3574
3575 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
3576 codegen_->GenerateFrameExit();
3577 }
3578
3579 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3580 LocationSummary* locations =
3581 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3582 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3583 }
3584
3585 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3586 if (GetGraph()->IsCompilingOsr()) {
3587 // To simplify callers of an OSR method, we put the return value in both
3588 // floating point and core registers.
3589 switch (ret->InputAt(0)->GetType()) {
3590 case DataType::Type::kFloat32:
3591 __ Vmov(r0, s0);
3592 break;
3593 case DataType::Type::kFloat64:
3594 __ Vmov(r0, r1, d0);
3595 break;
3596 default:
3597 break;
3598 }
3599 }
3600 codegen_->GenerateFrameExit();
3601 }
3602
3603 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3604 // The trampoline uses the same calling convention as dex calling conventions,
3605 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
3606 // the method_idx.
3607 HandleInvoke(invoke);
3608 }
3609
3610 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3611 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3612 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3613 }
3614
3615 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3616 // Explicit clinit checks triggered by static invokes must have been pruned by
3617 // art::PrepareForRegisterAllocation.
3618 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3619
3620 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3621 if (intrinsic.TryDispatch(invoke)) {
3622 return;
3623 }
3624
3625 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3626 CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3627 /*for_register_allocation=*/ true);
3628 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3629 } else {
3630 HandleInvoke(invoke);
3631 }
3632 }
3633
3634 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3635 if (invoke->GetLocations()->Intrinsified()) {
3636 IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3637 intrinsic.Dispatch(invoke);
3638 return true;
3639 }
3640 return false;
3641 }
3642
3643 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3644 // Explicit clinit checks triggered by static invokes must have been pruned by
3645 // art::PrepareForRegisterAllocation.
3646 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3647
3648 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3649 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3650 return;
3651 }
3652
3653 LocationSummary* locations = invoke->GetLocations();
3654 codegen_->GenerateStaticOrDirectCall(
3655 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3656
3657 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3658 }
3659
3660 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3661 InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3662 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3663 }
3664
3665 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3666 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3667 if (intrinsic.TryDispatch(invoke)) {
3668 return;
3669 }
3670
3671 HandleInvoke(invoke);
3672 }
3673
3674 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3675 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3676 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3677 return;
3678 }
3679
3680 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3681 DCHECK(!codegen_->IsLeafMethod());
3682
3683 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3684 }
3685
3686 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3687 HandleInvoke(invoke);
3688 // Add the hidden argument.
3689 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3690 // We cannot request r12 as it's blocked by the register allocator.
3691 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3692 }
3693 }
3694
3695 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3696 vixl32::Register klass) {
3697 DCHECK_EQ(r0.GetCode(), klass.GetCode());
3698 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3699 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3700 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3701 info, GetCompilerOptions(), instruction->AsInvoke());
3702 if (cache != nullptr) {
3703 uint32_t address = reinterpret_cast32<uint32_t>(cache);
3704 vixl32::Label done;
3705 UseScratchRegisterScope temps(GetVIXLAssembler());
3706 temps.Exclude(ip);
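// Load the first cached class and compare it against the receiver's class; on a
// mismatch, call the runtime to update the cache. Note: the cache address is kept
// in r4 rather than a scratch register, presumably so the kQuickUpdateInlineCache
// entrypoint can still read it.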
3707 __ Mov(r4, address);
3708 __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3709 // Fast path for a monomorphic cache.
3710 __ Cmp(klass, ip);
3711 __ B(eq, &done, /* is_far_target= */ false);
3712 InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
3713 __ Bind(&done);
3714 } else {
3715 // This is unexpected, but we don't guarantee stable compilation across
3716 // JIT runs so just warn about it.
3717 ScopedObjectAccess soa(Thread::Current());
3718 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3719 }
3720 }
3721 }
3722
3723 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3724 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3725 LocationSummary* locations = invoke->GetLocations();
3726 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3727 Location receiver = locations->InAt(0);
3728 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3729
3730 DCHECK(!receiver.IsStackSlot());
3731
3732 // Ensure the pc position is recorded immediately after the `ldr` instruction.
3733 {
3734 ExactAssemblyScope aas(GetVIXLAssembler(),
3735 vixl32::kMaxInstructionSizeInBytes,
3736 CodeBufferCheckScope::kMaximumSize);
3737 // /* HeapReference<Class> */ temp = receiver->klass_
3738 __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3739 codegen_->MaybeRecordImplicitNullCheck(invoke);
3740 }
3741 // Instead of simply (possibly) unpoisoning `temp` here, we should
3742 // emit a read barrier for the previous class reference load.
3743 // However this is not required in practice, as this is an
3744 // intermediate/temporary reference and because the current
3745 // concurrent copying collector keeps the from-space memory
3746 // intact/accessible until the end of the marking phase (though
3747 // future collectors may not preserve this guarantee).
3748 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3749
3750 // If we're compiling baseline, update the inline cache.
3751 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3752
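// Load the ImTable pointer from the receiver's class into `temp`.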
3753 GetAssembler()->LoadFromOffset(kLoadWord,
3754 temp,
3755 temp,
3756 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3757
3758 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3759 invoke->GetImtIndex(), kArmPointerSize));
3760 // temp = temp->GetImtEntryAt(method_offset);
3761 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3762 uint32_t entry_point =
3763 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3764 // LR = temp->GetEntryPoint();
3765 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3766
3767 {
3768 // Set the hidden argument (in r12). This is done here, right before the BLX, to prevent
3769 // other instructions from clobbering it, as they might use r12 as a scratch register.
3770 Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3771 // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3772 // so it checks if the application is using them (by passing them to the macro assembler
3773 // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3774 // what is available, and is the opposite of the standard usage: Instead of requesting a
3775 // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3776 // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3777 // (to materialize the constant), since the destination register becomes available for such use
3778 // internally for the duration of the macro instruction.
3779 UseScratchRegisterScope temps(GetVIXLAssembler());
3780 temps.Exclude(RegisterFrom(hidden_reg));
3781 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3782 Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3783 if (current_method.IsStackSlot()) {
3784 GetAssembler()->LoadFromOffset(
3785 kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3786 } else {
3787 __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3788 }
3789 } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3790 // We pass the method from the IMT in case of a conflict. This will ensure
3791 // we go into the runtime to resolve the actual method.
3792 CHECK_NE(temp.GetCode(), lr.GetCode());
3793 __ Mov(RegisterFrom(hidden_reg), temp);
3794 } else {
3795 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3796 }
3797 }
3798 {
3799 // Ensure the pc position is recorded immediately after the `blx` instruction.
3800 // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
3801 ExactAssemblyScope aas(GetVIXLAssembler(),
3802 vixl32::k16BitT32InstructionSizeInBytes,
3803 CodeBufferCheckScope::kExactSize);
3804 // LR(); call the entry point loaded into LR above.
3805 __ blx(lr);
3806 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3807 DCHECK(!codegen_->IsLeafMethod());
3808 }
3809
3810 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3811 }
3812
3813 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3814 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3815 if (intrinsic.TryDispatch(invoke)) {
3816 return;
3817 }
3818 HandleInvoke(invoke);
3819 }
3820
3821 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3822 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3823 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3824 return;
3825 }
3826 codegen_->GenerateInvokePolymorphicCall(invoke);
3827 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3828 }
3829
3830 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3831 HandleInvoke(invoke);
3832 }
3833
3834 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3835 codegen_->GenerateInvokeCustomCall(invoke);
3836 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3837 }
3838
3839 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3840 LocationSummary* locations =
3841 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3842 switch (neg->GetResultType()) {
3843 case DataType::Type::kInt32: {
3844 locations->SetInAt(0, Location::RequiresRegister());
3845 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3846 break;
3847 }
3848 case DataType::Type::kInt64: {
3849 locations->SetInAt(0, Location::RequiresRegister());
3850 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3851 break;
3852 }
3853
3854 case DataType::Type::kFloat32:
3855 case DataType::Type::kFloat64:
3856 locations->SetInAt(0, Location::RequiresFpuRegister());
3857 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3858 break;
3859
3860 default:
3861 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3862 }
3863 }
3864
3865 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3866 LocationSummary* locations = neg->GetLocations();
3867 Location out = locations->Out();
3868 Location in = locations->InAt(0);
3869 switch (neg->GetResultType()) {
3870 case DataType::Type::kInt32:
3871 __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3872 break;
3873
3874 case DataType::Type::kInt64:
3875 // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3876 __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3877 // We cannot emit an RSC (Reverse Subtract with Carry)
3878 // instruction here, as it does not exist in the Thumb-2
3879 // instruction set, so we use the following SBC and SUB
3880 // sequence instead.
3881 //
3882 // out.hi = -C
3883 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3884 // out.hi = out.hi - in.hi
3885 __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
3886 break;
3887
3888 case DataType::Type::kFloat32:
3889 case DataType::Type::kFloat64:
3890 __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3891 break;
3892
3893 default:
3894 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3895 }
3896 }
3897
3898 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3899 DataType::Type result_type = conversion->GetResultType();
3900 DataType::Type input_type = conversion->GetInputType();
3901 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3902 << input_type << " -> " << result_type;
3903
3904 // The float-to-long, double-to-long and long-to-float type conversions
3905 // rely on a call to the runtime.
3906 LocationSummary::CallKind call_kind =
3907 (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3908 && result_type == DataType::Type::kInt64)
3909 || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3910 ? LocationSummary::kCallOnMainOnly
3911 : LocationSummary::kNoCall;
3912 LocationSummary* locations =
3913 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3914
3915 switch (result_type) {
3916 case DataType::Type::kUint8:
3917 case DataType::Type::kInt8:
3918 case DataType::Type::kUint16:
3919 case DataType::Type::kInt16:
3920 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3921 locations->SetInAt(0, Location::RequiresRegister());
3922 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3923 break;
3924
3925 case DataType::Type::kInt32:
3926 switch (input_type) {
3927 case DataType::Type::kInt64:
3928 locations->SetInAt(0, Location::Any());
3929 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3930 break;
3931
3932 case DataType::Type::kFloat32:
3933 locations->SetInAt(0, Location::RequiresFpuRegister());
3934 locations->SetOut(Location::RequiresRegister());
3935 locations->AddTemp(Location::RequiresFpuRegister());
3936 break;
3937
3938 case DataType::Type::kFloat64:
3939 locations->SetInAt(0, Location::RequiresFpuRegister());
3940 locations->SetOut(Location::RequiresRegister());
3941 locations->AddTemp(Location::RequiresFpuRegister());
3942 break;
3943
3944 default:
3945 LOG(FATAL) << "Unexpected type conversion from " << input_type
3946 << " to " << result_type;
3947 }
3948 break;
3949
3950 case DataType::Type::kInt64:
3951 switch (input_type) {
3952 case DataType::Type::kBool:
3953 case DataType::Type::kUint8:
3954 case DataType::Type::kInt8:
3955 case DataType::Type::kUint16:
3956 case DataType::Type::kInt16:
3957 case DataType::Type::kInt32:
3958 locations->SetInAt(0, Location::RequiresRegister());
3959 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3960 break;
3961
3962 case DataType::Type::kFloat32: {
3963 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3964 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3965 locations->SetOut(LocationFrom(r0, r1));
3966 break;
3967 }
3968
3969 case DataType::Type::kFloat64: {
3970 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3971 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3972 calling_convention.GetFpuRegisterAt(1)));
3973 locations->SetOut(LocationFrom(r0, r1));
3974 break;
3975 }
3976
3977 default:
3978 LOG(FATAL) << "Unexpected type conversion from " << input_type
3979 << " to " << result_type;
3980 }
3981 break;
3982
3983 case DataType::Type::kFloat32:
3984 switch (input_type) {
3985 case DataType::Type::kBool:
3986 case DataType::Type::kUint8:
3987 case DataType::Type::kInt8:
3988 case DataType::Type::kUint16:
3989 case DataType::Type::kInt16:
3990 case DataType::Type::kInt32:
3991 locations->SetInAt(0, Location::RequiresRegister());
3992 locations->SetOut(Location::RequiresFpuRegister());
3993 break;
3994
3995 case DataType::Type::kInt64: {
3996 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3997 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3998 calling_convention.GetRegisterAt(1)));
3999 locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4000 break;
4001 }
4002
4003 case DataType::Type::kFloat64:
4004 locations->SetInAt(0, Location::RequiresFpuRegister());
4005 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4006 break;
4007
4008 default:
4009 LOG(FATAL) << "Unexpected type conversion from " << input_type
4010 << " to " << result_type;
4011 }
4012 break;
4013
4014 case DataType::Type::kFloat64:
4015 switch (input_type) {
4016 case DataType::Type::kBool:
4017 case DataType::Type::kUint8:
4018 case DataType::Type::kInt8:
4019 case DataType::Type::kUint16:
4020 case DataType::Type::kInt16:
4021 case DataType::Type::kInt32:
4022 locations->SetInAt(0, Location::RequiresRegister());
4023 locations->SetOut(Location::RequiresFpuRegister());
4024 break;
4025
4026 case DataType::Type::kInt64:
4027 locations->SetInAt(0, Location::RequiresRegister());
4028 locations->SetOut(Location::RequiresFpuRegister());
4029 locations->AddTemp(Location::RequiresFpuRegister());
4030 locations->AddTemp(Location::RequiresFpuRegister());
4031 break;
4032
4033 case DataType::Type::kFloat32:
4034 locations->SetInAt(0, Location::RequiresFpuRegister());
4035 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4036 break;
4037
4038 default:
4039 LOG(FATAL) << "Unexpected type conversion from " << input_type
4040 << " to " << result_type;
4041 }
4042 break;
4043
4044 default:
4045 LOG(FATAL) << "Unexpected type conversion from " << input_type
4046 << " to " << result_type;
4047 }
4048 }
4049
4050 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
4051 LocationSummary* locations = conversion->GetLocations();
4052 Location out = locations->Out();
4053 Location in = locations->InAt(0);
4054 DataType::Type result_type = conversion->GetResultType();
4055 DataType::Type input_type = conversion->GetInputType();
4056 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
4057 << input_type << " -> " << result_type;
4058 switch (result_type) {
4059 case DataType::Type::kUint8:
4060 switch (input_type) {
4061 case DataType::Type::kInt8:
4062 case DataType::Type::kUint16:
4063 case DataType::Type::kInt16:
4064 case DataType::Type::kInt32:
4065 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4066 break;
4067 case DataType::Type::kInt64:
4068 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4069 break;
4070
4071 default:
4072 LOG(FATAL) << "Unexpected type conversion from " << input_type
4073 << " to " << result_type;
4074 }
4075 break;
4076
4077 case DataType::Type::kInt8:
4078 switch (input_type) {
4079 case DataType::Type::kUint8:
4080 case DataType::Type::kUint16:
4081 case DataType::Type::kInt16:
4082 case DataType::Type::kInt32:
4083 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4084 break;
4085 case DataType::Type::kInt64:
4086 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4087 break;
4088
4089 default:
4090 LOG(FATAL) << "Unexpected type conversion from " << input_type
4091 << " to " << result_type;
4092 }
4093 break;
4094
4095 case DataType::Type::kUint16:
4096 switch (input_type) {
4097 case DataType::Type::kInt8:
4098 case DataType::Type::kInt16:
4099 case DataType::Type::kInt32:
4100 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4101 break;
4102 case DataType::Type::kInt64:
4103 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4104 break;
4105
4106 default:
4107 LOG(FATAL) << "Unexpected type conversion from " << input_type
4108 << " to " << result_type;
4109 }
4110 break;
4111
4112 case DataType::Type::kInt16:
4113 switch (input_type) {
4114 case DataType::Type::kUint16:
4115 case DataType::Type::kInt32:
4116 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4117 break;
4118 case DataType::Type::kInt64:
4119 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4120 break;
4121
4122 default:
4123 LOG(FATAL) << "Unexpected type conversion from " << input_type
4124 << " to " << result_type;
4125 }
4126 break;
4127
4128 case DataType::Type::kInt32:
4129 switch (input_type) {
4130 case DataType::Type::kInt64:
4131 DCHECK(out.IsRegister());
4132 if (in.IsRegisterPair()) {
4133 __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
4134 } else if (in.IsDoubleStackSlot()) {
4135 GetAssembler()->LoadFromOffset(kLoadWord,
4136 OutputRegister(conversion),
4137 sp,
4138 in.GetStackIndex());
4139 } else {
4140 DCHECK(in.IsConstant());
4141 DCHECK(in.GetConstant()->IsLongConstant());
4142 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
4143 __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
4144 }
4145 break;
4146
4147 case DataType::Type::kFloat32: {
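// VCVT produces its integer result in an S register, so convert into the FP
// temporary and then move the 32-bit result to the core output register.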
4148 vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
4149 __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
4150 __ Vmov(OutputRegister(conversion), temp);
4151 break;
4152 }
4153
4154 case DataType::Type::kFloat64: {
4155 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4156 __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
4157 __ Vmov(OutputRegister(conversion), temp_s);
4158 break;
4159 }
4160
4161 default:
4162 LOG(FATAL) << "Unexpected type conversion from " << input_type
4163 << " to " << result_type;
4164 }
4165 break;
4166
4167 case DataType::Type::kInt64:
4168 switch (input_type) {
4169 case DataType::Type::kBool:
4170 case DataType::Type::kUint8:
4171 case DataType::Type::kInt8:
4172 case DataType::Type::kUint16:
4173 case DataType::Type::kInt16:
4174 case DataType::Type::kInt32:
4175 DCHECK(out.IsRegisterPair());
4176 DCHECK(in.IsRegister());
4177 __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
4178 // Sign extension.
4179 __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
4180 break;
4181
4182 case DataType::Type::kFloat32:
4183 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
4184 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
4185 break;
4186
4187 case DataType::Type::kFloat64:
4188 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
4189 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
4190 break;
4191
4192 default:
4193 LOG(FATAL) << "Unexpected type conversion from " << input_type
4194 << " to " << result_type;
4195 }
4196 break;
4197
4198 case DataType::Type::kFloat32:
4199 switch (input_type) {
4200 case DataType::Type::kBool:
4201 case DataType::Type::kUint8:
4202 case DataType::Type::kInt8:
4203 case DataType::Type::kUint16:
4204 case DataType::Type::kInt16:
4205 case DataType::Type::kInt32:
4206 __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
4207 __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
4208 break;
4209
4210 case DataType::Type::kInt64:
4211 codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
4212 CheckEntrypointTypes<kQuickL2f, float, int64_t>();
4213 break;
4214
4215 case DataType::Type::kFloat64:
4216 __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
4217 break;
4218
4219 default:
4220 LOG(FATAL) << "Unexpected type conversion from " << input_type
4221 << " to " << result_type;
4222 }
4223 break;
4224
4225 case DataType::Type::kFloat64:
4226 switch (input_type) {
4227 case DataType::Type::kBool:
4228 case DataType::Type::kUint8:
4229 case DataType::Type::kInt8:
4230 case DataType::Type::kUint16:
4231 case DataType::Type::kInt16:
4232 case DataType::Type::kInt32:
4233 __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4234 __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4235 break;
4236
4237 case DataType::Type::kInt64: {
4238 vixl32::Register low = LowRegisterFrom(in);
4239 vixl32::Register high = HighRegisterFrom(in);
4240 vixl32::SRegister out_s = LowSRegisterFrom(out);
4241 vixl32::DRegister out_d = DRegisterFrom(out);
4242 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4243 vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4244 vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4245
4246 // temp_d = int-to-double(high)
4247 __ Vmov(temp_s, high);
4248 __ Vcvt(F64, S32, temp_d, temp_s);
4249 // constant_d = k2Pow32EncodingForDouble
4250 __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4251 // out_d = unsigned-to-double(low)
4252 __ Vmov(out_s, low);
4253 __ Vcvt(F64, U32, out_d, out_s);
4254 // out_d += temp_d * constant_d
4255 __ Vmla(F64, out_d, temp_d, constant_d);
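          // Illustrative trace (comment only, not generated code): the sequence above computes
          //   double(v) = double(int32_t(v >> 32)) * 2^32 + double(uint32_t(v)),
          // e.g. v = 0x1'00000002 gives 1.0 * 4294967296.0 + 2.0 = 4294967298.0, the exact value.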
4256 break;
4257 }
4258
4259 case DataType::Type::kFloat32:
4260 __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4261 break;
4262
4263 default:
4264 LOG(FATAL) << "Unexpected type conversion from " << input_type
4265 << " to " << result_type;
4266 }
4267 break;
4268
4269 default:
4270 LOG(FATAL) << "Unexpected type conversion from " << input_type
4271 << " to " << result_type;
4272 }
4273 }
4274
VisitAdd(HAdd * add)4275 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4276 LocationSummary* locations =
4277 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4278 switch (add->GetResultType()) {
4279 case DataType::Type::kInt32: {
4280 locations->SetInAt(0, Location::RequiresRegister());
4281 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4282 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4283 break;
4284 }
4285
4286 case DataType::Type::kInt64: {
4287 locations->SetInAt(0, Location::RequiresRegister());
4288 locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4289 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4290 break;
4291 }
4292
4293 case DataType::Type::kFloat32:
4294 case DataType::Type::kFloat64: {
4295 locations->SetInAt(0, Location::RequiresFpuRegister());
4296 locations->SetInAt(1, Location::RequiresFpuRegister());
4297 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4298 break;
4299 }
4300
4301 default:
4302 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4303 }
4304 }
4305
VisitAdd(HAdd * add)4306 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4307 LocationSummary* locations = add->GetLocations();
4308 Location out = locations->Out();
4309 Location first = locations->InAt(0);
4310 Location second = locations->InAt(1);
4311
4312 switch (add->GetResultType()) {
4313 case DataType::Type::kInt32: {
4314 __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4315 }
4316 break;
4317
4318 case DataType::Type::kInt64: {
4319 if (second.IsConstant()) {
4320 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4321 GenerateAddLongConst(out, first, value);
4322 } else {
4323 DCHECK(second.IsRegisterPair());
4324 __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4325 __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4326 }
4327 break;
4328 }
4329
4330 case DataType::Type::kFloat32:
4331 case DataType::Type::kFloat64:
4332 __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4333 break;
4334
4335 default:
4336 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4337 }
4338 }
4339
VisitSub(HSub * sub)4340 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4341 LocationSummary* locations =
4342 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4343 switch (sub->GetResultType()) {
4344 case DataType::Type::kInt32: {
4345 locations->SetInAt(0, Location::RequiresRegister());
4346 locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4347 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4348 break;
4349 }
4350
4351 case DataType::Type::kInt64: {
4352 locations->SetInAt(0, Location::RequiresRegister());
4353 locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4354 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4355 break;
4356 }
4357 case DataType::Type::kFloat32:
4358 case DataType::Type::kFloat64: {
4359 locations->SetInAt(0, Location::RequiresFpuRegister());
4360 locations->SetInAt(1, Location::RequiresFpuRegister());
4361 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4362 break;
4363 }
4364 default:
4365 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4366 }
4367 }
4368
VisitSub(HSub * sub)4369 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4370 LocationSummary* locations = sub->GetLocations();
4371 Location out = locations->Out();
4372 Location first = locations->InAt(0);
4373 Location second = locations->InAt(1);
4374 switch (sub->GetResultType()) {
4375 case DataType::Type::kInt32: {
4376 __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4377 break;
4378 }
4379
4380 case DataType::Type::kInt64: {
4381 if (second.IsConstant()) {
4382 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4383 GenerateAddLongConst(out, first, -value);
4384 } else {
4385 DCHECK(second.IsRegisterPair());
4386 __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4387 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4388 }
4389 break;
4390 }
4391
4392 case DataType::Type::kFloat32:
4393 case DataType::Type::kFloat64:
4394 __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4395 break;
4396
4397 default:
4398 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4399 }
4400 }
4401
VisitMul(HMul * mul)4402 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4403 LocationSummary* locations =
4404 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4405 switch (mul->GetResultType()) {
4406 case DataType::Type::kInt32:
4407 case DataType::Type::kInt64: {
4408 locations->SetInAt(0, Location::RequiresRegister());
4409 locations->SetInAt(1, Location::RequiresRegister());
4410 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4411 break;
4412 }
4413
4414 case DataType::Type::kFloat32:
4415 case DataType::Type::kFloat64: {
4416 locations->SetInAt(0, Location::RequiresFpuRegister());
4417 locations->SetInAt(1, Location::RequiresFpuRegister());
4418 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4419 break;
4420 }
4421
4422 default:
4423 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4424 }
4425 }
4426
VisitMul(HMul * mul)4427 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4428 LocationSummary* locations = mul->GetLocations();
4429 Location out = locations->Out();
4430 Location first = locations->InAt(0);
4431 Location second = locations->InAt(1);
4432 switch (mul->GetResultType()) {
4433 case DataType::Type::kInt32: {
4434 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4435 break;
4436 }
4437 case DataType::Type::kInt64: {
4438 vixl32::Register out_hi = HighRegisterFrom(out);
4439 vixl32::Register out_lo = LowRegisterFrom(out);
4440 vixl32::Register in1_hi = HighRegisterFrom(first);
4441 vixl32::Register in1_lo = LowRegisterFrom(first);
4442 vixl32::Register in2_hi = HighRegisterFrom(second);
4443 vixl32::Register in2_lo = LowRegisterFrom(second);
4444
4445      // Extra checks are needed because overlapping register pairs such as R1_R2 exist.
4446      // The algorithm below is wrong if out.hi aliases either in1.lo or in2.lo
4447      // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4448 DCHECK(!out_hi.Is(in1_lo));
4449 DCHECK(!out_hi.Is(in2_lo));
4450
4451 // input: in1 - 64 bits, in2 - 64 bits
4452 // output: out
4453 // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4454 // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4455 // parts: out.lo = (in1.lo * in2.lo)[31:0]
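      // Worked example (comment only): for in1 = 2^32 + 3 and in2 = 2^32 + 5 the true product is
      // 2^64 + 8 * 2^32 + 15; truncated to 64 bits this is out.hi = 8 (= 3*1 + 1*5 + 0) and
      // out.lo = 15, which is exactly what the Mul/Mla/Umull/Add sequence below produces.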
4456
4457 UseScratchRegisterScope temps(GetVIXLAssembler());
4458 vixl32::Register temp = temps.Acquire();
4459 // temp <- in1.lo * in2.hi
4460 __ Mul(temp, in1_lo, in2_hi);
4461 // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4462 __ Mla(out_hi, in1_hi, in2_lo, temp);
4463 // out.lo <- (in1.lo * in2.lo)[31:0];
4464 __ Umull(out_lo, temp, in1_lo, in2_lo);
4465 // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4466 __ Add(out_hi, out_hi, temp);
4467 break;
4468 }
4469
4470 case DataType::Type::kFloat32:
4471 case DataType::Type::kFloat64:
4472 __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4473 break;
4474
4475 default:
4476 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4477 }
4478 }
4479
DivRemOneOrMinusOne(HBinaryOperation * instruction)4480 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4481 DCHECK(instruction->IsDiv() || instruction->IsRem());
4482 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4483
4484 Location second = instruction->GetLocations()->InAt(1);
4485 DCHECK(second.IsConstant());
4486
4487 vixl32::Register out = OutputRegister(instruction);
4488 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4489 int32_t imm = Int32ConstantFrom(second);
4490 DCHECK(imm == 1 || imm == -1);
4491
4492 if (instruction->IsRem()) {
4493 __ Mov(out, 0);
4494 } else {
4495 if (imm == 1) {
4496 __ Mov(out, dividend);
4497 } else {
4498 __ Rsb(out, dividend, 0);
4499 }
4500 }
4501 }
4502
DivRemByPowerOfTwo(HBinaryOperation * instruction)4503 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4504 DCHECK(instruction->IsDiv() || instruction->IsRem());
4505 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4506
4507 LocationSummary* locations = instruction->GetLocations();
4508 Location second = locations->InAt(1);
4509 DCHECK(second.IsConstant());
4510
4511 vixl32::Register out = OutputRegister(instruction);
4512 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4513 int32_t imm = Int32ConstantFrom(second);
4514 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4515 int ctz_imm = CTZ(abs_imm);
4516
4517 auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4518 __ Asr(out, in, ctz_imm);
4519 if (imm < 0) {
4520 __ Rsb(out, out, 0);
4521 }
4522 };
4523
4524 if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4525 // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4526 // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4527 // imm == 2
4528 // HDiv
4529 // add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4530 // asr out, out(0x80000001), #1 => out = 0xc0000000
4531 // This is the same as 'asr out, dividend(0x80000000), #1'
4532 //
4533 // imm > 2
4534 // HDiv
4535 // asr out, dividend(0x80000000), #31 => out = -1
4536 // add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4537 // where the number of the rightmost 1s is ctz_imm.
4538 // asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4539 // leftmost 1s is ctz_imm + 1.
4540 // This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4541 //
4542 // imm == INT32_MIN
4543 // HDiv
4544 // asr out, dividend(0x80000000), #31 => out = -1
4545 // add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4546 // asr out, out(0xc0000000), #31 => out = -1
4547 // rsb out, out(-1), #0 => out = 1
4548 // This is the same as
4549 // asr out, dividend(0x80000000), #31
4550 // rsb out, out, #0
4551 //
4552 //
4553    // INT32_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work only with bits
4554 // 0..30 of a dividend. For INT32_MIN those bits are zeros. So 'and' and 'ubfx' always
4555 // produce zero.
4556 if (instruction->IsDiv()) {
4557 generate_div_code(out, dividend);
4558 } else {
4559 if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4560 __ And(out, dividend, abs_imm - 1);
4561 } else {
4562 __ Ubfx(out, dividend, 0, ctz_imm);
4563 }
4564 return;
4565 }
4566 } else {
4567 vixl32::Register add_right_input = dividend;
4568 if (ctz_imm > 1) {
4569 __ Asr(out, dividend, 31);
4570 add_right_input = out;
4571 }
4572 __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
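    // Worked example (comment only): for imm == 4 (ctz_imm == 2) and dividend == -5, the Asr
    // gives -1, the Add adds (0xFFFFFFFF >> 30) == 3 to get -2, and the Asr by 2 below yields
    // -5 / 4 == -1; for the remainder, Bfc clears the low 2 bits of -2 giving -4, and
    // -5 - (-4) == -1 == -5 % 4, matching Java's truncation-toward-zero semantics.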
4573
4574 if (instruction->IsDiv()) {
4575 generate_div_code(out, out);
4576 } else {
4577 __ Bfc(out, 0, ctz_imm);
4578 __ Sub(out, dividend, out);
4579 }
4580 }
4581 }
4582
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4583 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4584 DCHECK(instruction->IsDiv() || instruction->IsRem());
4585 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4586
4587 LocationSummary* locations = instruction->GetLocations();
4588 Location second = locations->InAt(1);
4589 DCHECK(second.IsConstant());
4590
4591 vixl32::Register out = OutputRegister(instruction);
4592 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4593 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4594 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4595 int32_t imm = Int32ConstantFrom(second);
4596
4597 int64_t magic;
4598 int shift;
4599 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4600
4601 auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4602 vixl32::Register dividend,
4603 vixl32::Register temp1,
4604 vixl32::Register temp2) {
4605 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4606 __ Mov(temp1, static_cast<int32_t>(magic));
4607 if (magic > 0 && shift == 0) {
4608 __ Smull(temp2, out, dividend, temp1);
4609 } else {
4610 __ Smull(temp2, temp1, dividend, temp1);
4611 if (magic < 0) {
4612 // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4613 // than INT32_MAX. In such a case shift is never 0.
4614 // Proof:
4615 // m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4616 //
4617 // If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4618 // = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4619 // = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4620 //
4621 // 1 + (2^32 / d) is decreasing when d is increasing.
4622 // The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4623        // The minimum is 3, when d == 2^31 - 1.
4624 // So for all values of d in [3, INT32_MAX] m with p == 32 is in [3, INT32_MAX) and
4625 // is never less than 0.
4626 __ Add(temp1, temp1, dividend);
4627 }
4628 DCHECK_NE(shift, 0);
4629 __ Lsr(out, temp1, shift);
4630 }
4631 };
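  // Illustrative sketch (comment only, not generated code): for a divisor of 3, assuming
  // CalculateMagicAndShiftForDivRem returns the classic constants magic == 0x55555556 and
  // shift == 0, the helper above reduces to the C++ expression
  //   int32_t q = static_cast<int32_t>((int64_t{dividend} * 0x55555556LL) >> 32);
  // e.g. dividend == 100 gives q == 33 == 100 / 3.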
4632
4633 if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4634 // No need to adjust the result for a non-negative dividend and a positive divisor.
4635 if (instruction->IsDiv()) {
4636 generate_unsigned_div_code(out, dividend, temp1, temp2);
4637 } else {
4638 generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4639 __ Mov(temp2, imm);
4640 __ Mls(out, temp1, temp2, dividend);
4641 }
4642 } else {
4643 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4644 __ Mov(temp1, static_cast<int32_t>(magic));
4645 __ Smull(temp2, temp1, dividend, temp1);
4646
4647 if (imm > 0 && magic < 0) {
4648 __ Add(temp1, temp1, dividend);
4649 } else if (imm < 0 && magic > 0) {
4650 __ Sub(temp1, temp1, dividend);
4651 }
4652
4653 if (shift != 0) {
4654 __ Asr(temp1, temp1, shift);
4655 }
4656
4657 if (instruction->IsDiv()) {
4658 __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
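      // Worked example (comment only), assuming the standard constants magic == 0x92492493 and
      // shift == 2 for imm == 7: for dividend == -20 the Smull high word is 8, adding the
      // dividend gives -12, the Asr by 2 gives -3, and subtracting the sign bit (-1) above
      // yields -2 == -20 / 7 with truncation toward zero.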
4659 } else {
4660 __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4661 // TODO: Strength reduction for mls.
4662 __ Mov(temp2, imm);
4663 __ Mls(out, temp1, temp2, dividend);
4664 }
4665 }
4666 }
4667
GenerateDivRemConstantIntegral(HBinaryOperation * instruction)4668 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4669 HBinaryOperation* instruction) {
4670 DCHECK(instruction->IsDiv() || instruction->IsRem());
4671 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4672
4673 Location second = instruction->GetLocations()->InAt(1);
4674 DCHECK(second.IsConstant());
4675
4676 int32_t imm = Int32ConstantFrom(second);
4677 if (imm == 0) {
4678 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4679 } else if (imm == 1 || imm == -1) {
4680 DivRemOneOrMinusOne(instruction);
4681 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4682 DivRemByPowerOfTwo(instruction);
4683 } else {
4684 DCHECK(imm <= -2 || imm >= 2);
4685 GenerateDivRemWithAnyConstant(instruction);
4686 }
4687 }
4688
VisitDiv(HDiv * div)4689 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4690 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4691 if (div->GetResultType() == DataType::Type::kInt64) {
4692 // pLdiv runtime call.
4693 call_kind = LocationSummary::kCallOnMainOnly;
4694 } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4695    // sdiv will be replaced by another instruction sequence.
4696 } else if (div->GetResultType() == DataType::Type::kInt32 &&
4697 !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4698 // pIdivmod runtime call.
4699 call_kind = LocationSummary::kCallOnMainOnly;
4700 }
4701
4702 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4703
4704 switch (div->GetResultType()) {
4705 case DataType::Type::kInt32: {
4706 HInstruction* divisor = div->InputAt(1);
4707 if (divisor->IsConstant()) {
4708 locations->SetInAt(0, Location::RequiresRegister());
4709 locations->SetInAt(1, Location::ConstantLocation(divisor));
4710 int32_t value = Int32ConstantFrom(divisor);
4711 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4712 if (value == 1 || value == 0 || value == -1) {
4713 // No temp register required.
4714 } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4715 value != 2 &&
4716 value != -2 &&
4717 !HasNonNegativeOrMinIntInputAt(div, 0)) {
4718 // The "out" register is used as a temporary, so it overlaps with the inputs.
4719 out_overlaps = Location::kOutputOverlap;
4720 } else {
4721 locations->AddRegisterTemps(2);
4722 }
4723 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4724 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4725 locations->SetInAt(0, Location::RequiresRegister());
4726 locations->SetInAt(1, Location::RequiresRegister());
4727 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4728 } else {
4729 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4730 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4731 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4732 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4733 // we only need the former.
4734 locations->SetOut(LocationFrom(r0));
4735 }
4736 break;
4737 }
4738 case DataType::Type::kInt64: {
4739 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4740 locations->SetInAt(0, LocationFrom(
4741 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4742 locations->SetInAt(1, LocationFrom(
4743 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4744 locations->SetOut(LocationFrom(r0, r1));
4745 break;
4746 }
4747 case DataType::Type::kFloat32:
4748 case DataType::Type::kFloat64: {
4749 locations->SetInAt(0, Location::RequiresFpuRegister());
4750 locations->SetInAt(1, Location::RequiresFpuRegister());
4751 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4752 break;
4753 }
4754
4755 default:
4756 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4757 }
4758 }
4759
VisitDiv(HDiv * div)4760 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4761 Location lhs = div->GetLocations()->InAt(0);
4762 Location rhs = div->GetLocations()->InAt(1);
4763
4764 switch (div->GetResultType()) {
4765 case DataType::Type::kInt32: {
4766 if (rhs.IsConstant()) {
4767 GenerateDivRemConstantIntegral(div);
4768 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4769 __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4770 } else {
4771 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4772 DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4773 DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4774 DCHECK(r0.Is(OutputRegister(div)));
4775
4776 codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
4777 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4778 }
4779 break;
4780 }
4781
4782 case DataType::Type::kInt64: {
4783 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4784 DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4785 DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4786 DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4787 DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4788 DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4789 DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4790
4791 codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
4792 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4793 break;
4794 }
4795
4796 case DataType::Type::kFloat32:
4797 case DataType::Type::kFloat64:
4798 __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4799 break;
4800
4801 default:
4802 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4803 }
4804 }
4805
VisitRem(HRem * rem)4806 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4807 DataType::Type type = rem->GetResultType();
4808
4809 // Most remainders are implemented in the runtime.
4810 LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4811 if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4812    // sdiv will be replaced by another instruction sequence.
4813 call_kind = LocationSummary::kNoCall;
4814 } else if ((rem->GetResultType() == DataType::Type::kInt32)
4815 && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4816    // We have a hardware divide instruction for int32, so the remainder takes three instructions.
4817 call_kind = LocationSummary::kNoCall;
4818 }
4819
4820 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4821
4822 switch (type) {
4823 case DataType::Type::kInt32: {
4824 HInstruction* divisor = rem->InputAt(1);
4825 if (divisor->IsConstant()) {
4826 locations->SetInAt(0, Location::RequiresRegister());
4827 locations->SetInAt(1, Location::ConstantLocation(divisor));
4828 int32_t value = Int32ConstantFrom(divisor);
4829 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4830 if (value == 1 || value == 0 || value == -1) {
4831 // No temp register required.
4832 } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4833 // The "out" register is used as a temporary, so it overlaps with the inputs.
4834 out_overlaps = Location::kOutputOverlap;
4835 } else {
4836 locations->AddRegisterTemps(2);
4837 }
4838 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4839 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4840 locations->SetInAt(0, Location::RequiresRegister());
4841 locations->SetInAt(1, Location::RequiresRegister());
4842 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4843 locations->AddTemp(Location::RequiresRegister());
4844 } else {
4845 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4846 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4847 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4848 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4849 // we only need the latter.
4850 locations->SetOut(LocationFrom(r1));
4851 }
4852 break;
4853 }
4854 case DataType::Type::kInt64: {
4855 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4856 locations->SetInAt(0, LocationFrom(
4857 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4858 locations->SetInAt(1, LocationFrom(
4859 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4860 // The runtime helper puts the output in R2,R3.
4861 locations->SetOut(LocationFrom(r2, r3));
4862 break;
4863 }
4864 case DataType::Type::kFloat32: {
4865 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4866 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4867 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4868 locations->SetOut(LocationFrom(s0));
4869 break;
4870 }
4871
4872 case DataType::Type::kFloat64: {
4873 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4874 locations->SetInAt(0, LocationFrom(
4875 calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4876 locations->SetInAt(1, LocationFrom(
4877 calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4878 locations->SetOut(LocationFrom(s0, s1));
4879 break;
4880 }
4881
4882 default:
4883 LOG(FATAL) << "Unexpected rem type " << type;
4884 }
4885 }
4886
VisitRem(HRem * rem)4887 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4888 LocationSummary* locations = rem->GetLocations();
4889 Location second = locations->InAt(1);
4890
4891 DataType::Type type = rem->GetResultType();
4892 switch (type) {
4893 case DataType::Type::kInt32: {
4894 vixl32::Register reg1 = InputRegisterAt(rem, 0);
4895 vixl32::Register out_reg = OutputRegister(rem);
4896 if (second.IsConstant()) {
4897 GenerateDivRemConstantIntegral(rem);
4898 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4899 vixl32::Register reg2 = RegisterFrom(second);
4900 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4901
4902 // temp = reg1 / reg2 (integer division)
4903 // dest = reg1 - temp * reg2
4904 __ Sdiv(temp, reg1, reg2);
4905 __ Mls(out_reg, temp, reg2, reg1);
4906 } else {
4907 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4908 DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4909 DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4910 DCHECK(out_reg.Is(r1));
4911
4912 codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
4913 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4914 }
4915 break;
4916 }
4917
4918 case DataType::Type::kInt64: {
4919 codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
4920 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4921 break;
4922 }
4923
4924 case DataType::Type::kFloat32: {
4925 codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
4926 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4927 break;
4928 }
4929
4930 case DataType::Type::kFloat64: {
4931 codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
4932 CheckEntrypointTypes<kQuickFmod, double, double, double>();
4933 break;
4934 }
4935
4936 default:
4937 LOG(FATAL) << "Unexpected rem type " << type;
4938 }
4939 }
4940
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4941 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4942 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4943 switch (minmax->GetResultType()) {
4944 case DataType::Type::kInt32:
4945 locations->SetInAt(0, Location::RequiresRegister());
4946 locations->SetInAt(1, Location::RequiresRegister());
4947 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4948 break;
4949 case DataType::Type::kInt64:
4950 locations->SetInAt(0, Location::RequiresRegister());
4951 locations->SetInAt(1, Location::RequiresRegister());
4952 locations->SetOut(Location::SameAsFirstInput());
4953 break;
4954 case DataType::Type::kFloat32:
4955 locations->SetInAt(0, Location::RequiresFpuRegister());
4956 locations->SetInAt(1, Location::RequiresFpuRegister());
4957 locations->SetOut(Location::SameAsFirstInput());
4958 locations->AddTemp(Location::RequiresRegister());
4959 break;
4960 case DataType::Type::kFloat64:
4961 locations->SetInAt(0, Location::RequiresFpuRegister());
4962 locations->SetInAt(1, Location::RequiresFpuRegister());
4963 locations->SetOut(Location::SameAsFirstInput());
4964 break;
4965 default:
4966 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4967 }
4968 }
4969
GenerateMinMaxInt(LocationSummary * locations,bool is_min)4970 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4971 Location op1_loc = locations->InAt(0);
4972 Location op2_loc = locations->InAt(1);
4973 Location out_loc = locations->Out();
4974
4975 vixl32::Register op1 = RegisterFrom(op1_loc);
4976 vixl32::Register op2 = RegisterFrom(op2_loc);
4977 vixl32::Register out = RegisterFrom(out_loc);
4978
4979 __ Cmp(op1, op2);
4980
4981 {
4982 ExactAssemblyScope aas(GetVIXLAssembler(),
4983 3 * kMaxInstructionSizeInBytes,
4984 CodeBufferCheckScope::kMaximumSize);
4985
4986 __ ite(is_min ? lt : gt);
4987 __ mov(is_min ? lt : gt, out, op1);
4988 __ mov(is_min ? ge : le, out, op2);
4989 }
4990 }
4991
GenerateMinMaxLong(LocationSummary * locations,bool is_min)4992 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4993 Location op1_loc = locations->InAt(0);
4994 Location op2_loc = locations->InAt(1);
4995 Location out_loc = locations->Out();
4996
4997 // Optimization: don't generate any code if inputs are the same.
4998 if (op1_loc.Equals(op2_loc)) {
4999 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
5000 return;
5001 }
5002
5003 vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
5004 vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
5005 vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
5006 vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
5007 vixl32::Register out_lo = LowRegisterFrom(out_loc);
5008 vixl32::Register out_hi = HighRegisterFrom(out_loc);
5009 UseScratchRegisterScope temps(GetVIXLAssembler());
5010 const vixl32::Register temp = temps.Acquire();
5011
5012 DCHECK(op1_lo.Is(out_lo));
5013 DCHECK(op1_hi.Is(out_hi));
5014
5015 // Compare op1 >= op2, or op1 < op2.
5016 __ Cmp(out_lo, op2_lo);
5017 __ Sbcs(temp, out_hi, op2_hi);
5018
5019 // Now GE/LT condition code is correct for the long comparison.
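  // (Cmp subtracts the low words and Sbcs subtracts the high words with borrow, so the N and V
  // flags reflect the sign of the full 64-bit difference; GE/LT are therefore valid here even
  // though EQ/NE would not be.)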
5020 {
5021 vixl32::ConditionType cond = is_min ? ge : lt;
5022 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5023 3 * kMaxInstructionSizeInBytes,
5024 CodeBufferCheckScope::kMaximumSize);
5025 __ itt(cond);
5026 __ mov(cond, out_lo, op2_lo);
5027 __ mov(cond, out_hi, op2_hi);
5028 }
5029 }
5030
GenerateMinMaxFloat(HInstruction * minmax,bool is_min)5031 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
5032 LocationSummary* locations = minmax->GetLocations();
5033 Location op1_loc = locations->InAt(0);
5034 Location op2_loc = locations->InAt(1);
5035 Location out_loc = locations->Out();
5036
5037 // Optimization: don't generate any code if inputs are the same.
5038 if (op1_loc.Equals(op2_loc)) {
5039 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
5040 return;
5041 }
5042
5043 vixl32::SRegister op1 = SRegisterFrom(op1_loc);
5044 vixl32::SRegister op2 = SRegisterFrom(op2_loc);
5045 vixl32::SRegister out = SRegisterFrom(out_loc);
5046
5047 UseScratchRegisterScope temps(GetVIXLAssembler());
5048 const vixl32::Register temp1 = temps.Acquire();
5049 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
5050 vixl32::Label nan, done;
5051 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5052
5053 DCHECK(op1.Is(out));
5054
5055 __ Vcmp(op1, op2);
5056 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5057 __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
5058
5059 // op1 <> op2
5060 vixl32::ConditionType cond = is_min ? gt : lt;
5061 {
5062 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5063 2 * kMaxInstructionSizeInBytes,
5064 CodeBufferCheckScope::kMaximumSize);
5065 __ it(cond);
5066 __ vmov(cond, F32, out, op2);
5067 }
5068  // For <> (not equal), the min/max calculation is already done.
5069 __ B(ne, final_label, /* is_far_target= */ false);
5070
5071 // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
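  // (Added note: +0.0 and -0.0 compare equal, but Java requires min(+0.0, -0.0) == -0.0 and
  // max(+0.0, -0.0) == +0.0. Since -0.0 differs only in the sign bit, OR-ing the raw bit
  // patterns selects -0.0 for min and AND-ing them selects +0.0 for max.)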
5072 __ Vmov(temp1, op1);
5073 __ Vmov(temp2, op2);
5074 if (is_min) {
5075 __ Orr(temp1, temp1, temp2);
5076 } else {
5077 __ And(temp1, temp1, temp2);
5078 }
5079 __ Vmov(out, temp1);
5080 __ B(final_label);
5081
5082 // handle NaN input.
5083 __ Bind(&nan);
5084 __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
5085 __ Vmov(out, temp1);
5086
5087 if (done.IsReferenced()) {
5088 __ Bind(&done);
5089 }
5090 }
5091
GenerateMinMaxDouble(HInstruction * minmax,bool is_min)5092 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
5093 LocationSummary* locations = minmax->GetLocations();
5094 Location op1_loc = locations->InAt(0);
5095 Location op2_loc = locations->InAt(1);
5096 Location out_loc = locations->Out();
5097
5098 // Optimization: don't generate any code if inputs are the same.
5099 if (op1_loc.Equals(op2_loc)) {
5100    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5101 return;
5102 }
5103
5104 vixl32::DRegister op1 = DRegisterFrom(op1_loc);
5105 vixl32::DRegister op2 = DRegisterFrom(op2_loc);
5106 vixl32::DRegister out = DRegisterFrom(out_loc);
5107 vixl32::Label handle_nan_eq, done;
5108 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5109
5110 DCHECK(op1.Is(out));
5111
5112 __ Vcmp(op1, op2);
5113 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5114 __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
5115
5116 // op1 <> op2
5117 vixl32::ConditionType cond = is_min ? gt : lt;
5118 {
5119 ExactAssemblyScope it_scope(GetVIXLAssembler(),
5120 2 * kMaxInstructionSizeInBytes,
5121 CodeBufferCheckScope::kMaximumSize);
5122 __ it(cond);
5123 __ vmov(cond, F64, out, op2);
5124 }
5125  // For <> (not equal), the min/max calculation is already done.
5126 __ B(ne, final_label, /* is_far_target= */ false);
5127
5128 // handle op1 == op2, max(+0.0,-0.0).
5129 if (!is_min) {
5130 __ Vand(F64, out, op1, op2);
5131 __ B(final_label);
5132 }
5133
5134 // handle op1 == op2, min(+0.0,-0.0), NaN input.
5135 __ Bind(&handle_nan_eq);
5136 __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
5137
5138 if (done.IsReferenced()) {
5139 __ Bind(&done);
5140 }
5141 }
5142
GenerateMinMax(HBinaryOperation * minmax,bool is_min)5143 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
5144 DataType::Type type = minmax->GetResultType();
5145 switch (type) {
5146 case DataType::Type::kInt32:
5147 GenerateMinMaxInt(minmax->GetLocations(), is_min);
5148 break;
5149 case DataType::Type::kInt64:
5150 GenerateMinMaxLong(minmax->GetLocations(), is_min);
5151 break;
5152 case DataType::Type::kFloat32:
5153 GenerateMinMaxFloat(minmax, is_min);
5154 break;
5155 case DataType::Type::kFloat64:
5156 GenerateMinMaxDouble(minmax, is_min);
5157 break;
5158 default:
5159 LOG(FATAL) << "Unexpected type for HMinMax " << type;
5160 }
5161 }
5162
VisitMin(HMin * min)5163 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
5164 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
5165 }
5166
VisitMin(HMin * min)5167 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
5168 GenerateMinMax(min, /*is_min*/ true);
5169 }
5170
VisitMax(HMax * max)5171 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
5172 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
5173 }
5174
VisitMax(HMax * max)5175 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
5176 GenerateMinMax(max, /*is_min*/ false);
5177 }
5178
VisitAbs(HAbs * abs)5179 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
5180 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5181 switch (abs->GetResultType()) {
5182 case DataType::Type::kInt32:
5183 case DataType::Type::kInt64:
5184 locations->SetInAt(0, Location::RequiresRegister());
5185 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5186 locations->AddTemp(Location::RequiresRegister());
5187 break;
5188 case DataType::Type::kFloat32:
5189 case DataType::Type::kFloat64:
5190 locations->SetInAt(0, Location::RequiresFpuRegister());
5191 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5192 break;
5193 default:
5194 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5195 }
5196 }
5197
VisitAbs(HAbs * abs)5198 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
5199 LocationSummary* locations = abs->GetLocations();
5200 switch (abs->GetResultType()) {
5201 case DataType::Type::kInt32: {
5202 vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
5203 vixl32::Register out_reg = RegisterFrom(locations->Out());
5204 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
5205 __ Asr(mask, in_reg, 31);
5206 __ Add(out_reg, in_reg, mask);
5207 __ Eor(out_reg, out_reg, mask);
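      // Branch-free abs: mask is 0 for non-negative inputs and -1 for negative ones, so
      // (x + mask) ^ mask leaves non-negative x unchanged and otherwise computes ~(x - 1) == -x,
      // e.g. x == -5: -5 + -1 == -6, and -6 ^ -1 == 5. (INT32_MIN stays INT32_MIN, as in Java.)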
5208 break;
5209 }
5210 case DataType::Type::kInt64: {
5211 Location in = locations->InAt(0);
5212 vixl32::Register in_reg_lo = LowRegisterFrom(in);
5213 vixl32::Register in_reg_hi = HighRegisterFrom(in);
5214 Location output = locations->Out();
5215 vixl32::Register out_reg_lo = LowRegisterFrom(output);
5216 vixl32::Register out_reg_hi = HighRegisterFrom(output);
5217 DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
5218 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
5219 __ Asr(mask, in_reg_hi, 31);
5220 __ Adds(out_reg_lo, in_reg_lo, mask);
5221 __ Adc(out_reg_hi, in_reg_hi, mask);
5222 __ Eor(out_reg_lo, out_reg_lo, mask);
5223 __ Eor(out_reg_hi, out_reg_hi, mask);
5224 break;
5225 }
5226 case DataType::Type::kFloat32:
5227 case DataType::Type::kFloat64:
5228 __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
5229 break;
5230 default:
5231 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5232 }
5233 }
5234
VisitDivZeroCheck(HDivZeroCheck * instruction)5235 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5236 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5237 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5238 }
5239
VisitDivZeroCheck(HDivZeroCheck * instruction)5240 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5241 DivZeroCheckSlowPathARMVIXL* slow_path =
5242 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5243 codegen_->AddSlowPath(slow_path);
5244
5245 LocationSummary* locations = instruction->GetLocations();
5246 Location value = locations->InAt(0);
5247
5248 switch (instruction->GetType()) {
5249 case DataType::Type::kBool:
5250 case DataType::Type::kUint8:
5251 case DataType::Type::kInt8:
5252 case DataType::Type::kUint16:
5253 case DataType::Type::kInt16:
5254 case DataType::Type::kInt32: {
5255 if (value.IsRegister()) {
5256 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5257 } else {
5258 DCHECK(value.IsConstant()) << value;
5259 if (Int32ConstantFrom(value) == 0) {
5260 __ B(slow_path->GetEntryLabel());
5261 }
5262 }
5263 break;
5264 }
5265 case DataType::Type::kInt64: {
5266 if (value.IsRegisterPair()) {
5267 UseScratchRegisterScope temps(GetVIXLAssembler());
5268 vixl32::Register temp = temps.Acquire();
5269 __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5270 __ B(eq, slow_path->GetEntryLabel());
5271 } else {
5272 DCHECK(value.IsConstant()) << value;
5273 if (Int64ConstantFrom(value) == 0) {
5274 __ B(slow_path->GetEntryLabel());
5275 }
5276 }
5277 break;
5278 }
5279 default:
5280 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5281 }
5282 }
5283
HandleIntegerRotate(HBinaryOperation * rotate)5284 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HBinaryOperation* rotate) {
5285 LocationSummary* locations = rotate->GetLocations();
5286 vixl32::Register in = InputRegisterAt(rotate, 0);
5287 Location rhs = locations->InAt(1);
5288 vixl32::Register out = OutputRegister(rotate);
5289
5290 if (rhs.IsConstant()) {
5291    // Arm32 and Thumb2 assemblers require a rotation in the interval [1,31],
5292    // so map all rotations to a positive equivalent in that range
5293    // (e.g. a rotation left *or* right by -2 bits equals 30 bits in the same direction).
5294 uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
5295 if (rotate->IsRol()) {
5296 rot = -rot;
5297 }
5298 rot &= 0x1f;
5299
5300 if (rot) {
5301 // Rotate, mapping left rotations to right equivalents if necessary.
5302 // (e.g. left by 2 bits == right by 30.)
5303 __ Ror(out, in, rot);
5304 } else if (!out.Is(in)) {
5305 __ Mov(out, in);
5306 }
5307 } else {
5308 if (rotate->IsRol()) {
5309 UseScratchRegisterScope temps(GetVIXLAssembler());
5310
5311 vixl32::Register negated = temps.Acquire();
5312 __ Rsb(negated, RegisterFrom(rhs), 0);
5313 __ Ror(out, in, negated);
5314 } else {
5315 DCHECK(rotate->IsRor());
5316 __ Ror(out, in, RegisterFrom(rhs));
5317 }
5318 }
5319 }
5320
5321 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5322 // rotates by swapping input regs (effectively rotating by the first 32-bits of
5323 // a larger rotation) or flipping direction (thus treating larger right/left
5324 // rotations as sub-word sized rotations in the other direction) as appropriate.
HandleLongRotate(HBinaryOperation * rotate)5325 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HBinaryOperation* rotate) {
5326 LocationSummary* locations = rotate->GetLocations();
5327 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5328 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5329 Location rhs = locations->InAt(1);
5330 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5331 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5332
5333 if (rhs.IsConstant()) {
5334 uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5335
5336 if (rotate->IsRol()) {
5337 rot = -rot;
5338 }
5339
5340    // Map all rotations to positive equivalents in the interval [0,63].
5341 rot &= kMaxLongShiftDistance;
5342    // For rotations of a word or more, 'pre-rotate' by 32 bits so that the rotate
5343    // logic below reduces to a simple pair of binary ORRs
5344    // (e.g. 34 bits == in_reg swap + 2 bits right).
5345 if (rot >= kArmBitsPerWord) {
5346 rot -= kArmBitsPerWord;
5347 std::swap(in_reg_hi, in_reg_lo);
5348 }
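    // Worked example (comment only): rotating 0x00000001'00000000 right by 34 first swaps the
    // halves (in_hi == 0, in_lo == 1) and reduces rot to 2, then the Lsr/Orr pair below produces
    // out_hi == 0x40000000 and out_lo == 0, i.e. 0x40000000'00000000 == ROR64(2^32, 34).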
5349 // Rotate, or mov to out for zero or word size rotations.
5350 if (rot != 0u) {
5351 __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5352 __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5353 __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5354 __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5355 } else {
5356 __ Mov(out_reg_lo, in_reg_lo);
5357 __ Mov(out_reg_hi, in_reg_hi);
5358 }
5359 } else {
5360 vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5361 vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5362 vixl32::Label end;
5363 vixl32::Label shift_by_32_plus_shift_right;
5364 vixl32::Label* final_label = codegen_->GetFinalLabel(rotate, &end);
5365
5366      // Negate rhs, following the approach used in VisitNeg.
5367 if (rotate->IsRol()) {
5368 Location negated = locations->GetTemp(2);
5369 Location in = rhs;
5370
5371 __ Rsb(RegisterFrom(negated), RegisterFrom(in), 0);
5372
5373 rhs = negated;
5374 }
5375
5376 __ And(shift_right, RegisterFrom(rhs), 0x1F);
5377 __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5378 __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5379 __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5380
5381 // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5382 // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5383 __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5384 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5385 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5386 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5387 __ Lsr(shift_left, in_reg_hi, shift_right);
5388 __ Add(out_reg_lo, out_reg_lo, shift_left);
5389 __ B(final_label);
5390
5391 __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
5392 // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5393 // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5394 __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5395 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5396 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5397 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5398 __ Lsl(shift_right, in_reg_hi, shift_left);
5399 __ Add(out_reg_lo, out_reg_lo, shift_right);
5400
5401 if (end.IsReferenced()) {
5402 __ Bind(&end);
5403 }
5404 }
5405 }
5406
HandleRotate(HBinaryOperation * rotate)5407 void LocationsBuilderARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5408 LocationSummary* locations =
5409 new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5410 HInstruction* shift = rotate->InputAt(1);
5411 switch (rotate->GetResultType()) {
5412 case DataType::Type::kInt32: {
5413 locations->SetInAt(0, Location::RequiresRegister());
5414 locations->SetInAt(1, Location::RegisterOrConstant(shift));
5415 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5416 break;
5417 }
5418 case DataType::Type::kInt64: {
5419 locations->SetInAt(0, Location::RequiresRegister());
5420 if (shift->IsConstant()) {
5421 locations->SetInAt(1, Location::ConstantLocation(shift));
5422 } else {
5423 locations->SetInAt(1, Location::RequiresRegister());
5424
5425 if (rotate->IsRor()) {
5426 locations->AddRegisterTemps(2);
5427 } else {
5428 DCHECK(rotate->IsRol());
5429 locations->AddRegisterTemps(3);
5430 }
5431 }
5432 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5433 break;
5434 }
5435 default:
5436 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5437 }
5438 }
5439
VisitRol(HRol * rol)5440 void LocationsBuilderARMVIXL::VisitRol(HRol* rol) {
5441 HandleRotate(rol);
5442 }
5443
VisitRor(HRor * ror)5444 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5445 HandleRotate(ror);
5446 }
5447
HandleRotate(HBinaryOperation * rotate)5448 void InstructionCodeGeneratorARMVIXL::HandleRotate(HBinaryOperation* rotate) {
5449 DataType::Type type = rotate->GetResultType();
5450 switch (type) {
5451 case DataType::Type::kInt32: {
5452 HandleIntegerRotate(rotate);
5453 break;
5454 }
5455 case DataType::Type::kInt64: {
5456 HandleLongRotate(rotate);
5457 break;
5458 }
5459 default:
5460 LOG(FATAL) << "Unexpected operation type " << type;
5461 UNREACHABLE();
5462 }
5463 }
5464
VisitRol(HRol * rol)5465 void InstructionCodeGeneratorARMVIXL::VisitRol(HRol* rol) {
5466 HandleRotate(rol);
5467 }
5468
VisitRor(HRor * ror)5469 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5470 HandleRotate(ror);
5471 }
5472
HandleShift(HBinaryOperation * op)5473 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5474 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5475
5476 LocationSummary* locations =
5477 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5478
5479 HInstruction* shift = op->InputAt(1);
5480 switch (op->GetResultType()) {
5481 case DataType::Type::kInt32: {
5482 locations->SetInAt(0, Location::RequiresRegister());
5483 if (shift->IsConstant()) {
5484 locations->SetInAt(1, Location::ConstantLocation(shift));
5485 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5486 } else {
5487 locations->SetInAt(1, Location::RequiresRegister());
5488 // Make the output overlap, as it will be used to hold the masked
5489 // second input.
5490 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5491 }
5492 break;
5493 }
5494 case DataType::Type::kInt64: {
5495 locations->SetInAt(0, Location::RequiresRegister());
5496 if (shift->IsConstant()) {
5497 locations->SetInAt(1, Location::ConstantLocation(shift));
5498 // For simplicity, use kOutputOverlap even though we only require that low registers
5499 // don't clash with high registers which the register allocator currently guarantees.
5500 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5501 } else {
5502 locations->SetInAt(1, Location::RequiresRegister());
5503 locations->AddTemp(Location::RequiresRegister());
5504 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5505 }
5506 break;
5507 }
5508 default:
5509 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5510 }
5511 }
5512
HandleShift(HBinaryOperation * op)5513 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5514 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5515
5516 LocationSummary* locations = op->GetLocations();
5517 Location out = locations->Out();
5518 Location first = locations->InAt(0);
5519 Location second = locations->InAt(1);
5520
5521 DataType::Type type = op->GetResultType();
5522 switch (type) {
5523 case DataType::Type::kInt32: {
5524 vixl32::Register out_reg = OutputRegister(op);
5525 vixl32::Register first_reg = InputRegisterAt(op, 0);
5526 if (second.IsRegister()) {
5527 vixl32::Register second_reg = RegisterFrom(second);
5528 // ARM doesn't mask the shift count so we need to do it ourselves.
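        // (Java/dex semantics: only the low five bits of an int shift count are significant,
        // e.g. `x << 33` behaves as `x << 1`, hence the AND with kMaxIntShiftDistance.)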
5529 __ And(out_reg, second_reg, kMaxIntShiftDistance);
5530 if (op->IsShl()) {
5531 __ Lsl(out_reg, first_reg, out_reg);
5532 } else if (op->IsShr()) {
5533 __ Asr(out_reg, first_reg, out_reg);
5534 } else {
5535 __ Lsr(out_reg, first_reg, out_reg);
5536 }
5537 } else {
5538 int32_t cst = Int32ConstantFrom(second);
5539 uint32_t shift_value = cst & kMaxIntShiftDistance;
5540 if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
5541 __ Mov(out_reg, first_reg);
5542 } else if (op->IsShl()) {
5543 __ Lsl(out_reg, first_reg, shift_value);
5544 } else if (op->IsShr()) {
5545 __ Asr(out_reg, first_reg, shift_value);
5546 } else {
5547 __ Lsr(out_reg, first_reg, shift_value);
5548 }
5549 }
5550 break;
5551 }
5552 case DataType::Type::kInt64: {
5553 vixl32::Register o_h = HighRegisterFrom(out);
5554 vixl32::Register o_l = LowRegisterFrom(out);
5555
5556 vixl32::Register high = HighRegisterFrom(first);
5557 vixl32::Register low = LowRegisterFrom(first);
5558
5559 if (second.IsRegister()) {
5560 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5561
5562 vixl32::Register second_reg = RegisterFrom(second);
5563
5564 if (op->IsShl()) {
5565 __ And(o_l, second_reg, kMaxLongShiftDistance);
5566 // Shift the high part
5567 __ Lsl(o_h, high, o_l);
5568          // Shift the low part and `or` the bits that overflowed into the high part
5569 __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5570 __ Lsr(temp, low, temp);
5571 __ Orr(o_h, o_h, temp);
5572 // If the shift is > 32 bits, override the high part
5573 __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
5574 {
5575 ExactAssemblyScope guard(GetVIXLAssembler(),
5576 2 * vixl32::kMaxInstructionSizeInBytes,
5577 CodeBufferCheckScope::kMaximumSize);
5578 __ it(pl);
5579 __ lsl(pl, o_h, low, temp);
5580 }
5581 // Shift the low part
5582 __ Lsl(o_l, low, o_l);
5583 } else if (op->IsShr()) {
5584 __ And(o_h, second_reg, kMaxLongShiftDistance);
5585 // Shift the low part
5586 __ Lsr(o_l, low, o_h);
5587          // Shift the high part and `or` the bits that flowed into the low part
5588 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5589 __ Lsl(temp, high, temp);
5590 __ Orr(o_l, o_l, temp);
5591 // If the shift is > 32 bits, override the low part
5592 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5593 {
5594 ExactAssemblyScope guard(GetVIXLAssembler(),
5595 2 * vixl32::kMaxInstructionSizeInBytes,
5596 CodeBufferCheckScope::kMaximumSize);
5597 __ it(pl);
5598 __ asr(pl, o_l, high, temp);
5599 }
5600 // Shift the high part
5601 __ Asr(o_h, high, o_h);
5602 } else {
5603 __ And(o_h, second_reg, kMaxLongShiftDistance);
5604          // Same as Shr, except we use `Lsr`s instead of `Asr`s.
5605 __ Lsr(o_l, low, o_h);
5606 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5607 __ Lsl(temp, high, temp);
5608 __ Orr(o_l, o_l, temp);
5609 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5610 {
5611 ExactAssemblyScope guard(GetVIXLAssembler(),
5612 2 * vixl32::kMaxInstructionSizeInBytes,
5613 CodeBufferCheckScope::kMaximumSize);
5614 __ it(pl);
5615 __ lsr(pl, o_l, high, temp);
5616 }
5617 __ Lsr(o_h, high, o_h);
5618 }
5619 } else {
5620 // Register allocator doesn't create partial overlap.
5621 DCHECK(!o_l.Is(high));
5622 DCHECK(!o_h.Is(low));
5623 int32_t cst = Int32ConstantFrom(second);
5624 uint32_t shift_value = cst & kMaxLongShiftDistance;
5625 if (shift_value > 32) {
5626 if (op->IsShl()) {
5627 __ Lsl(o_h, low, shift_value - 32);
5628 __ Mov(o_l, 0);
5629 } else if (op->IsShr()) {
5630 __ Asr(o_l, high, shift_value - 32);
5631 __ Asr(o_h, high, 31);
5632 } else {
5633 __ Lsr(o_l, high, shift_value - 32);
5634 __ Mov(o_h, 0);
5635 }
5636 } else if (shift_value == 32) {
5637 if (op->IsShl()) {
5638 __ Mov(o_h, low);
5639 __ Mov(o_l, 0);
5640 } else if (op->IsShr()) {
5641 __ Mov(o_l, high);
5642 __ Asr(o_h, high, 31);
5643 } else {
5644 __ Mov(o_l, high);
5645 __ Mov(o_h, 0);
5646 }
5647 } else if (shift_value == 1) {
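          // A 1-bit shift of the pair can go through the carry flag: for Shl, Lsls shifts the
          // low word and Adc(o_h, high, high) computes 2 * high + carry, i.e. the high word
          // shifted left with the carried-out bit; for Shr/UShr, Asrs/Lsrs drop bit 0 of the
          // high word into the carry and Rrx rotates it into bit 31 of the low word.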
5648 if (op->IsShl()) {
5649 __ Lsls(o_l, low, 1);
5650 __ Adc(o_h, high, high);
5651 } else if (op->IsShr()) {
5652 __ Asrs(o_h, high, 1);
5653 __ Rrx(o_l, low);
5654 } else {
5655 __ Lsrs(o_h, high, 1);
5656 __ Rrx(o_l, low);
5657 }
5658 } else if (shift_value == 0) {
5659 __ Mov(o_l, low);
5660 __ Mov(o_h, high);
5661 } else {
5662 DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5663 if (op->IsShl()) {
5664 __ Lsl(o_h, high, shift_value);
5665 __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5666 __ Lsl(o_l, low, shift_value);
5667 } else if (op->IsShr()) {
5668 __ Lsr(o_l, low, shift_value);
5669 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5670 __ Asr(o_h, high, shift_value);
5671 } else {
5672 __ Lsr(o_l, low, shift_value);
5673 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5674 __ Lsr(o_h, high, shift_value);
5675 }
5676 }
5677 }
5678 break;
5679 }
5680 default:
5681 LOG(FATAL) << "Unexpected operation type " << type;
5682 UNREACHABLE();
5683 }
5684 }
5685
VisitShl(HShl * shl)5686 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5687 HandleShift(shl);
5688 }
5689
5690 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5691 HandleShift(shl);
5692 }
5693
5694 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5695 HandleShift(shr);
5696 }
5697
5698 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5699 HandleShift(shr);
5700 }
5701
5702 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5703 HandleShift(ushr);
5704 }
5705
5706 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5707 HandleShift(ushr);
5708 }
5709
5710 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5711 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5712 instruction, LocationSummary::kCallOnMainOnly);
5713 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5714 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5715 locations->SetOut(LocationFrom(r0));
5716 }
5717
5718 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5719 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5720 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5721 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5722 }
5723
5724 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5725 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5726 instruction, LocationSummary::kCallOnMainOnly);
5727 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5728 locations->SetOut(LocationFrom(r0));
5729 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5730 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5731 }
5732
5733 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5734 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5735 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5736 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5737 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5738 DCHECK(!codegen_->IsLeafMethod());
5739 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5740 }
5741
5742 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5743 LocationSummary* locations =
5744 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5745 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5746 if (location.IsStackSlot()) {
5747 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5748 } else if (location.IsDoubleStackSlot()) {
5749 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5750 }
5751 locations->SetOut(location);
5752 }
5753
5754 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5755 [[maybe_unused]] HParameterValue* instruction) {
5756 // Nothing to do, the parameter is already at its location.
5757 }
5758
5759 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5760 LocationSummary* locations =
5761 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5762 locations->SetOut(LocationFrom(kMethodRegister));
5763 }
5764
5765 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5766 [[maybe_unused]] HCurrentMethod* instruction) {
5767 // Nothing to do, the method is already at its location.
5768 }
5769
5770 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5771 LocationSummary* locations =
5772 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5773 locations->SetInAt(0, Location::RequiresRegister());
5774 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5775 }
5776
5777 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5778 LocationSummary* locations = not_->GetLocations();
5779 Location out = locations->Out();
5780 Location in = locations->InAt(0);
5781 switch (not_->GetResultType()) {
5782 case DataType::Type::kInt32:
5783 __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5784 break;
5785
5786 case DataType::Type::kInt64:
5787 __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5788 __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5789 break;
5790
5791 default:
5792 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5793 }
5794 }
5795
5796 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5797 LocationSummary* locations =
5798 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5799 locations->SetInAt(0, Location::RequiresRegister());
5800 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5801 }
5802
5803 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5804 __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5805 }
5806
5807 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5808 LocationSummary* locations =
5809 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5810 switch (compare->GetComparisonType()) {
5811 case DataType::Type::kBool:
5812 case DataType::Type::kUint8:
5813 case DataType::Type::kInt8:
5814 case DataType::Type::kUint16:
5815 case DataType::Type::kInt16:
5816 case DataType::Type::kInt32:
5817 case DataType::Type::kUint32:
5818 case DataType::Type::kInt64:
5819 case DataType::Type::kUint64: {
5820 locations->SetInAt(0, Location::RequiresRegister());
5821 locations->SetInAt(1, Location::RequiresRegister());
5822 // Output overlaps because it is written before doing the low comparison.
5823 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5824 break;
5825 }
5826 case DataType::Type::kFloat32:
5827 case DataType::Type::kFloat64: {
5828 locations->SetInAt(0, Location::RequiresFpuRegister());
5829 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5830 locations->SetOut(Location::RequiresRegister());
5831 break;
5832 }
5833 default:
5834 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5835 }
5836 }
5837
5838 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5839 LocationSummary* locations = compare->GetLocations();
5840 vixl32::Register out = OutputRegister(compare);
5841 Location left = locations->InAt(0);
5842 Location right = locations->InAt(1);
5843
5844 vixl32::Label less, greater, done;
5845 vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5846 DataType::Type type = compare->GetComparisonType();
5847 vixl32::Condition less_cond = vixl32::ConditionType::lt;
5848 vixl32::Condition greater_cond = vixl32::ConditionType::gt;
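// The code below materializes the usual compare contract: out is 0 when the
// operands are equal, 1 when left > right and -1 when left < right, with the
// condition codes adjusted per type (unsigned, 64-bit, or FP with gt/lt bias).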
5849 switch (type) {
5850 case DataType::Type::kUint32:
5851 less_cond = vixl32::ConditionType::lo;
5852 // greater_cond is not needed below.
5853 FALLTHROUGH_INTENDED;
5854 case DataType::Type::kBool:
5855 case DataType::Type::kUint8:
5856 case DataType::Type::kInt8:
5857 case DataType::Type::kUint16:
5858 case DataType::Type::kInt16:
5859 case DataType::Type::kInt32: {
5860 // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5861 __ Mov(out, 0);
5862 __ Cmp(RegisterFrom(left), RegisterFrom(right));
5863 break;
5864 }
5865 case DataType::Type::kUint64:
5866 less_cond = vixl32::ConditionType::lo;
5867 greater_cond = vixl32::ConditionType::hi;
5868 FALLTHROUGH_INTENDED;
5869 case DataType::Type::kInt64: {
5870 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // High part compare.
5871 __ B(less_cond, &less, /* is_far_target= */ false);
5872 __ B(greater_cond, &greater, /* is_far_target= */ false);
5873 // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5874 __ Mov(out, 0);
5875 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
5876 less_cond = vixl32::ConditionType::lo;
5877 // greater_cond is not needed below.
5878 break;
5879 }
5880 case DataType::Type::kFloat32:
5881 case DataType::Type::kFloat64: {
5882 __ Mov(out, 0);
5883 GenerateVcmp(compare, codegen_);
5884 // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5885 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5886 less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5887 break;
5888 }
5889 default:
5890 LOG(FATAL) << "Unexpected compare type " << type;
5891 UNREACHABLE();
5892 }
5893
5894 __ B(eq, final_label, /* is_far_target= */ false);
5895 __ B(less_cond, &less, /* is_far_target= */ false);
5896
5897 __ Bind(&greater);
5898 __ Mov(out, 1);
5899 __ B(final_label);
5900
5901 __ Bind(&less);
5902 __ Mov(out, -1);
5903
5904 if (done.IsReferenced()) {
5905 __ Bind(&done);
5906 }
5907 }
5908
5909 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5910 LocationSummary* locations =
5911 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5912 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5913 locations->SetInAt(i, Location::Any());
5914 }
5915 locations->SetOut(Location::Any());
5916 }
5917
5918 void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
5919 LOG(FATAL) << "Unreachable";
5920 }
5921
5922 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5923 // TODO (ported from quick): revisit ARM barrier kinds.
5924 DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
5925 switch (kind) {
5926 case MemBarrierKind::kAnyStore:
5927 case MemBarrierKind::kLoadAny:
5928 case MemBarrierKind::kAnyAny: {
5929 flavor = DmbOptions::ISH;
5930 break;
5931 }
5932 case MemBarrierKind::kStoreStore: {
5933 flavor = DmbOptions::ISHST;
5934 break;
5935 }
5936 default:
5937 LOG(FATAL) << "Unexpected memory barrier " << kind;
5938 }
5939 __ Dmb(flavor);
5940 }
5941
5942 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5943 uint32_t offset,
5944 vixl32::Register out_lo,
5945 vixl32::Register out_hi) {
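// A single LDREXD gives a single-copy atomic 64-bit load on cores where a
// plain LDRD is not guaranteed to be atomic (see the HasAtomicLdrdAndStrd
// checks at the call sites); no matching STREXD is needed just to read.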
5946 UseScratchRegisterScope temps(GetVIXLAssembler());
5947 if (offset != 0) {
5948 vixl32::Register temp = temps.Acquire();
5949 __ Add(temp, addr, offset);
5950 addr = temp;
5951 }
5952 __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5953 }
5954
5955 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5956 uint32_t offset,
5957 vixl32::Register value_lo,
5958 vixl32::Register value_hi,
5959 vixl32::Register temp1,
5960 vixl32::Register temp2,
5961 HInstruction* instruction) {
5962 UseScratchRegisterScope temps(GetVIXLAssembler());
5963 vixl32::Label fail;
5964 if (offset != 0) {
5965 vixl32::Register temp = temps.Acquire();
5966 __ Add(temp, addr, offset);
5967 addr = temp;
5968 }
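// LDREXD/STREXD retry loop: the exclusive store must target the same address
// as the preceding exclusive load, and STREXD writes 0 to temp1 on success,
// so the code branches back to `fail` until the store goes through.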
5969 __ Bind(&fail);
5970 {
5971 // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5972 ExactAssemblyScope aas(GetVIXLAssembler(),
5973 vixl32::kMaxInstructionSizeInBytes,
5974 CodeBufferCheckScope::kMaximumSize);
5975 // We need a load followed by store. (The address used in a STREX instruction must
5976 // be the same as the address in the most recently executed LDREX instruction.)
5977 __ ldrexd(temp1, temp2, MemOperand(addr));
5978 codegen_->MaybeRecordImplicitNullCheck(instruction);
5979 }
5980 __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
5981 __ CompareAndBranchIfNonZero(temp1, &fail);
5982 }
5983
5984 void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
5985 const FieldInfo& field_info,
5986 WriteBarrierKind write_barrier_kind) {
5987 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5988
5989 LocationSummary* locations =
5990 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5991 locations->SetInAt(0, Location::RequiresRegister());
5992
5993 DataType::Type field_type = field_info.GetFieldType();
5994 if (DataType::IsFloatingPointType(field_type)) {
5995 locations->SetInAt(1, Location::RequiresFpuRegister());
5996 } else {
5997 locations->SetInAt(1, Location::RequiresRegister());
5998 }
5999
6000 bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
6001 bool generate_volatile = field_info.IsVolatile()
6002 && is_wide
6003 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6004 bool needs_write_barrier =
6005 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6006 bool check_gc_card =
6007 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
6008
6009 // Temporary registers for the write barrier.
6010 // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
6011 if (needs_write_barrier || check_gc_card) {
6012 locations->AddRegisterTemps(2);
6013 } else if (generate_volatile) {
6014 // The ARM encoding has some additional constraints for ldrexd/strexd:
6015 // - registers need to be consecutive
6016 // - the first register should be even but not R14.
6017 // We don't test for ARM yet, and the assertion makes sure that we
6018 // revisit this if we ever enable ARM encoding.
6019 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6020 locations->AddRegisterTemps(2);
6021 if (field_type == DataType::Type::kFloat64) {
6022 // For doubles we need two more registers to copy the value.
6023 locations->AddTemp(LocationFrom(r2));
6024 locations->AddTemp(LocationFrom(r3));
6025 }
6026 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6027 locations->AddTemp(Location::RequiresRegister());
6028 }
6029 }
6030
6031 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
6032 const FieldInfo& field_info,
6033 bool value_can_be_null,
6034 WriteBarrierKind write_barrier_kind) {
6035 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6036
6037 LocationSummary* locations = instruction->GetLocations();
6038 vixl32::Register base = InputRegisterAt(instruction, 0);
6039 Location value = locations->InAt(1);
6040
6041 bool is_volatile = field_info.IsVolatile();
6042 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6043 DataType::Type field_type = field_info.GetFieldType();
6044 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6045 bool needs_write_barrier =
6046 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6047
6048 if (is_volatile) {
6049 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6050 }
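// A volatile field store is bracketed by barriers: the kAnyStore barrier above
// orders earlier accesses before the store, and the kAnyAny barrier at the end
// of this method orders the store before subsequent accesses.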
6051
6052 switch (field_type) {
6053 case DataType::Type::kBool:
6054 case DataType::Type::kUint8:
6055 case DataType::Type::kInt8:
6056 case DataType::Type::kUint16:
6057 case DataType::Type::kInt16:
6058 case DataType::Type::kInt32: {
6059 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6060 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6061 StoreOperandType operand_type = GetStoreOperandType(field_type);
6062 GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
6063 codegen_->MaybeRecordImplicitNullCheck(instruction);
6064 break;
6065 }
6066
6067 case DataType::Type::kReference: {
6068 vixl32::Register value_reg = RegisterFrom(value);
6069 if (kPoisonHeapReferences) {
6070 DCHECK_EQ(field_type, DataType::Type::kReference);
6071 value_reg = RegisterFrom(locations->GetTemp(0));
6072 __ Mov(value_reg, RegisterFrom(value));
6073 GetAssembler()->PoisonHeapReference(value_reg);
6074 }
6075 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6076 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6077 GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
6078 codegen_->MaybeRecordImplicitNullCheck(instruction);
6079 break;
6080 }
6081
6082 case DataType::Type::kInt64: {
6083 if (is_volatile && !atomic_ldrd_strd) {
6084 GenerateWideAtomicStore(base,
6085 offset,
6086 LowRegisterFrom(value),
6087 HighRegisterFrom(value),
6088 RegisterFrom(locations->GetTemp(0)),
6089 RegisterFrom(locations->GetTemp(1)),
6090 instruction);
6091 } else {
6092 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6093 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6094 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
6095 codegen_->MaybeRecordImplicitNullCheck(instruction);
6096 }
6097 break;
6098 }
6099
6100 case DataType::Type::kFloat32: {
6101 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6102 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6103 GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
6104 codegen_->MaybeRecordImplicitNullCheck(instruction);
6105 break;
6106 }
6107
6108 case DataType::Type::kFloat64: {
6109 vixl32::DRegister value_reg = DRegisterFrom(value);
6110 if (is_volatile && !atomic_ldrd_strd) {
6111 vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
6112 vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
6113
6114 __ Vmov(value_reg_lo, value_reg_hi, value_reg);
6115
6116 GenerateWideAtomicStore(base,
6117 offset,
6118 value_reg_lo,
6119 value_reg_hi,
6120 RegisterFrom(locations->GetTemp(2)),
6121 RegisterFrom(locations->GetTemp(3)),
6122 instruction);
6123 } else {
6124 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6125 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6126 GetAssembler()->StoreDToOffset(value_reg, base, offset);
6127 codegen_->MaybeRecordImplicitNullCheck(instruction);
6128 }
6129 break;
6130 }
6131
6132 case DataType::Type::kUint32:
6133 case DataType::Type::kUint64:
6134 case DataType::Type::kVoid:
6135 LOG(FATAL) << "Unreachable type " << field_type;
6136 UNREACHABLE();
6137 }
6138
6139 if (needs_write_barrier) {
6140 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6141 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6142 codegen_->MaybeMarkGCCard(
6143 temp,
6144 card,
6145 base,
6146 RegisterFrom(value),
6147 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6148 } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6149 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6150 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6151 codegen_->CheckGCCardIsValid(temp, card, base);
6152 }
6153
6154 if (is_volatile) {
6155 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6156 }
6157 }
6158
6159 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
6160 const FieldInfo& field_info) {
6161 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6162
6163 bool object_field_get_with_read_barrier =
6164 (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6165 LocationSummary* locations =
6166 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6167 object_field_get_with_read_barrier
6168 ? LocationSummary::kCallOnSlowPath
6169 : LocationSummary::kNoCall);
6170 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6171 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6172 }
6173 // Input for object receiver.
6174 locations->SetInAt(0, Location::RequiresRegister());
6175
6176 bool volatile_for_double = field_info.IsVolatile()
6177 && (field_info.GetFieldType() == DataType::Type::kFloat64)
6178 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6179 // The output overlaps in case of volatile long: we don't want the code generated by
6180 // `GenerateWideAtomicLoad()` to overwrite the object's location. Likewise, in the case
6181 // of an object field get with non-Baker read barriers enabled, we do not want the load
6182 // to overwrite the object's location, as we need it to emit the read barrier.
6183 // Baker read barrier implementation with introspection does not have this restriction.
6184 bool overlap =
6185 (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
6186 (object_field_get_with_read_barrier && !kUseBakerReadBarrier);
6187
6188 if (DataType::IsFloatingPointType(instruction->GetType())) {
6189 locations->SetOut(Location::RequiresFpuRegister());
6190 } else {
6191 locations->SetOut(Location::RequiresRegister(),
6192 (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
6193 }
6194 if (volatile_for_double) {
6195 // The ARM encoding has some additional constraints for ldrexd/strexd:
6196 // - registers need to be consecutive
6197 // - the first register should be even but not R14.
6198 // We don't test for ARM yet, and the assertion makes sure that we
6199 // revisit this if we ever enable ARM encoding.
6200 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6201 locations->AddRegisterTemps(2);
6202 } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6203 // We need a temporary register for the read barrier load in
6204 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6205 // only if the offset is too big.
6206 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
6207 locations->AddTemp(Location::RequiresRegister());
6208 }
6209 }
6210 }
6211
6212 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
6213 DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
6214 if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
6215 (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
6216 return Location::ConstantLocation(input);
6217 } else {
6218 return Location::RequiresFpuRegister();
6219 }
6220 }
6221
6222 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
6223 Opcode opcode) {
6224 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
6225 if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
6226 return Location::ConstantLocation(constant);
6227 }
6228 return Location::RequiresRegister();
6229 }
6230
6231 static bool CanEncode32BitConstantAsImmediate(
6232 CodeGeneratorARMVIXL* codegen,
6233 uint32_t value,
6234 Opcode opcode,
6235 vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
6236 ArmVIXLAssembler* assembler = codegen->GetAssembler();
6237 if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
6238 return true;
6239 }
6240 Opcode neg_opcode = kNoOperand;
6241 uint32_t neg_value = 0;
6242 switch (opcode) {
6243 case AND: neg_opcode = BIC; neg_value = ~value; break;
6244 case ORR: neg_opcode = ORN; neg_value = ~value; break;
6245 case ADD: neg_opcode = SUB; neg_value = -value; break;
6246 case ADC: neg_opcode = SBC; neg_value = ~value; break;
6247 case SUB: neg_opcode = ADD; neg_value = -value; break;
6248 case SBC: neg_opcode = ADC; neg_value = ~value; break;
6249 case MOV: neg_opcode = MVN; neg_value = ~value; break;
6250 default:
6251 return false;
6252 }
6253
6254 if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
6255 return true;
6256 }
6257
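// An AND with a contiguous low-bit mask (2^n - 1) is also accepted: such a
// mask can be implemented with a bitfield extract (e.g. UBFX) instead of a
// modified immediate.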
6258 return opcode == AND && IsPowerOfTwo(value + 1);
6259 }
6260
6261 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
6262 uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
6263 if (DataType::Is64BitType(input_cst->GetType())) {
6264 Opcode high_opcode = opcode;
6265 vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
6266 switch (opcode) {
6267 case SUB:
6268 // Flip the operation to an ADD.
6269 value = -value;
6270 opcode = ADD;
6271 FALLTHROUGH_INTENDED;
6272 case ADD:
6273 if (Low32Bits(value) == 0u) {
6274 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
6275 }
6276 high_opcode = ADC;
6277 low_flags_update = vixl32::FlagsUpdate::SetFlags;
6278 break;
6279 default:
6280 break;
6281 }
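// For 64-bit operations each 32-bit half is checked separately; ADD (and SUB,
// negated into ADD above) routes the high word through ADC with a flag-setting
// low half, matching the ADDS/ADC instruction pair that will be emitted.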
6282 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6283 CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6284 } else {
6285 return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6286 }
6287 }
6288
6289 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6290 const FieldInfo& field_info) {
6291 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6292
6293 LocationSummary* locations = instruction->GetLocations();
6294 uint32_t receiver_input = 0;
6295 vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6296 Location out = locations->Out();
6297 bool is_volatile = field_info.IsVolatile();
6298 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6299 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6300 DataType::Type load_type = instruction->GetType();
6301 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6302
6303 switch (load_type) {
6304 case DataType::Type::kBool:
6305 case DataType::Type::kUint8:
6306 case DataType::Type::kInt8:
6307 case DataType::Type::kUint16:
6308 case DataType::Type::kInt16:
6309 case DataType::Type::kInt32: {
6310 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6311 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6312 LoadOperandType operand_type = GetLoadOperandType(load_type);
6313 GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6314 codegen_->MaybeRecordImplicitNullCheck(instruction);
6315 break;
6316 }
6317
6318 case DataType::Type::kReference: {
6319 // /* HeapReference<Object> */ out = *(base + offset)
6320 if (codegen_->EmitBakerReadBarrier()) {
6321 Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6322 // Note that a potential implicit null check is handled in this
6323 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6324 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6325 instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6326 if (is_volatile) {
6327 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6328 }
6329 } else {
6330 {
6331 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6332 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6333 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6334 codegen_->MaybeRecordImplicitNullCheck(instruction);
6335 }
6336 if (is_volatile) {
6337 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6338 }
6339 // If read barriers are enabled, emit read barriers other than
6340 // Baker's using a slow path (and also unpoison the loaded
6341 // reference, if heap poisoning is enabled).
6342 codegen_->MaybeGenerateReadBarrierSlow(
6343 instruction, out, out, locations->InAt(receiver_input), offset);
6344 }
6345 break;
6346 }
6347
6348 case DataType::Type::kInt64: {
6349 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6350 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6351 if (is_volatile && !atomic_ldrd_strd) {
6352 GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6353 } else {
6354 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6355 }
6356 codegen_->MaybeRecordImplicitNullCheck(instruction);
6357 break;
6358 }
6359
6360 case DataType::Type::kFloat32: {
6361 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6362 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6363 GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6364 codegen_->MaybeRecordImplicitNullCheck(instruction);
6365 break;
6366 }
6367
6368 case DataType::Type::kFloat64: {
6369 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6370 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6371 vixl32::DRegister out_dreg = DRegisterFrom(out);
6372 if (is_volatile && !atomic_ldrd_strd) {
6373 vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6374 vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6375 GenerateWideAtomicLoad(base, offset, lo, hi);
6376 codegen_->MaybeRecordImplicitNullCheck(instruction);
6377 __ Vmov(out_dreg, lo, hi);
6378 } else {
6379 GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6380 codegen_->MaybeRecordImplicitNullCheck(instruction);
6381 }
6382 break;
6383 }
6384
6385 case DataType::Type::kUint32:
6386 case DataType::Type::kUint64:
6387 case DataType::Type::kVoid:
6388 LOG(FATAL) << "Unreachable type " << load_type;
6389 UNREACHABLE();
6390 }
6391
6392 if (is_volatile) {
6393 if (load_type == DataType::Type::kReference) {
6394 // Memory barriers, in the case of references, are also handled
6395 // in the previous switch statement.
6396 } else {
6397 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6398 }
6399 }
6400 }
6401
6402 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6403 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6404 }
6405
6406 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6407 HandleFieldSet(instruction,
6408 instruction->GetFieldInfo(),
6409 instruction->GetValueCanBeNull(),
6410 instruction->GetWriteBarrierKind());
6411 }
6412
6413 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6414 HandleFieldGet(instruction, instruction->GetFieldInfo());
6415 }
6416
6417 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6418 HandleFieldGet(instruction, instruction->GetFieldInfo());
6419 }
6420
6421 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6422 HandleFieldGet(instruction, instruction->GetFieldInfo());
6423 }
6424
6425 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6426 HandleFieldGet(instruction, instruction->GetFieldInfo());
6427 }
6428
6429 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6430 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6431 }
6432
6433 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6434 HandleFieldSet(instruction,
6435 instruction->GetFieldInfo(),
6436 instruction->GetValueCanBeNull(),
6437 instruction->GetWriteBarrierKind());
6438 }
6439
6440 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6441 codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6442 }
6443
6444 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6445 __ Mov(r0, instruction->GetFormat()->GetValue());
6446 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6447 }
6448
6449 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6450 HUnresolvedInstanceFieldGet* instruction) {
6451 FieldAccessCallingConventionARMVIXL calling_convention;
6452 codegen_->CreateUnresolvedFieldLocationSummary(
6453 instruction, instruction->GetFieldType(), calling_convention);
6454 }
6455
6456 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6457 HUnresolvedInstanceFieldGet* instruction) {
6458 FieldAccessCallingConventionARMVIXL calling_convention;
6459 codegen_->GenerateUnresolvedFieldAccess(instruction,
6460 instruction->GetFieldType(),
6461 instruction->GetFieldIndex(),
6462 instruction->GetDexPc(),
6463 calling_convention);
6464 }
6465
6466 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6467 HUnresolvedInstanceFieldSet* instruction) {
6468 FieldAccessCallingConventionARMVIXL calling_convention;
6469 codegen_->CreateUnresolvedFieldLocationSummary(
6470 instruction, instruction->GetFieldType(), calling_convention);
6471 }
6472
6473 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6474 HUnresolvedInstanceFieldSet* instruction) {
6475 FieldAccessCallingConventionARMVIXL calling_convention;
6476 codegen_->GenerateUnresolvedFieldAccess(instruction,
6477 instruction->GetFieldType(),
6478 instruction->GetFieldIndex(),
6479 instruction->GetDexPc(),
6480 calling_convention);
6481 }
6482
6483 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6484 HUnresolvedStaticFieldGet* instruction) {
6485 FieldAccessCallingConventionARMVIXL calling_convention;
6486 codegen_->CreateUnresolvedFieldLocationSummary(
6487 instruction, instruction->GetFieldType(), calling_convention);
6488 }
6489
6490 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6491 HUnresolvedStaticFieldGet* instruction) {
6492 FieldAccessCallingConventionARMVIXL calling_convention;
6493 codegen_->GenerateUnresolvedFieldAccess(instruction,
6494 instruction->GetFieldType(),
6495 instruction->GetFieldIndex(),
6496 instruction->GetDexPc(),
6497 calling_convention);
6498 }
6499
6500 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6501 HUnresolvedStaticFieldSet* instruction) {
6502 FieldAccessCallingConventionARMVIXL calling_convention;
6503 codegen_->CreateUnresolvedFieldLocationSummary(
6504 instruction, instruction->GetFieldType(), calling_convention);
6505 }
6506
6507 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6508 HUnresolvedStaticFieldSet* instruction) {
6509 FieldAccessCallingConventionARMVIXL calling_convention;
6510 codegen_->GenerateUnresolvedFieldAccess(instruction,
6511 instruction->GetFieldType(),
6512 instruction->GetFieldIndex(),
6513 instruction->GetDexPc(),
6514 calling_convention);
6515 }
6516
6517 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6518 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6519 locations->SetInAt(0, Location::RequiresRegister());
6520 }
6521
6522 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6523 if (CanMoveNullCheckToUser(instruction)) {
6524 return;
6525 }
6526
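// An implicit null check is just a load from the object: a null receiver
// faults, and the fault handler uses the pc recorded below to map the signal
// back to this instruction and throw NullPointerException.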
6527 UseScratchRegisterScope temps(GetVIXLAssembler());
6528 // Ensure the pc position is recorded immediately after the `ldr` instruction.
6529 ExactAssemblyScope aas(GetVIXLAssembler(),
6530 vixl32::kMaxInstructionSizeInBytes,
6531 CodeBufferCheckScope::kMaximumSize);
6532 __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6533 RecordPcInfo(instruction, instruction->GetDexPc());
6534 }
6535
6536 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6537 NullCheckSlowPathARMVIXL* slow_path =
6538 new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6539 AddSlowPath(slow_path);
6540 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6541 }
6542
6543 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6544 codegen_->GenerateNullCheck(instruction);
6545 }
6546
6547 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6548 Location out_loc,
6549 vixl32::Register base,
6550 vixl32::Register reg_index,
6551 vixl32::Condition cond) {
6552 uint32_t shift_count = DataType::SizeShift(type);
6553 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6554
6555 switch (type) {
6556 case DataType::Type::kBool:
6557 case DataType::Type::kUint8:
6558 __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6559 break;
6560 case DataType::Type::kInt8:
6561 __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6562 break;
6563 case DataType::Type::kUint16:
6564 __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6565 break;
6566 case DataType::Type::kInt16:
6567 __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6568 break;
6569 case DataType::Type::kReference:
6570 case DataType::Type::kInt32:
6571 __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6572 break;
6573 // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6574 case DataType::Type::kInt64:
6575 case DataType::Type::kFloat32:
6576 case DataType::Type::kFloat64:
6577 default:
6578 LOG(FATAL) << "Unreachable type " << type;
6579 UNREACHABLE();
6580 }
6581 }
6582
6583 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6584 Location loc,
6585 vixl32::Register base,
6586 vixl32::Register reg_index,
6587 vixl32::Condition cond) {
6588 uint32_t shift_count = DataType::SizeShift(type);
6589 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6590
6591 switch (type) {
6592 case DataType::Type::kBool:
6593 case DataType::Type::kUint8:
6594 case DataType::Type::kInt8:
6595 __ Strb(cond, RegisterFrom(loc), mem_address);
6596 break;
6597 case DataType::Type::kUint16:
6598 case DataType::Type::kInt16:
6599 __ Strh(cond, RegisterFrom(loc), mem_address);
6600 break;
6601 case DataType::Type::kReference:
6602 case DataType::Type::kInt32:
6603 __ Str(cond, RegisterFrom(loc), mem_address);
6604 break;
6605 // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6606 case DataType::Type::kInt64:
6607 case DataType::Type::kFloat32:
6608 case DataType::Type::kFloat64:
6609 default:
6610 LOG(FATAL) << "Unreachable type " << type;
6611 UNREACHABLE();
6612 }
6613 }
6614
6615 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6616 bool object_array_get_with_read_barrier =
6617 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6618 LocationSummary* locations =
6619 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6620 object_array_get_with_read_barrier
6621 ? LocationSummary::kCallOnSlowPath
6622 : LocationSummary::kNoCall);
6623 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6624 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6625 }
6626 locations->SetInAt(0, Location::RequiresRegister());
6627 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6628 if (DataType::IsFloatingPointType(instruction->GetType())) {
6629 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6630 } else {
6631 // The output overlaps for an object array get for non-Baker read barriers: we do not want
6632 // the load to overwrite the object's location, as we need it to emit the read barrier.
6633 // Baker read barrier implementation with introspection does not have this restriction.
6634 bool overlap = object_array_get_with_read_barrier && !kUseBakerReadBarrier;
6635 locations->SetOut(Location::RequiresRegister(),
6636 overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6637 }
6638 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6639 if (instruction->GetIndex()->IsConstant()) {
6640 // Array loads with constant index are treated as field loads.
6641 // We need a temporary register for the read barrier load in
6642 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6643 // only if the offset is too big.
6644 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6645 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6646 offset += index << DataType::SizeShift(DataType::Type::kReference);
6647 if (offset >= kReferenceLoadMinFarOffset) {
6648 locations->AddTemp(Location::RequiresRegister());
6649 }
6650 } else {
6651 // We need a non-scratch temporary for the array data pointer in
6652 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6653 locations->AddTemp(Location::RequiresRegister());
6654 }
6655 } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6656 // Also need a temporary for String compression feature.
6657 locations->AddTemp(Location::RequiresRegister());
6658 }
6659 }
6660
6661 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6662 LocationSummary* locations = instruction->GetLocations();
6663 Location obj_loc = locations->InAt(0);
6664 vixl32::Register obj = InputRegisterAt(instruction, 0);
6665 Location index = locations->InAt(1);
6666 Location out_loc = locations->Out();
6667 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6668 DataType::Type type = instruction->GetType();
6669 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6670 instruction->IsStringCharAt();
6671 HInstruction* array_instr = instruction->GetArray();
6672 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6673
6674 switch (type) {
6675 case DataType::Type::kBool:
6676 case DataType::Type::kUint8:
6677 case DataType::Type::kInt8:
6678 case DataType::Type::kUint16:
6679 case DataType::Type::kInt16:
6680 case DataType::Type::kInt32: {
6681 vixl32::Register length;
6682 if (maybe_compressed_char_at) {
6683 length = RegisterFrom(locations->GetTemp(0));
6684 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6685 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6686 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6687 GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6688 codegen_->MaybeRecordImplicitNullCheck(instruction);
6689 }
6690 if (index.IsConstant()) {
6691 int32_t const_index = Int32ConstantFrom(index);
6692 if (maybe_compressed_char_at) {
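// The String count field keeps the length shifted left by one with the
// compression flag in bit 0; LSRS recovers the length and moves the flag into
// the carry, which selects the byte or half-word load below.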
6693 vixl32::Label uncompressed_load, done;
6694 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6695 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6696 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6697 "Expecting 0=compressed, 1=uncompressed");
6698 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6699 GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6700 RegisterFrom(out_loc),
6701 obj,
6702 data_offset + const_index);
6703 __ B(final_label);
6704 __ Bind(&uncompressed_load);
6705 GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6706 RegisterFrom(out_loc),
6707 obj,
6708 data_offset + (const_index << 1));
6709 if (done.IsReferenced()) {
6710 __ Bind(&done);
6711 }
6712 } else {
6713 uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6714
6715 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6716 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6717 LoadOperandType load_type = GetLoadOperandType(type);
6718 GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6719 codegen_->MaybeRecordImplicitNullCheck(instruction);
6720 }
6721 } else {
6722 UseScratchRegisterScope temps(GetVIXLAssembler());
6723 vixl32::Register temp = temps.Acquire();
6724
6725 if (has_intermediate_address) {
6726 // We do not need to compute the intermediate address from the array: the
6727 // input instruction has done it already. See the comment in
6728 // `TryExtractArrayAccessAddress()`.
6729 if (kIsDebugBuild) {
6730 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6731 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6732 }
6733 temp = obj;
6734 } else {
6735 __ Add(temp, obj, data_offset);
6736 }
6737 if (maybe_compressed_char_at) {
6738 vixl32::Label uncompressed_load, done;
6739 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6740 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6741 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6742 "Expecting 0=compressed, 1=uncompressed");
6743 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6744 __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6745 __ B(final_label);
6746 __ Bind(&uncompressed_load);
6747 __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6748 if (done.IsReferenced()) {
6749 __ Bind(&done);
6750 }
6751 } else {
6752 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6753 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6754 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6755 codegen_->MaybeRecordImplicitNullCheck(instruction);
6756 }
6757 }
6758 break;
6759 }
6760
6761 case DataType::Type::kReference: {
6762 // The read barrier instrumentation of object ArrayGet
6763 // instructions does not support the HIntermediateAddress
6764 // instruction.
6765 DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier()));
6766
6767 static_assert(
6768 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6769 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6770 // /* HeapReference<Object> */ out =
6771 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6772 if (codegen_->EmitBakerReadBarrier()) {
6773 // Note that a potential implicit null check is handled in this
6774 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6775 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6776 if (index.IsConstant()) {
6777 // Array load with a constant index can be treated as a field load.
6778 Location maybe_temp =
6779 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6780 data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6781 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6782 out_loc,
6783 obj,
6784 data_offset,
6785 maybe_temp,
6786 /* needs_null_check= */ false);
6787 } else {
6788 Location temp = locations->GetTemp(0);
6789 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6790 out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6791 }
6792 } else {
6793 vixl32::Register out = OutputRegister(instruction);
6794 if (index.IsConstant()) {
6795 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6796 {
6797 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6798 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6799 GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6800 codegen_->MaybeRecordImplicitNullCheck(instruction);
6801 }
6802 // If read barriers are enabled, emit read barriers other than
6803 // Baker's using a slow path (and also unpoison the loaded
6804 // reference, if heap poisoning is enabled).
6805 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6806 } else {
6807 UseScratchRegisterScope temps(GetVIXLAssembler());
6808 vixl32::Register temp = temps.Acquire();
6809
6810 if (has_intermediate_address) {
6811 // We do not need to compute the intermediate address from the array: the
6812 // input instruction has done it already. See the comment in
6813 // `TryExtractArrayAccessAddress()`.
6814 if (kIsDebugBuild) {
6815 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6816 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6817 }
6818 temp = obj;
6819 } else {
6820 __ Add(temp, obj, data_offset);
6821 }
6822 {
6823 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6824 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6825 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6826 temps.Close();
6827 codegen_->MaybeRecordImplicitNullCheck(instruction);
6828 }
6829 // If read barriers are enabled, emit read barriers other than
6830 // Baker's using a slow path (and also unpoison the loaded
6831 // reference, if heap poisoning is enabled).
6832 codegen_->MaybeGenerateReadBarrierSlow(
6833 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6834 }
6835 }
6836 break;
6837 }
6838
6839 case DataType::Type::kInt64: {
6840 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6841 // As two macro instructions can be emitted the max size is doubled.
6842 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6843 if (index.IsConstant()) {
6844 size_t offset =
6845 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6846 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6847 } else {
6848 UseScratchRegisterScope temps(GetVIXLAssembler());
6849 vixl32::Register temp = temps.Acquire();
6850 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6851 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6852 }
6853 codegen_->MaybeRecordImplicitNullCheck(instruction);
6854 break;
6855 }
6856
6857 case DataType::Type::kFloat32: {
6858 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6859 // As two macro instructions can be emitted the max size is doubled.
6860 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6861 vixl32::SRegister out = SRegisterFrom(out_loc);
6862 if (index.IsConstant()) {
6863 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6864 GetAssembler()->LoadSFromOffset(out, obj, offset);
6865 } else {
6866 UseScratchRegisterScope temps(GetVIXLAssembler());
6867 vixl32::Register temp = temps.Acquire();
6868 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6869 GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6870 }
6871 codegen_->MaybeRecordImplicitNullCheck(instruction);
6872 break;
6873 }
6874
6875 case DataType::Type::kFloat64: {
6876 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6877 // As two macro instructions can be emitted the max size is doubled.
6878 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6879 if (index.IsConstant()) {
6880 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6881 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6882 } else {
6883 UseScratchRegisterScope temps(GetVIXLAssembler());
6884 vixl32::Register temp = temps.Acquire();
6885 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6886 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6887 }
6888 codegen_->MaybeRecordImplicitNullCheck(instruction);
6889 break;
6890 }
6891
6892 case DataType::Type::kUint32:
6893 case DataType::Type::kUint64:
6894 case DataType::Type::kVoid:
6895 LOG(FATAL) << "Unreachable type " << type;
6896 UNREACHABLE();
6897 }
6898 }
6899
6900 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6901 DataType::Type value_type = instruction->GetComponentType();
6902
6903 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6904 bool needs_write_barrier =
6905 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6906 bool check_gc_card =
6907 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6908
6909 bool needs_type_check = instruction->NeedsTypeCheck();
6910
6911 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6912 instruction,
6913 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6914
6915 locations->SetInAt(0, Location::RequiresRegister());
6916 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6917 if (DataType::IsFloatingPointType(value_type)) {
6918 locations->SetInAt(2, Location::RequiresFpuRegister());
6919 } else {
6920 locations->SetInAt(2, Location::RequiresRegister());
6921 }
6922 if (needs_write_barrier || check_gc_card || instruction->NeedsTypeCheck()) {
6923 // Temporary registers for type checking, write barrier, checking the dirty bit, or register
6924 // poisoning.
6925 locations->AddRegisterTemps(2);
6926 } else if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
6927 locations->AddTemp(Location::RequiresRegister());
6928 }
6929 }
6930
6931 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6932 LocationSummary* locations = instruction->GetLocations();
6933 vixl32::Register array = InputRegisterAt(instruction, 0);
6934 Location index = locations->InAt(1);
6935 DataType::Type value_type = instruction->GetComponentType();
6936 bool needs_type_check = instruction->NeedsTypeCheck();
6937 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6938 bool needs_write_barrier =
6939 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6940 uint32_t data_offset =
6941 mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6942 Location value_loc = locations->InAt(2);
6943 HInstruction* array_instr = instruction->GetArray();
6944 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6945
6946 switch (value_type) {
6947 case DataType::Type::kBool:
6948 case DataType::Type::kUint8:
6949 case DataType::Type::kInt8:
6950 case DataType::Type::kUint16:
6951 case DataType::Type::kInt16:
6952 case DataType::Type::kInt32: {
6953 if (index.IsConstant()) {
6954 int32_t const_index = Int32ConstantFrom(index);
6955 uint32_t full_offset =
6956 data_offset + (const_index << DataType::SizeShift(value_type));
6957 StoreOperandType store_type = GetStoreOperandType(value_type);
6958 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6959 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6960 GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6961 codegen_->MaybeRecordImplicitNullCheck(instruction);
6962 } else {
6963 UseScratchRegisterScope temps(GetVIXLAssembler());
6964 vixl32::Register temp = temps.Acquire();
6965
6966 if (has_intermediate_address) {
6967 // We do not need to compute the intermediate address from the array: the
6968 // input instruction has done it already. See the comment in
6969 // `TryExtractArrayAccessAddress()`.
6970 if (kIsDebugBuild) {
6971 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6972 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6973 }
6974 temp = array;
6975 } else {
6976 __ Add(temp, array, data_offset);
6977 }
6978 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6979 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6980 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6981 codegen_->MaybeRecordImplicitNullCheck(instruction);
6982 }
6983 break;
6984 }
6985
6986 case DataType::Type::kReference: {
6987 vixl32::Register value = RegisterFrom(value_loc);
6988 // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6989 // See the comment in instruction_simplifier_shared.cc.
6990 DCHECK(!has_intermediate_address);
6991
6992 if (instruction->InputAt(2)->IsNullConstant()) {
6993 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6994 // As two macro instructions can be emitted the max size is doubled.
6995 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6996 // Just setting null.
6997 if (index.IsConstant()) {
6998 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6999 GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
7000 } else {
7001 DCHECK(index.IsRegister()) << index;
7002 UseScratchRegisterScope temps(GetVIXLAssembler());
7003 vixl32::Register temp = temps.Acquire();
7004 __ Add(temp, array, data_offset);
7005 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
7006 }
7007 codegen_->MaybeRecordImplicitNullCheck(instruction);
7008 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
7009 // We need to set a write barrier here even though we are writing null, since this write
7010 // barrier is being relied on.
7011 DCHECK(needs_write_barrier);
7012 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7013 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7014 codegen_->MarkGCCard(temp1, temp2, array);
7015 }
7016 DCHECK(!needs_type_check);
7017 break;
7018 }
7019
7020 const bool can_value_be_null = instruction->GetValueCanBeNull();
7021 // In the WriteBarrierKind::kEmitNotBeingReliedOn case we can skip the write barrier when the
7022 // value is null, and without an extra CompareAndBranchIfZero, since the null check already
7023 // done for the type check covers it.
7024 const bool skip_marking_gc_card =
7025 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
7026 vixl32::Label do_store;
7027 vixl32::Label skip_writing_card;
7028 if (can_value_be_null) {
7029 if (skip_marking_gc_card) {
7030 __ CompareAndBranchIfZero(value, &skip_writing_card, /* is_far_target= */ false);
7031 } else {
7032 __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
7033 }
7034 }
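// Control-flow note: when the value is null, branching to `skip_writing_card` skips both the
// type check and the card handling below, whereas branching to `do_store` skips only the
// type check.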
7035
7036 SlowPathCodeARMVIXL* slow_path = nullptr;
7037 if (needs_type_check) {
7038 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
7039 codegen_->AddSlowPath(slow_path);
7040
7041 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7042 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7043 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7044
7045 // Note that when read barriers are enabled, the type checks
7046 // are performed without read barriers. This is fine, even in
7047 // the case where a class object is in the from-space after
7048 // the flip, as a comparison involving such a type would not
7049 // produce a false positive; it may of course produce a false
7050 // negative, in which case we would take the ArraySet slow
7051 // path.
7052
7053 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7054 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7055
7056 {
7057 // Ensure we record the pc position immediately after the `ldr` instruction.
7058 ExactAssemblyScope aas(GetVIXLAssembler(),
7059 vixl32::kMaxInstructionSizeInBytes,
7060 CodeBufferCheckScope::kMaximumSize);
7061 // /* HeapReference<Class> */ temp1 = array->klass_
7062 __ ldr(temp1, MemOperand(array, class_offset));
7063 codegen_->MaybeRecordImplicitNullCheck(instruction);
7064 }
7065 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7066
7067 // /* HeapReference<Class> */ temp1 = temp1->component_type_
7068 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
7069 // /* HeapReference<Class> */ temp2 = value->klass_
7070 GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
7071 // If heap poisoning is enabled, no need to unpoison `temp1`
7072 // nor `temp2`, as we are comparing two poisoned references.
7073 __ Cmp(temp1, temp2);
7074
7075 if (instruction->StaticTypeOfArrayIsObjectArray()) {
7076 vixl32::Label do_put;
7077 __ B(eq, &do_put, /* is_far_target= */ false);
7078 // If heap poisoning is enabled, the `temp1` reference has
7079 // not been unpoisoned yet; unpoison it now.
7080 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7081
7082 // /* HeapReference<Class> */ temp1 = temp1->super_class_
7083 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
7084 // If heap poisoning is enabled, no need to unpoison
7085 // `temp1`, as we are comparing against null below.
7086 __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
7087 __ Bind(&do_put);
7088 } else {
7089 __ B(ne, slow_path->GetEntryLabel());
7090 }
7091 }
7092
7093 if (can_value_be_null && !skip_marking_gc_card) {
7094 DCHECK(do_store.IsReferenced());
7095 __ Bind(&do_store);
7096 }
7097
7098 if (needs_write_barrier) {
7099 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7100 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7101 codegen_->MarkGCCard(temp1, temp2, array);
7102 } else if (codegen_->ShouldCheckGCCard(
7103 value_type, instruction->GetValue(), write_barrier_kind)) {
7104 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7105 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7106 codegen_->CheckGCCardIsValid(temp1, temp2, array);
7107 }
7108
7109 if (skip_marking_gc_card) {
7110 // Note that we don't check that the GC card is valid here, as the card may legitimately be clean.
7111 DCHECK(skip_writing_card.IsReferenced());
7112 __ Bind(&skip_writing_card);
7113 }
7114
7115 vixl32::Register source = value;
7116 if (kPoisonHeapReferences) {
7117 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7118 DCHECK_EQ(value_type, DataType::Type::kReference);
7119 __ Mov(temp1, value);
7120 GetAssembler()->PoisonHeapReference(temp1);
7121 source = temp1;
7122 }
7123
7124 {
7125 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7126 // As two macro instructions can be emitted the max size is doubled.
7127 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7128 if (index.IsConstant()) {
7129 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7130 GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
7131 } else {
7132 DCHECK(index.IsRegister()) << index;
7133
7134 UseScratchRegisterScope temps(GetVIXLAssembler());
7135 vixl32::Register temp = temps.Acquire();
7136 __ Add(temp, array, data_offset);
7137 codegen_->StoreToShiftedRegOffset(value_type,
7138 LocationFrom(source),
7139 temp,
7140 RegisterFrom(index));
7141 }
7142
7143 if (can_value_be_null || !needs_type_check) {
7144 codegen_->MaybeRecordImplicitNullCheck(instruction);
7145 }
7146 }
7147
7148 if (slow_path != nullptr) {
7149 __ Bind(slow_path->GetExitLabel());
7150 }
7151
7152 break;
7153 }
7154
7155 case DataType::Type::kInt64: {
7156 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7157 // As two macro instructions can be emitted the max size is doubled.
7158 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7159 Location value = locations->InAt(2);
7160 if (index.IsConstant()) {
7161 size_t offset =
7162 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7163 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
7164 } else {
7165 UseScratchRegisterScope temps(GetVIXLAssembler());
7166 vixl32::Register temp = temps.Acquire();
7167 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7168 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
7169 }
7170 codegen_->MaybeRecordImplicitNullCheck(instruction);
7171 break;
7172 }
7173
7174 case DataType::Type::kFloat32: {
7175 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7176 // As two macro instructions can be emitted the max size is doubled.
7177 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7178 Location value = locations->InAt(2);
7179 DCHECK(value.IsFpuRegister());
7180 if (index.IsConstant()) {
7181 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7182 GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
7183 } else {
7184 UseScratchRegisterScope temps(GetVIXLAssembler());
7185 vixl32::Register temp = temps.Acquire();
7186 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
7187 GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
7188 }
7189 codegen_->MaybeRecordImplicitNullCheck(instruction);
7190 break;
7191 }
7192
7193 case DataType::Type::kFloat64: {
7194 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7195 // As two macro instructions can be emitted the max size is doubled.
7196 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7197 Location value = locations->InAt(2);
7198 DCHECK(value.IsFpuRegisterPair());
7199 if (index.IsConstant()) {
7200 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7201 GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
7202 } else {
7203 UseScratchRegisterScope temps(GetVIXLAssembler());
7204 vixl32::Register temp = temps.Acquire();
7205 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7206 GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
7207 }
7208 codegen_->MaybeRecordImplicitNullCheck(instruction);
7209 break;
7210 }
7211
7212 case DataType::Type::kUint32:
7213 case DataType::Type::kUint64:
7214 case DataType::Type::kVoid:
7215 LOG(FATAL) << "Unreachable type " << value_type;
7216 UNREACHABLE();
7217 }
7218 }
7219
7220 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7221 LocationSummary* locations =
7222 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7223 locations->SetInAt(0, Location::RequiresRegister());
7224 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7225 }
7226
7227 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7228 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
7229 vixl32::Register obj = InputRegisterAt(instruction, 0);
7230 vixl32::Register out = OutputRegister(instruction);
7231 {
7232 ExactAssemblyScope aas(GetVIXLAssembler(),
7233 vixl32::kMaxInstructionSizeInBytes,
7234 CodeBufferCheckScope::kMaximumSize);
7235 __ ldr(out, MemOperand(obj, offset));
7236 codegen_->MaybeRecordImplicitNullCheck(instruction);
7237 }
7238 // Mask out compression flag from String's array length.
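// (When compression is enabled, the count field holds the length shifted left by one with the
// compression state in bit 0, so the logical shift right by one below recovers the length.)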
7239 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
7240 __ Lsr(out, out, 1u);
7241 }
7242 }
7243
7244 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7245 LocationSummary* locations =
7246 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7247
7248 locations->SetInAt(0, Location::RequiresRegister());
7249 locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
7250 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7251 }
7252
7253 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7254 vixl32::Register out = OutputRegister(instruction);
7255 vixl32::Register first = InputRegisterAt(instruction, 0);
7256 Location second = instruction->GetLocations()->InAt(1);
7257
7258 if (second.IsRegister()) {
7259 __ Add(out, first, RegisterFrom(second));
7260 } else {
7261 __ Add(out, first, Int32ConstantFrom(second));
7262 }
7263 }
7264
7265 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
7266 HIntermediateAddressIndex* instruction) {
7267 LOG(FATAL) << "Unreachable " << instruction->GetId();
7268 }
7269
7270 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
7271 HIntermediateAddressIndex* instruction) {
7272 LOG(FATAL) << "Unreachable " << instruction->GetId();
7273 }
7274
7275 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7276 RegisterSet caller_saves = RegisterSet::Empty();
7277 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7278 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7279 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
7280 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
7281
7282 HInstruction* index = instruction->InputAt(0);
7283 HInstruction* length = instruction->InputAt(1);
7284 // If both index and length are constants, we can statically check the bounds. But if at least
7285 // one of them is not encodable, ArmEncodableConstantOrRegister would create a
7286 // Location::RequiresRegister(), which we want to avoid here. Instead we create constant
7287 // locations.
7288 bool both_const = index->IsConstant() && length->IsConstant();
7289 locations->SetInAt(0, both_const
7290 ? Location::ConstantLocation(index)
7291 : ArmEncodableConstantOrRegister(index, CMP));
7292 locations->SetInAt(1, both_const
7293 ? Location::ConstantLocation(length)
7294 : ArmEncodableConstantOrRegister(length, CMP));
7295 }
7296
7297 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7298 LocationSummary* locations = instruction->GetLocations();
7299 Location index_loc = locations->InAt(0);
7300 Location length_loc = locations->InAt(1);
7301
7302 if (length_loc.IsConstant()) {
7303 int32_t length = Int32ConstantFrom(length_loc);
7304 if (index_loc.IsConstant()) {
7305 // BCE will remove the bounds check if we are guaranteed to pass.
7306 int32_t index = Int32ConstantFrom(index_loc);
7307 if (index < 0 || index >= length) {
7308 SlowPathCodeARMVIXL* slow_path =
7309 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7310 codegen_->AddSlowPath(slow_path);
7311 __ B(slow_path->GetEntryLabel());
7312 } else {
7313 // Some optimization after BCE may have generated this, and we should not
7314 // generate a bounds check if it is a valid range.
7315 }
7316 return;
7317 }
7318
7319 SlowPathCodeARMVIXL* slow_path =
7320 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
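// Note: `hs` is an unsigned comparison (index >= length), so a negative index, reinterpreted
// as a large unsigned value, also takes the slow path.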
7321 __ Cmp(RegisterFrom(index_loc), length);
7322 codegen_->AddSlowPath(slow_path);
7323 __ B(hs, slow_path->GetEntryLabel());
7324 } else {
7325 SlowPathCodeARMVIXL* slow_path =
7326 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
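// The operands are reversed here (length compared against index), so `ls` (unsigned <=)
// branches to the slow path when length <= index, which again covers negative indices.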
7327 __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7328 codegen_->AddSlowPath(slow_path);
7329 __ B(ls, slow_path->GetEntryLabel());
7330 }
7331 }
7332
7333 void CodeGeneratorARMVIXL::MaybeMarkGCCard(vixl32::Register temp,
7334 vixl32::Register card,
7335 vixl32::Register object,
7336 vixl32::Register value,
7337 bool emit_null_check) {
7338 vixl32::Label is_null;
7339 if (emit_null_check) {
7340 __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
7341 }
7342 MarkGCCard(temp, card, object);
7343 if (emit_null_check) {
7344 __ Bind(&is_null);
7345 }
7346 }
7347
7348 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7349 vixl32::Register card,
7350 vixl32::Register object) {
7351 // Load the address of the card table into `card`.
7352 GetAssembler()->LoadFromOffset(
7353 kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7354 // Calculate the offset (in the card table) of the card corresponding to `object`.
7355 __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7356 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7357 // `object`'s card.
7358 //
7359 // Register `card` contains the address of the card table. Note that the card
7360 // table's base is biased during its creation so that it always starts at an
7361 // address whose least-significant byte is equal to `kCardDirty` (see
7362 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7363 // below writes the `kCardDirty` (byte) value into the `object`'s card
7364 // (located at `card + object >> kCardShift`).
7365 //
7366 // This dual use of the value in register `card` (1. to calculate the location
7367 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7368 // (no need to explicitly load `kCardDirty` as an immediate value).
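// Roughly: *(card + (object >> kCardShift)) = LeastSignificantByte(card), and that byte
// equals kCardDirty thanks to the biasing described above.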
7369 __ Strb(card, MemOperand(card, temp));
7370 }
7371
7372 void CodeGeneratorARMVIXL::CheckGCCardIsValid(vixl32::Register temp,
7373 vixl32::Register card,
7374 vixl32::Register object) {
7375 vixl32::Label done;
7376 // Load the address of the card table into `card`.
7377 GetAssembler()->LoadFromOffset(
7378 kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7379 // Calculate the offset (in the card table) of the card corresponding to `object`.
7380 __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7381 // Assert that the card is not clean while the GC is marking: assert(!clean || !self->is_gc_marking).
7382 __ Ldrb(temp, MemOperand(card, temp));
7383 static_assert(gc::accounting::CardTable::kCardClean == 0);
7384 __ CompareAndBranchIfNonZero(temp, &done, /*is_far_target=*/false);
7385 __ CompareAndBranchIfZero(mr, &done, /*is_far_target=*/false);
7386 __ Bkpt(0);
7387 __ Bind(&done);
7388 }
7389
7390 void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
7391 LOG(FATAL) << "Unreachable";
7392 }
7393
7394 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7395 if (instruction->GetNext()->IsSuspendCheck() &&
7396 instruction->GetBlock()->GetLoopInformation() != nullptr) {
7397 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7398 // The back edge will generate the suspend check.
7399 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7400 }
7401
7402 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7403 }
7404
7405 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7406 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7407 instruction, LocationSummary::kCallOnSlowPath);
7408 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7409 }
7410
7411 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7412 HBasicBlock* block = instruction->GetBlock();
7413 if (block->GetLoopInformation() != nullptr) {
7414 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7415 // The back edge will generate the suspend check.
7416 return;
7417 }
7418 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7419 // The goto will generate the suspend check.
7420 return;
7421 }
7422 GenerateSuspendCheck(instruction, nullptr);
7423 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7424 }
7425
7426 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7427 HBasicBlock* successor) {
7428 SuspendCheckSlowPathARMVIXL* slow_path =
7429 down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7430 if (slow_path == nullptr) {
7431 slow_path =
7432 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7433 instruction->SetSlowPath(slow_path);
7434 codegen_->AddSlowPath(slow_path);
7435 if (successor != nullptr) {
7436 DCHECK(successor->IsLoopHeader());
7437 }
7438 } else {
7439 DCHECK_EQ(slow_path->GetSuccessor(), successor);
7440 }
7441
7442 UseScratchRegisterScope temps(GetVIXLAssembler());
7443 vixl32::Register temp = temps.Acquire();
7444 GetAssembler()->LoadFromOffset(
7445 kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
7446 __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
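// `ne` means at least one suspend or checkpoint flag is set. Without a successor we fall
// through to the slow path's return label; with a successor (a back edge), `eq` continues to
// the loop header and the unconditional branch enters the slow path.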
7447 if (successor == nullptr) {
7448 __ B(ne, slow_path->GetEntryLabel());
7449 __ Bind(slow_path->GetReturnLabel());
7450 } else {
7451 __ B(eq, codegen_->GetLabelOf(successor));
7452 __ B(slow_path->GetEntryLabel());
7453 }
7454 }
7455
7456 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7457 return codegen_->GetAssembler();
7458 }
7459
7460 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7461 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7462 MoveOperands* move = moves_[index];
7463 Location source = move->GetSource();
7464 Location destination = move->GetDestination();
7465
7466 if (source.IsRegister()) {
7467 if (destination.IsRegister()) {
7468 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7469 } else if (destination.IsFpuRegister()) {
7470 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7471 } else {
7472 DCHECK(destination.IsStackSlot());
7473 GetAssembler()->StoreToOffset(kStoreWord,
7474 RegisterFrom(source),
7475 sp,
7476 destination.GetStackIndex());
7477 }
7478 } else if (source.IsStackSlot()) {
7479 if (destination.IsRegister()) {
7480 GetAssembler()->LoadFromOffset(kLoadWord,
7481 RegisterFrom(destination),
7482 sp,
7483 source.GetStackIndex());
7484 } else if (destination.IsFpuRegister()) {
7485 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7486 } else {
7487 DCHECK(destination.IsStackSlot());
7488 vixl32::Register temp = temps.Acquire();
7489 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7490 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7491 }
7492 } else if (source.IsFpuRegister()) {
7493 if (destination.IsRegister()) {
7494 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7495 } else if (destination.IsFpuRegister()) {
7496 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7497 } else {
7498 DCHECK(destination.IsStackSlot());
7499 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7500 }
7501 } else if (source.IsDoubleStackSlot()) {
7502 if (destination.IsDoubleStackSlot()) {
7503 vixl32::DRegister temp = temps.AcquireD();
7504 GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7505 GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7506 } else if (destination.IsRegisterPair()) {
7507 DCHECK(ExpectedPairLayout(destination));
7508 GetAssembler()->LoadFromOffset(
7509 kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7510 } else {
7511 DCHECK(destination.IsFpuRegisterPair()) << destination;
7512 GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7513 }
7514 } else if (source.IsRegisterPair()) {
7515 if (destination.IsRegisterPair()) {
7516 __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7517 __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7518 } else if (destination.IsFpuRegisterPair()) {
7519 __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7520 } else {
7521 DCHECK(destination.IsDoubleStackSlot()) << destination;
7522 DCHECK(ExpectedPairLayout(source));
7523 GetAssembler()->StoreToOffset(kStoreWordPair,
7524 LowRegisterFrom(source),
7525 sp,
7526 destination.GetStackIndex());
7527 }
7528 } else if (source.IsFpuRegisterPair()) {
7529 if (destination.IsRegisterPair()) {
7530 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7531 } else if (destination.IsFpuRegisterPair()) {
7532 __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7533 } else {
7534 DCHECK(destination.IsDoubleStackSlot()) << destination;
7535 GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7536 }
7537 } else {
7538 DCHECK(source.IsConstant()) << source;
7539 HConstant* constant = source.GetConstant();
7540 if (constant->IsIntConstant() || constant->IsNullConstant()) {
7541 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7542 if (destination.IsRegister()) {
7543 __ Mov(RegisterFrom(destination), value);
7544 } else {
7545 DCHECK(destination.IsStackSlot());
7546 vixl32::Register temp = temps.Acquire();
7547 __ Mov(temp, value);
7548 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7549 }
7550 } else if (constant->IsLongConstant()) {
7551 int64_t value = Int64ConstantFrom(source);
7552 if (destination.IsRegisterPair()) {
7553 __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7554 __ Mov(HighRegisterFrom(destination), High32Bits(value));
7555 } else {
7556 DCHECK(destination.IsDoubleStackSlot()) << destination;
7557 vixl32::Register temp = temps.Acquire();
7558 __ Mov(temp, Low32Bits(value));
7559 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7560 __ Mov(temp, High32Bits(value));
7561 GetAssembler()->StoreToOffset(kStoreWord,
7562 temp,
7563 sp,
7564 destination.GetHighStackIndex(kArmWordSize));
7565 }
7566 } else if (constant->IsDoubleConstant()) {
7567 double value = constant->AsDoubleConstant()->GetValue();
7568 if (destination.IsFpuRegisterPair()) {
7569 __ Vmov(DRegisterFrom(destination), value);
7570 } else {
7571 DCHECK(destination.IsDoubleStackSlot()) << destination;
7572 uint64_t int_value = bit_cast<uint64_t, double>(value);
7573 vixl32::Register temp = temps.Acquire();
7574 __ Mov(temp, Low32Bits(int_value));
7575 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7576 __ Mov(temp, High32Bits(int_value));
7577 GetAssembler()->StoreToOffset(kStoreWord,
7578 temp,
7579 sp,
7580 destination.GetHighStackIndex(kArmWordSize));
7581 }
7582 } else {
7583 DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7584 float value = constant->AsFloatConstant()->GetValue();
7585 if (destination.IsFpuRegister()) {
7586 __ Vmov(SRegisterFrom(destination), value);
7587 } else {
7588 DCHECK(destination.IsStackSlot());
7589 vixl32::Register temp = temps.Acquire();
7590 __ Mov(temp, bit_cast<int32_t, float>(value));
7591 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7592 }
7593 }
7594 }
7595 }
7596
7597 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7598 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7599 vixl32::Register temp = temps.Acquire();
7600 __ Mov(temp, reg);
7601 GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7602 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7603 }
7604
7605 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7606 // TODO(VIXL32): Double check the performance of this implementation.
7607 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7608 vixl32::Register temp1 = temps.Acquire();
7609 ScratchRegisterScope ensure_scratch(
7610 this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7611 vixl32::Register temp2(ensure_scratch.GetRegister());
7612
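// If the second scratch register had to be spilled, SpillScratch() pushed one word onto the
// stack, so the caller-provided SP-relative offsets must be adjusted by kArmWordSize.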
7613 int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7614 GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7615 GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7616 GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7617 GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7618 }
7619
7620 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7621 MoveOperands* move = moves_[index];
7622 Location source = move->GetSource();
7623 Location destination = move->GetDestination();
7624 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7625
7626 if (source.IsRegister() && destination.IsRegister()) {
7627 vixl32::Register temp = temps.Acquire();
7628 DCHECK(!RegisterFrom(source).Is(temp));
7629 DCHECK(!RegisterFrom(destination).Is(temp));
7630 __ Mov(temp, RegisterFrom(destination));
7631 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7632 __ Mov(RegisterFrom(source), temp);
7633 } else if (source.IsRegister() && destination.IsStackSlot()) {
7634 Exchange(RegisterFrom(source), destination.GetStackIndex());
7635 } else if (source.IsStackSlot() && destination.IsRegister()) {
7636 Exchange(RegisterFrom(destination), source.GetStackIndex());
7637 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7638 Exchange(source.GetStackIndex(), destination.GetStackIndex());
7639 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7640 vixl32::Register temp = temps.Acquire();
7641 __ Vmov(temp, SRegisterFrom(source));
7642 __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7643 __ Vmov(SRegisterFrom(destination), temp);
7644 } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
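// Swap two core register pairs via a D register: pack the source pair into `temp`, copy the
// destination pair into the source registers, then unpack `temp` into the destination pair.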
7645 vixl32::DRegister temp = temps.AcquireD();
7646 __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7647 __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7648 __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7649 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7650 } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7651 vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7652 int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7653 DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
7654 vixl32::DRegister temp = temps.AcquireD();
7655 __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7656 GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7657 GetAssembler()->StoreDToOffset(temp, sp, mem);
7658 } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7659 vixl32::DRegister first = DRegisterFrom(source);
7660 vixl32::DRegister second = DRegisterFrom(destination);
7661 vixl32::DRegister temp = temps.AcquireD();
7662 __ Vmov(temp, first);
7663 __ Vmov(first, second);
7664 __ Vmov(second, temp);
7665 } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7666 vixl32::DRegister reg = source.IsFpuRegisterPair()
7667 ? DRegisterFrom(source)
7668 : DRegisterFrom(destination);
7669 int mem = source.IsFpuRegisterPair()
7670 ? destination.GetStackIndex()
7671 : source.GetStackIndex();
7672 vixl32::DRegister temp = temps.AcquireD();
7673 __ Vmov(temp, reg);
7674 GetAssembler()->LoadDFromOffset(reg, sp, mem);
7675 GetAssembler()->StoreDToOffset(temp, sp, mem);
7676 } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7677 vixl32::SRegister reg = source.IsFpuRegister()
7678 ? SRegisterFrom(source)
7679 : SRegisterFrom(destination);
7680 int mem = source.IsFpuRegister()
7681 ? destination.GetStackIndex()
7682 : source.GetStackIndex();
7683 vixl32::Register temp = temps.Acquire();
7684 __ Vmov(temp, reg);
7685 GetAssembler()->LoadSFromOffset(reg, sp, mem);
7686 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7687 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7688 vixl32::DRegister temp1 = temps.AcquireD();
7689 vixl32::DRegister temp2 = temps.AcquireD();
7690 __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7691 __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7692 __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7693 __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7694 } else {
7695 LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7696 }
7697 }
7698
7699 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7700 __ Push(vixl32::Register(reg));
7701 }
7702
7703 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7704 __ Pop(vixl32::Register(reg));
7705 }
7706
7707 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7708 HLoadClass::LoadKind desired_class_load_kind) {
7709 switch (desired_class_load_kind) {
7710 case HLoadClass::LoadKind::kInvalid:
7711 LOG(FATAL) << "UNREACHABLE";
7712 UNREACHABLE();
7713 case HLoadClass::LoadKind::kReferrersClass:
7714 break;
7715 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7716 case HLoadClass::LoadKind::kBootImageRelRo:
7717 case HLoadClass::LoadKind::kAppImageRelRo:
7718 case HLoadClass::LoadKind::kBssEntry:
7719 case HLoadClass::LoadKind::kBssEntryPublic:
7720 case HLoadClass::LoadKind::kBssEntryPackage:
7721 DCHECK(!GetCompilerOptions().IsJitCompiler());
7722 break;
7723 case HLoadClass::LoadKind::kJitBootImageAddress:
7724 case HLoadClass::LoadKind::kJitTableAddress:
7725 DCHECK(GetCompilerOptions().IsJitCompiler());
7726 break;
7727 case HLoadClass::LoadKind::kRuntimeCall:
7728 break;
7729 }
7730 return desired_class_load_kind;
7731 }
7732
7733 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7734 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7735 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7736 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7737 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7738 cls,
7739 LocationFrom(calling_convention.GetRegisterAt(0)),
7740 LocationFrom(r0));
7741 DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7742 return;
7743 }
7744 DCHECK_EQ(cls->NeedsAccessCheck(),
7745 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7746 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7747
7748 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7749 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7750 ? LocationSummary::kCallOnSlowPath
7751 : LocationSummary::kNoCall;
7752 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7753 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7754 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7755 }
7756
7757 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7758 locations->SetInAt(0, Location::RequiresRegister());
7759 }
7760 locations->SetOut(Location::RequiresRegister());
7761 if (load_kind == HLoadClass::LoadKind::kBssEntry ||
7762 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7763 load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
7764 if (codegen_->EmitNonBakerReadBarrier()) {
7765 // For non-Baker read barrier we have a temp-clobbering call.
7766 } else {
7767 // Rely on the type resolution or initialization and marking to save everything we need.
7768 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7769 }
7770 }
7771 }
7772
7773 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7774 // move.
7775 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7776 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7777 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7778 codegen_->GenerateLoadClassRuntimeCall(cls);
7779 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7780 return;
7781 }
7782 DCHECK_EQ(cls->NeedsAccessCheck(),
7783 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7784 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7785
7786 LocationSummary* locations = cls->GetLocations();
7787 Location out_loc = locations->Out();
7788 vixl32::Register out = OutputRegister(cls);
7789
7790 const ReadBarrierOption read_barrier_option =
7791 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7792 bool generate_null_check = false;
7793 switch (load_kind) {
7794 case HLoadClass::LoadKind::kReferrersClass: {
7795 DCHECK(!cls->CanCallRuntime());
7796 DCHECK(!cls->MustGenerateClinitCheck());
7797 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7798 vixl32::Register current_method = InputRegisterAt(cls, 0);
7799 codegen_->GenerateGcRootFieldLoad(cls,
7800 out_loc,
7801 current_method,
7802 ArtMethod::DeclaringClassOffset().Int32Value(),
7803 read_barrier_option);
7804 break;
7805 }
7806 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7807 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7808 codegen_->GetCompilerOptions().IsBootImageExtension());
7809 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7810 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7811 codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7812 codegen_->EmitMovwMovtPlaceholder(labels, out);
7813 break;
7814 }
7815 case HLoadClass::LoadKind::kBootImageRelRo: {
7816 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7817 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
7818 codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7819 break;
7820 }
7821 case HLoadClass::LoadKind::kAppImageRelRo: {
7822 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7823 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7824 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7825 codegen_->NewAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7826 codegen_->EmitMovwMovtPlaceholder(labels, out);
7827 __ Ldr(out, MemOperand(out, /*offset=*/ 0));
7828 break;
7829 }
7830 case HLoadClass::LoadKind::kBssEntry:
7831 case HLoadClass::LoadKind::kBssEntryPublic:
7832 case HLoadClass::LoadKind::kBssEntryPackage: {
7833 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7834 codegen_->EmitMovwMovtPlaceholder(labels, out);
7835 // All aligned loads are implicitly atomic consume operations on ARM.
7836 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7837 generate_null_check = true;
7838 break;
7839 }
7840 case HLoadClass::LoadKind::kJitBootImageAddress: {
7841 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7842 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7843 DCHECK_NE(address, 0u);
7844 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7845 break;
7846 }
7847 case HLoadClass::LoadKind::kJitTableAddress: {
7848 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7849 cls->GetTypeIndex(),
7850 cls->GetClass()));
7851 // /* GcRoot<mirror::Class> */ out = *out
7852 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7853 break;
7854 }
7855 case HLoadClass::LoadKind::kRuntimeCall:
7856 case HLoadClass::LoadKind::kInvalid:
7857 LOG(FATAL) << "UNREACHABLE";
7858 UNREACHABLE();
7859 }
7860
7861 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7862 DCHECK(cls->CanCallRuntime());
7863 LoadClassSlowPathARMVIXL* slow_path =
7864 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7865 codegen_->AddSlowPath(slow_path);
7866 if (generate_null_check) {
7867 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7868 }
7869 if (cls->MustGenerateClinitCheck()) {
7870 GenerateClassInitializationCheck(slow_path, out);
7871 } else {
7872 __ Bind(slow_path->GetExitLabel());
7873 }
7874 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7875 }
7876 }
7877
7878 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7879 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7880 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7881 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7882 }
7883
7884 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7885 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7886 }
7887
7888 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7889 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7890 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7891 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7892 }
7893
7894 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7895 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7896 }
7897
7898 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7899 LocationSummary* locations =
7900 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7901 locations->SetInAt(0, Location::RequiresRegister());
7902 if (check->HasUses()) {
7903 locations->SetOut(Location::SameAsFirstInput());
7904 }
7905 // Rely on the type initialization to save everything we need.
7906 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7907 }
7908
7909 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7910 // We assume the class is not null.
7911 LoadClassSlowPathARMVIXL* slow_path =
7912 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7913 codegen_->AddSlowPath(slow_path);
7914 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7915 }
7916
7917 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7918 LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7919 UseScratchRegisterScope temps(GetVIXLAssembler());
7920 vixl32::Register temp = temps.Acquire();
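// The byte at `kClassStatusByteOffset` holds the class status; comparing it against
// `kShiftedVisiblyInitializedValue` with `lo` (unsigned <) sends any class that is not yet
// visibly initialized to the slow path.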
7921 __ Ldrb(temp, MemOperand(class_reg, kClassStatusByteOffset));
7922 __ Cmp(temp, kShiftedVisiblyInitializedValue);
7923 __ B(lo, slow_path->GetEntryLabel());
7924 __ Bind(slow_path->GetExitLabel());
7925 }
7926
7927 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7928 HTypeCheckInstruction* check,
7929 vixl32::Register temp,
7930 vixl32::FlagsUpdate flags_update) {
7931 uint32_t path_to_root = check->GetBitstringPathToRoot();
7932 uint32_t mask = check->GetBitstringMask();
7933 DCHECK(IsPowerOfTwo(mask + 1));
7934 size_t mask_bits = WhichPowerOf2(mask + 1);
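// For example, a mask of 0xffff gives mask_bits == 16, i.e. the bitstring occupies the low
// 16 bits of the 32-bit status word.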
7935
7936 // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
7937 // the Z flag for BNE. This is indicated by the `flags_update` parameter.
7938 if (mask_bits == 16u) {
7939 // Load only the bitstring part of the status word.
7940 __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7941 // Check if the bitstring bits are equal to `path_to_root`.
7942 if (flags_update == SetFlags) {
7943 __ Cmp(temp, path_to_root);
7944 } else {
7945 __ Sub(temp, temp, path_to_root);
7946 }
7947 } else {
7948 // /* uint32_t */ temp = temp->status_
7949 __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7950 if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7951 // Compare the bitstring bits using SUB.
7952 __ Sub(temp, temp, path_to_root);
7953 // Shift out bits that do not contribute to the comparison.
7954 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
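// After the SUB above, the low `mask_bits` bits of `temp` are zero iff the bitstring equals
// `path_to_root`; shifting them into the top bits lets the optional flag update (Z) reflect
// exactly that.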
7955 } else if (IsUint<16>(path_to_root)) {
7956 if (temp.IsLow()) {
7957 // Note: Optimized for size but contains one more dependent instruction than necessary.
7958 // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7959 // macro assembler would use the high reg IP for the constant by default.
7960 // Compare the bitstring bits using SUB.
7961 __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
7962 __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
7963 // Shift out bits that do not contribute to the comparison.
7964 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7965 } else {
7966 // Extract the bitstring bits.
7967 __ Ubfx(temp, temp, 0, mask_bits);
7968 // Check if the bitstring bits are equal to `path_to_root`.
7969 if (flags_update == SetFlags) {
7970 __ Cmp(temp, path_to_root);
7971 } else {
7972 __ Sub(temp, temp, path_to_root);
7973 }
7974 }
7975 } else {
7976 // Shift out bits that do not contribute to the comparison.
7977 __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7978 // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7979 if (flags_update == SetFlags) {
7980 __ Cmp(temp, path_to_root << (32u - mask_bits));
7981 } else {
7982 __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7983 }
7984 }
7985 }
7986 }
7987
7988 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7989 HLoadString::LoadKind desired_string_load_kind) {
7990 switch (desired_string_load_kind) {
7991 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7992 case HLoadString::LoadKind::kBootImageRelRo:
7993 case HLoadString::LoadKind::kBssEntry:
7994 DCHECK(!GetCompilerOptions().IsJitCompiler());
7995 break;
7996 case HLoadString::LoadKind::kJitBootImageAddress:
7997 case HLoadString::LoadKind::kJitTableAddress:
7998 DCHECK(GetCompilerOptions().IsJitCompiler());
7999 break;
8000 case HLoadString::LoadKind::kRuntimeCall:
8001 break;
8002 }
8003 return desired_string_load_kind;
8004 }
8005
8006 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
8007 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
8008 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
8009 HLoadString::LoadKind load_kind = load->GetLoadKind();
8010 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
8011 locations->SetOut(LocationFrom(r0));
8012 } else {
8013 locations->SetOut(Location::RequiresRegister());
8014 if (load_kind == HLoadString::LoadKind::kBssEntry) {
8015 if (codegen_->EmitNonBakerReadBarrier()) {
8016 // For non-Baker read barrier we have a temp-clobbering call.
8017 } else {
8018 // Rely on the pResolveString and marking to save everything we need, including temps.
8019 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
8020 }
8021 }
8022 }
8023 }
8024
8025 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
8026 // move.
8027 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
8028 LocationSummary* locations = load->GetLocations();
8029 Location out_loc = locations->Out();
8030 vixl32::Register out = OutputRegister(load);
8031 HLoadString::LoadKind load_kind = load->GetLoadKind();
8032
8033 switch (load_kind) {
8034 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
8035 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
8036 codegen_->GetCompilerOptions().IsBootImageExtension());
8037 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8038 codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
8039 codegen_->EmitMovwMovtPlaceholder(labels, out);
8040 return;
8041 }
8042 case HLoadString::LoadKind::kBootImageRelRo: {
8043 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
8044 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
8045 codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
8046 return;
8047 }
8048 case HLoadString::LoadKind::kBssEntry: {
8049 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
8050 codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
8051 codegen_->EmitMovwMovtPlaceholder(labels, out);
8052 // All aligned loads are implicitly atomic consume operations on ARM.
8053 codegen_->GenerateGcRootFieldLoad(
8054 load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8055 LoadStringSlowPathARMVIXL* slow_path =
8056 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
8057 codegen_->AddSlowPath(slow_path);
8058 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
8059 __ Bind(slow_path->GetExitLabel());
8060 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
8061 return;
8062 }
8063 case HLoadString::LoadKind::kJitBootImageAddress: {
8064 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
8065 DCHECK_NE(address, 0u);
8066 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
8067 return;
8068 }
8069 case HLoadString::LoadKind::kJitTableAddress: {
8070 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
8071 load->GetStringIndex(),
8072 load->GetString()));
8073 // /* GcRoot<mirror::String> */ out = *out
8074 codegen_->GenerateGcRootFieldLoad(
8075 load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8076 return;
8077 }
8078 default:
8079 break;
8080 }
8081
8082 DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
8083 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8084 __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
8085 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
8086 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
8087 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
8088 }
8089
8090 static int32_t GetExceptionTlsOffset() {
8091 return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
8092 }
8093
8094 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
8095 LocationSummary* locations =
8096 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
8097 locations->SetOut(Location::RequiresRegister());
8098 }
8099
8100 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
8101 vixl32::Register out = OutputRegister(load);
8102 GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
8103 }
8104
8105
8106 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
8107 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
8108 }
8109
8110 void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
8111 UseScratchRegisterScope temps(GetVIXLAssembler());
8112 vixl32::Register temp = temps.Acquire();
8113 __ Mov(temp, 0);
8114 GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
8115 }
8116
8117 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
8118 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8119 instruction, LocationSummary::kCallOnMainOnly);
8120 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8121 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8122 }
8123
8124 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
8125 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
8126 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
8127 }
8128
8129 // Temp is used for read barrier.
8130 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8131 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8132 return 1;
8133 }
8134 if (emit_read_barrier &&
8135 (kUseBakerReadBarrier ||
8136 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
8137 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
8138 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
8139 return 1;
8140 }
8141 return 0;
8142 }
8143
8144 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
8145 // interface pointer, one for loading the current interface.
8146 // The other checks have one temp for loading the object's class.
8147 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8148 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8149 return 3;
8150 }
8151 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
8152 }
8153
8154 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8155 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
8156 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8157 bool baker_read_barrier_slow_path = false;
8158 switch (type_check_kind) {
8159 case TypeCheckKind::kExactCheck:
8160 case TypeCheckKind::kAbstractClassCheck:
8161 case TypeCheckKind::kClassHierarchyCheck:
8162 case TypeCheckKind::kArrayObjectCheck:
8163 case TypeCheckKind::kInterfaceCheck: {
8164 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
8165 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
8166 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
8167 (type_check_kind != TypeCheckKind::kInterfaceCheck);
8168 break;
8169 }
8170 case TypeCheckKind::kArrayCheck:
8171 case TypeCheckKind::kUnresolvedCheck:
8172 call_kind = LocationSummary::kCallOnSlowPath;
8173 break;
8174 case TypeCheckKind::kBitstringCheck:
8175 break;
8176 }
8177
8178 LocationSummary* locations =
8179 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8180 if (baker_read_barrier_slow_path) {
8181 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
8182 }
8183 locations->SetInAt(0, Location::RequiresRegister());
8184 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8185 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8186 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8187 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8188 } else {
8189 locations->SetInAt(1, Location::RequiresRegister());
8190 }
8191 // The "out" register is used as a temporary, so it overlaps with the inputs.
8192 // Note that TypeCheckSlowPathARM uses this register too.
8193 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
8194 locations->AddRegisterTemps(
8195 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
8196 }
8197
8198 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8199 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8200 LocationSummary* locations = instruction->GetLocations();
8201 Location obj_loc = locations->InAt(0);
8202 vixl32::Register obj = InputRegisterAt(instruction, 0);
8203 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8204 ? vixl32::Register()
8205 : InputRegisterAt(instruction, 1);
8206 Location out_loc = locations->Out();
8207 vixl32::Register out = OutputRegister(instruction);
8208 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
8209 DCHECK_LE(num_temps, 1u);
8210 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
8211 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8212 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8213 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8214 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8215 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8216 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8217 const uint32_t object_array_data_offset =
8218 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8219 vixl32::Label done;
8220 vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
8221 SlowPathCodeARMVIXL* slow_path = nullptr;
8222
8223 // Return 0 if `obj` is null.
8224 // Avoid null check if we know obj is not null.
8225 if (instruction->MustDoNullCheck()) {
8226 DCHECK(!out.Is(obj));
8227 __ Mov(out, 0);
8228 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8229 }
8230
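// Each case below loads the object's class and sets `out` to 1 if the type check succeeds
// and to 0 otherwise; branches to `final_label` skip straight to the end once the result
// is known.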
8231 switch (type_check_kind) {
8232 case TypeCheckKind::kExactCheck: {
8233 ReadBarrierOption read_barrier_option =
8234 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8235 // /* HeapReference<Class> */ out = obj->klass_
8236 GenerateReferenceLoadTwoRegisters(instruction,
8237 out_loc,
8238 obj_loc,
8239 class_offset,
8240 maybe_temp_loc,
8241 read_barrier_option);
8242 // Classes must be equal for the instanceof to succeed.
8243 __ Cmp(out, cls);
8244 // We speculatively set the result to false without changing the condition
8245 // flags, which allows us to avoid some branching later.
8246 __ Mov(LeaveFlags, out, 0);
8247
8248 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8249 // we check that the output is in a low register, so that a 16-bit MOV
8250 // encoding can be used.
8251 if (out.IsLow()) {
8252 // We use the scope because of the IT block that follows.
8253 ExactAssemblyScope guard(GetVIXLAssembler(),
8254 2 * vixl32::k16BitT32InstructionSizeInBytes,
8255 CodeBufferCheckScope::kExactSize);
8256
8257 __ it(eq);
8258 __ mov(eq, out, 1);
8259 } else {
8260 __ B(ne, final_label, /* is_far_target= */ false);
8261 __ Mov(out, 1);
8262 }
8263
8264 break;
8265 }
8266
8267 case TypeCheckKind::kAbstractClassCheck: {
8268 ReadBarrierOption read_barrier_option =
8269 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8270 // /* HeapReference<Class> */ out = obj->klass_
8271 GenerateReferenceLoadTwoRegisters(instruction,
8272 out_loc,
8273 obj_loc,
8274 class_offset,
8275 maybe_temp_loc,
8276 read_barrier_option);
8277 // If the class is abstract, we eagerly fetch the super class of the
8278 // object to avoid doing a comparison we know will fail.
8279 vixl32::Label loop;
8280 __ Bind(&loop);
8281 // /* HeapReference<Class> */ out = out->super_class_
8282 GenerateReferenceLoadOneRegister(instruction,
8283 out_loc,
8284 super_offset,
8285 maybe_temp_loc,
8286 read_barrier_option);
8287 // If `out` is null, we use it for the result, and jump to the final label.
8288 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8289 __ Cmp(out, cls);
8290 __ B(ne, &loop, /* is_far_target= */ false);
8291 __ Mov(out, 1);
8292 break;
8293 }
8294
8295 case TypeCheckKind::kClassHierarchyCheck: {
8296 ReadBarrierOption read_barrier_option =
8297 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8298 // /* HeapReference<Class> */ out = obj->klass_
8299 GenerateReferenceLoadTwoRegisters(instruction,
8300 out_loc,
8301 obj_loc,
8302 class_offset,
8303 maybe_temp_loc,
8304 read_barrier_option);
8305 // Walk over the class hierarchy to find a match.
8306 vixl32::Label loop, success;
8307 __ Bind(&loop);
8308 __ Cmp(out, cls);
8309 __ B(eq, &success, /* is_far_target= */ false);
8310 // /* HeapReference<Class> */ out = out->super_class_
8311 GenerateReferenceLoadOneRegister(instruction,
8312 out_loc,
8313 super_offset,
8314 maybe_temp_loc,
8315 read_barrier_option);
8316 // This is essentially a null check, but it sets the condition flags to the
8317 // proper value for the code that follows the loop, i.e. not `eq`.
8318 __ Cmp(out, 1);
8319 __ B(hs, &loop, /* is_far_target= */ false);
8320
8321 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8322 // we check that the output is in a low register, so that a 16-bit MOV
8323 // encoding can be used.
8324 if (out.IsLow()) {
8325 // If `out` is null, we use it for the result, and the condition flags
8326 // have already been set to `ne`, so the IT block that comes afterwards
8327 // (and which handles the successful case) turns into a NOP (instead of
8328 // overwriting `out`).
8329 __ Bind(&success);
8330
8331 // We use the scope because of the IT block that follows.
8332 ExactAssemblyScope guard(GetVIXLAssembler(),
8333 2 * vixl32::k16BitT32InstructionSizeInBytes,
8334 CodeBufferCheckScope::kExactSize);
8335
8336 // There is only one branch to the `success` label (which is bound to this
8337 // IT block), and it has the same condition, `eq`, so in that case the MOV
8338 // is executed.
8339 __ it(eq);
8340 __ mov(eq, out, 1);
8341 } else {
8342 // If `out` is null, we use it for the result, and jump to the final label.
8343 __ B(final_label);
8344 __ Bind(&success);
8345 __ Mov(out, 1);
8346 }
8347
8348 break;
8349 }
8350
8351 case TypeCheckKind::kArrayObjectCheck: {
8352 ReadBarrierOption read_barrier_option =
8353 codegen_->ReadBarrierOptionForInstanceOf(instruction);
8354 // /* HeapReference<Class> */ out = obj->klass_
8355 GenerateReferenceLoadTwoRegisters(instruction,
8356 out_loc,
8357 obj_loc,
8358 class_offset,
8359 maybe_temp_loc,
8360 read_barrier_option);
8361 // Do an exact check.
8362 vixl32::Label exact_check;
8363 __ Cmp(out, cls);
8364 __ B(eq, &exact_check, /* is_far_target= */ false);
8365 // Otherwise, we need to check that the object's class is a non-primitive array.
8366 // /* HeapReference<Class> */ out = out->component_type_
8367 GenerateReferenceLoadOneRegister(instruction,
8368 out_loc,
8369 component_offset,
8370 maybe_temp_loc,
8371 read_barrier_option);
8372 // If `out` is null, we use it for the result, and jump to the final label.
8373 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8374 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8375 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8376 __ Cmp(out, 0);
8377 // We speculatively set the result to false without changing the condition
8378 // flags, which allows us to avoid some branching later.
8379 __ Mov(LeaveFlags, out, 0);
8380
8381 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8382 // we check that the output is in a low register, so that a 16-bit MOV
8383 // encoding can be used.
8384 if (out.IsLow()) {
8385 __ Bind(&exact_check);
8386
8387 // We use the scope because of the IT block that follows.
8388 ExactAssemblyScope guard(GetVIXLAssembler(),
8389 2 * vixl32::k16BitT32InstructionSizeInBytes,
8390 CodeBufferCheckScope::kExactSize);
8391
8392 __ it(eq);
8393 __ mov(eq, out, 1);
8394 } else {
8395 __ B(ne, final_label, /* is_far_target= */ false);
8396 __ Bind(&exact_check);
8397 __ Mov(out, 1);
8398 }
8399
8400 break;
8401 }
8402
8403 case TypeCheckKind::kArrayCheck: {
8404 // No read barrier since the slow path will retry upon failure.
8405 // /* HeapReference<Class> */ out = obj->klass_
8406 GenerateReferenceLoadTwoRegisters(instruction,
8407 out_loc,
8408 obj_loc,
8409 class_offset,
8410 maybe_temp_loc,
8411 kWithoutReadBarrier);
8412 __ Cmp(out, cls);
8413 DCHECK(locations->OnlyCallsOnSlowPath());
8414 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8415 instruction, /* is_fatal= */ false);
8416 codegen_->AddSlowPath(slow_path);
8417 __ B(ne, slow_path->GetEntryLabel());
8418 __ Mov(out, 1);
8419 break;
8420 }
8421
8422 case TypeCheckKind::kInterfaceCheck: {
8423 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8424 DCHECK(locations->OnlyCallsOnSlowPath());
8425 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8426 instruction, /* is_fatal= */ false);
8427 codegen_->AddSlowPath(slow_path);
8428 if (codegen_->EmitNonBakerReadBarrier()) {
8429 __ B(slow_path->GetEntryLabel());
8430 break;
8431 }
8432 // For Baker read barrier, take the slow path while marking.
8433 __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
8434 }
8435
8436 // Fast-path without read barriers.
8437 UseScratchRegisterScope temps(GetVIXLAssembler());
8438 vixl32::Register temp = RegisterFrom(maybe_temp_loc);
8439 vixl32::Register temp2 = temps.Acquire();
8440 // /* HeapReference<Class> */ temp = obj->klass_
8441 __ Ldr(temp, MemOperand(obj, class_offset));
8442 GetAssembler()->MaybeUnpoisonHeapReference(temp);
8443 // /* HeapReference<Class> */ temp = temp->iftable_
8444 __ Ldr(temp, MemOperand(temp, iftable_offset));
8445 GetAssembler()->MaybeUnpoisonHeapReference(temp);
8446 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8447 __ Ldr(out, MemOperand(temp, array_length_offset));
8448 // Loop through the `IfTable` and check if any class matches.
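// Each `IfTable` entry is a pair (interface class, method array), which is why the loop
// advances the cursor by two heap references and decrements the remaining length by 2.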
8449 vixl32::Label loop;
8450 __ Bind(&loop);
8451 // If taken, the result in `out` is already 0 (false).
8452 __ CompareAndBranchIfZero(out, &done, /* is_far_target= */ false);
8453 __ Ldr(temp2, MemOperand(temp, object_array_data_offset));
8454 GetAssembler()->MaybeUnpoisonHeapReference(temp2);
8455 // Go to next interface.
8456 __ Add(temp, temp, static_cast<uint32_t>(2 * kHeapReferenceSize));
8457 __ Sub(out, out, 2);
8458 // Compare the classes and continue the loop if they do not match.
8459 __ Cmp(cls, temp2);
8460 __ B(ne, &loop);
8461 __ Mov(out, 1);
8462 break;
8463 }
8464
8465 case TypeCheckKind::kUnresolvedCheck: {
8466 // Note that we indeed only call on slow path, but we always go
8467 // into the slow path for the unresolved check case.
8468 //
8469 // We cannot directly call the InstanceofNonTrivial runtime
8470 // entry point without resorting to a type checking slow path
8471 // here (i.e. by calling InvokeRuntime directly), as it would
8472 // require assigning fixed registers for the inputs of this
8473 // HInstanceOf instruction (following the runtime calling
8474 // convention), which might be cluttered by the potential first
8475 // read barrier emission at the beginning of this method.
8476 //
8477 // TODO: Introduce a new runtime entry point taking the object
8478 // to test (instead of its class) as argument, and let it deal
8479 // with the read barrier issues. This will let us refactor this
8480 // case of the `switch` code as it was previously (with a direct
8481 // call to the runtime not using a type checking slow path).
8482 // This should also be beneficial for the other cases above.
8483 DCHECK(locations->OnlyCallsOnSlowPath());
8484 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8485 instruction, /* is_fatal= */ false);
8486 codegen_->AddSlowPath(slow_path);
8487 __ B(slow_path->GetEntryLabel());
8488 break;
8489 }
8490
8491 case TypeCheckKind::kBitstringCheck: {
8492 // /* HeapReference<Class> */ temp = obj->klass_
8493 GenerateReferenceLoadTwoRegisters(instruction,
8494 out_loc,
8495 obj_loc,
8496 class_offset,
8497 maybe_temp_loc,
8498 kWithoutReadBarrier);
8499
8500 GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8501 // If `out` is a low reg and we would have another low reg temp, we could
8502 // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8503 //
8504 // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8505 // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8506 // would be the same and we would have fewer direct data dependencies.
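// (CLZ of zero is 32, so CLZ followed by LSR #5 yields 1 exactly when the comparison
// result was zero, and 0 otherwise.)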
8507 codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
8508 break;
8509 }
8510 }
8511
8512 if (done.IsReferenced()) {
8513 __ Bind(&done);
8514 }
8515
8516 if (slow_path != nullptr) {
8517 __ Bind(slow_path->GetExitLabel());
8518 }
8519 }
8520
8521 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8522 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8523 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8524 LocationSummary* locations =
8525 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8526 locations->SetInAt(0, Location::RequiresRegister());
8527 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8528 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8529 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8530 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8531 } else {
8532 locations->SetInAt(1, Location::RequiresRegister());
8533 }
8534 locations->AddRegisterTemps(
8535 NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8536 }
8537
8538 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8539 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8540 LocationSummary* locations = instruction->GetLocations();
8541 Location obj_loc = locations->InAt(0);
8542 vixl32::Register obj = InputRegisterAt(instruction, 0);
8543 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8544 ? vixl32::Register()
8545 : InputRegisterAt(instruction, 1);
8546 Location temp_loc = locations->GetTemp(0);
8547 vixl32::Register temp = RegisterFrom(temp_loc);
8548 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8549 DCHECK_LE(num_temps, 3u);
8550 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8551 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8552 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8553 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8554 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8555 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8556 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8557 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8558 const uint32_t object_array_data_offset =
8559 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8560
8561 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8562 SlowPathCodeARMVIXL* type_check_slow_path =
8563 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8564 instruction, is_type_check_slow_path_fatal);
8565 codegen_->AddSlowPath(type_check_slow_path);
8566
8567 vixl32::Label done;
8568 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8569 // Avoid null check if we know obj is not null.
8570 if (instruction->MustDoNullCheck()) {
8571 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8572 }
8573
8574 switch (type_check_kind) {
8575 case TypeCheckKind::kExactCheck:
8576 case TypeCheckKind::kArrayCheck: {
8577 // /* HeapReference<Class> */ temp = obj->klass_
8578 GenerateReferenceLoadTwoRegisters(instruction,
8579 temp_loc,
8580 obj_loc,
8581 class_offset,
8582 maybe_temp2_loc,
8583 kWithoutReadBarrier);
8584
8585 __ Cmp(temp, cls);
8586 // Jump to slow path for throwing the exception or doing a
8587 // more involved array check.
8588 __ B(ne, type_check_slow_path->GetEntryLabel());
8589 break;
8590 }
8591
8592 case TypeCheckKind::kAbstractClassCheck: {
8593 // /* HeapReference<Class> */ temp = obj->klass_
8594 GenerateReferenceLoadTwoRegisters(instruction,
8595 temp_loc,
8596 obj_loc,
8597 class_offset,
8598 maybe_temp2_loc,
8599 kWithoutReadBarrier);
8600
8601 // If the class is abstract, we eagerly fetch the super class of the
8602 // object to avoid doing a comparison we know will fail.
8603 vixl32::Label loop;
8604 __ Bind(&loop);
8605 // /* HeapReference<Class> */ temp = temp->super_class_
8606 GenerateReferenceLoadOneRegister(instruction,
8607 temp_loc,
8608 super_offset,
8609 maybe_temp2_loc,
8610 kWithoutReadBarrier);
8611
8612 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8613 // exception.
8614 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8615
8616 // Otherwise, compare the classes.
8617 __ Cmp(temp, cls);
8618 __ B(ne, &loop, /* is_far_target= */ false);
8619 break;
8620 }
8621
8622 case TypeCheckKind::kClassHierarchyCheck: {
8623 // /* HeapReference<Class> */ temp = obj->klass_
8624 GenerateReferenceLoadTwoRegisters(instruction,
8625 temp_loc,
8626 obj_loc,
8627 class_offset,
8628 maybe_temp2_loc,
8629 kWithoutReadBarrier);
8630
8631 // Walk over the class hierarchy to find a match.
8632 vixl32::Label loop;
8633 __ Bind(&loop);
8634 __ Cmp(temp, cls);
8635 __ B(eq, final_label, /* is_far_target= */ false);
8636
8637 // /* HeapReference<Class> */ temp = temp->super_class_
8638 GenerateReferenceLoadOneRegister(instruction,
8639 temp_loc,
8640 super_offset,
8641 maybe_temp2_loc,
8642 kWithoutReadBarrier);
8643
8644 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8645 // exception.
8646 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8647 // Otherwise, jump to the beginning of the loop.
8648 __ B(&loop);
8649 break;
8650 }
8651
8652 case TypeCheckKind::kArrayObjectCheck: {
8653 // /* HeapReference<Class> */ temp = obj->klass_
8654 GenerateReferenceLoadTwoRegisters(instruction,
8655 temp_loc,
8656 obj_loc,
8657 class_offset,
8658 maybe_temp2_loc,
8659 kWithoutReadBarrier);
8660
8661 // Do an exact check.
8662 __ Cmp(temp, cls);
8663 __ B(eq, final_label, /* is_far_target= */ false);
8664
8665 // Otherwise, we need to check that the object's class is a non-primitive array.
8666 // /* HeapReference<Class> */ temp = temp->component_type_
8667 GenerateReferenceLoadOneRegister(instruction,
8668 temp_loc,
8669 component_offset,
8670 maybe_temp2_loc,
8671 kWithoutReadBarrier);
8672 // If the component type is null, jump to the slow path to throw the exception.
8673 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8674 // Otherwise, the object is indeed an array. Further check that its component type is
8675 // not a primitive type.
8676 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8677 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8678 __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8679 break;
8680 }
8681
8682 case TypeCheckKind::kUnresolvedCheck:
8683 // We always go into the type check slow path for the unresolved check case.
8684 // We cannot directly call the CheckCast runtime entry point
8685 // without resorting to a type checking slow path here (i.e. by
8686 // calling InvokeRuntime directly), as it would require
8687 // assigning fixed registers for the inputs of this HCheckCast
8688 // instruction (following the runtime calling convention), which
8689 // might be cluttered by the potential first read barrier
8690 // emission at the beginning of this method.
8691
8692 __ B(type_check_slow_path->GetEntryLabel());
8693 break;
8694
8695 case TypeCheckKind::kInterfaceCheck: {
8696 // Avoid read barriers to improve performance of the fast path. We cannot get false
8697 // positives by doing this.
8698 // /* HeapReference<Class> */ temp = obj->klass_
8699 GenerateReferenceLoadTwoRegisters(instruction,
8700 temp_loc,
8701 obj_loc,
8702 class_offset,
8703 maybe_temp2_loc,
8704 kWithoutReadBarrier);
8705
8706 // /* HeapReference<Class> */ temp = temp->iftable_
8707 GenerateReferenceLoadOneRegister(instruction,
8708 temp_loc,
8709 iftable_offset,
8710 maybe_temp2_loc,
8711 kWithoutReadBarrier);
8712 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8713 __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8714 // Loop through the iftable and check if any class matches.
8715 vixl32::Label start_loop;
8716 __ Bind(&start_loop);
8717 __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8718 type_check_slow_path->GetEntryLabel());
8719 __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8720 GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8721 // Go to next interface.
8722 __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8723 __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8724 // Compare the classes and continue the loop if they do not match.
8725 __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8726 __ B(ne, &start_loop, /* is_far_target= */ false);
8727 break;
8728 }
8729
8730 case TypeCheckKind::kBitstringCheck: {
8731 // /* HeapReference<Class> */ temp = obj->klass_
8732 GenerateReferenceLoadTwoRegisters(instruction,
8733 temp_loc,
8734 obj_loc,
8735 class_offset,
8736 maybe_temp2_loc,
8737 kWithoutReadBarrier);
8738
8739 GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8740 __ B(ne, type_check_slow_path->GetEntryLabel());
8741 break;
8742 }
8743 }
8744 if (done.IsReferenced()) {
8745 __ Bind(&done);
8746 }
8747
8748 __ Bind(type_check_slow_path->GetExitLabel());
8749 }
8750
8751 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8752 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8753 instruction, LocationSummary::kCallOnMainOnly);
8754 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8755 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8756 }
8757
8758 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8759 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8760 instruction,
8761 instruction->GetDexPc());
8762 if (instruction->IsEnter()) {
8763 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8764 } else {
8765 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8766 }
8767 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8768 }
8769
8770 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8771 HandleBitwiseOperation(instruction, AND);
8772 }
8773
8774 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8775 HandleBitwiseOperation(instruction, ORR);
8776 }
8777
8778 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8779 HandleBitwiseOperation(instruction, EOR);
8780 }
8781
8782 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8783 LocationSummary* locations =
8784 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8785 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8786 || instruction->GetResultType() == DataType::Type::kInt64);
8787 // Note: GVN reorders commutative operations to have the constant on the right hand side.
8788 locations->SetInAt(0, Location::RequiresRegister());
8789 locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8790 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8791 }
8792
8793 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8794 HandleBitwiseOperation(instruction);
8795 }
8796
8797 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8798 HandleBitwiseOperation(instruction);
8799 }
8800
8801 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8802 HandleBitwiseOperation(instruction);
8803 }
8804
8805 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8806 LocationSummary* locations =
8807 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8808 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8809 || instruction->GetResultType() == DataType::Type::kInt64);
8810
8811 locations->SetInAt(0, Location::RequiresRegister());
8812 locations->SetInAt(1, Location::RequiresRegister());
8813 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8814 }
8815
8816 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8817 LocationSummary* locations = instruction->GetLocations();
8818 Location first = locations->InAt(0);
8819 Location second = locations->InAt(1);
8820 Location out = locations->Out();
8821
8822 if (instruction->GetResultType() == DataType::Type::kInt32) {
8823 vixl32::Register first_reg = RegisterFrom(first);
8824 vixl32::Register second_reg = RegisterFrom(second);
8825 vixl32::Register out_reg = RegisterFrom(out);
8826
8827 switch (instruction->GetOpKind()) {
8828 case HInstruction::kAnd:
8829 __ Bic(out_reg, first_reg, second_reg);
8830 break;
8831 case HInstruction::kOr:
8832 __ Orn(out_reg, first_reg, second_reg);
8833 break;
8834 // There is no EON on arm.
8835 case HInstruction::kXor:
8836 default:
8837 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8838 UNREACHABLE();
8839 }
8840 return;
8841
8842 } else {
8843 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8844 vixl32::Register first_low = LowRegisterFrom(first);
8845 vixl32::Register first_high = HighRegisterFrom(first);
8846 vixl32::Register second_low = LowRegisterFrom(second);
8847 vixl32::Register second_high = HighRegisterFrom(second);
8848 vixl32::Register out_low = LowRegisterFrom(out);
8849 vixl32::Register out_high = HighRegisterFrom(out);
8850
8851 switch (instruction->GetOpKind()) {
8852 case HInstruction::kAnd:
8853 __ Bic(out_low, first_low, second_low);
8854 __ Bic(out_high, first_high, second_high);
8855 break;
8856 case HInstruction::kOr:
8857 __ Orn(out_low, first_low, second_low);
8858 __ Orn(out_high, first_high, second_high);
8859 break;
8860 // There is no EON on arm.
8861 case HInstruction::kXor:
8862 default:
8863 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8864 UNREACHABLE();
8865 }
8866 }
8867 }
8868
8869 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8870 HDataProcWithShifterOp* instruction) {
8871 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8872 instruction->GetType() == DataType::Type::kInt64);
8873 LocationSummary* locations =
8874 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
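// For 64-bit extension ops the low half of the result is written before the high half is
// computed from the (32-bit) second input, so the output must not alias that input.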
8875 const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8876 HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
8877
8878 locations->SetInAt(0, Location::RequiresRegister());
8879 locations->SetInAt(1, Location::RequiresRegister());
8880 locations->SetOut(Location::RequiresRegister(),
8881 overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8882 }
8883
8884 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8885 HDataProcWithShifterOp* instruction) {
8886 const LocationSummary* const locations = instruction->GetLocations();
8887 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8888 const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8889
8890 if (instruction->GetType() == DataType::Type::kInt32) {
8891 const vixl32::Register first = InputRegisterAt(instruction, 0);
8892 const vixl32::Register output = OutputRegister(instruction);
8893 const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8894 ? LowRegisterFrom(locations->InAt(1))
8895 : InputRegisterAt(instruction, 1);
8896
8897 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8898 DCHECK_EQ(kind, HInstruction::kAdd);
8899
8900 switch (op_kind) {
8901 case HDataProcWithShifterOp::kUXTB:
8902 __ Uxtab(output, first, second);
8903 break;
8904 case HDataProcWithShifterOp::kUXTH:
8905 __ Uxtah(output, first, second);
8906 break;
8907 case HDataProcWithShifterOp::kSXTB:
8908 __ Sxtab(output, first, second);
8909 break;
8910 case HDataProcWithShifterOp::kSXTH:
8911 __ Sxtah(output, first, second);
8912 break;
8913 default:
8914 LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8915 UNREACHABLE();
8916 }
8917 } else {
8918 GenerateDataProcInstruction(kind,
8919 output,
8920 first,
8921 Operand(second,
8922 ShiftFromOpKind(op_kind),
8923 instruction->GetShiftAmount()),
8924 codegen_);
8925 }
8926 } else {
8927 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8928
8929 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8930 const vixl32::Register second = InputRegisterAt(instruction, 1);
8931
8932 DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
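// The high word of the sign-extended second input is its sign bit replicated, i.e. ASR #31.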
8933 GenerateDataProc(kind,
8934 locations->Out(),
8935 locations->InAt(0),
8936 second,
8937 Operand(second, ShiftType::ASR, 31),
8938 codegen_);
8939 } else {
8940 GenerateLongDataProc(instruction, codegen_);
8941 }
8942 }
8943 }
8944
8945 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8946 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8947 vixl32::Register first,
8948 uint32_t value) {
8949 // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8950 if (value == 0xffffffffu) {
8951 if (!out.Is(first)) {
8952 __ Mov(out, first);
8953 }
8954 return;
8955 }
8956 if (value == 0u) {
8957 __ Mov(out, 0);
8958 return;
8959 }
8960 if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8961 __ And(out, first, value);
8962 } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
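// BIC computes `first & ~operand`, so BIC with ~value yields `first & value`.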
8963 __ Bic(out, first, ~value);
8964 } else {
8965 DCHECK(IsPowerOfTwo(value + 1));
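// `value + 1` being a power of two means `value` is a contiguous low-bit mask,
// so the AND can be done by extracting the low bits with UBFX.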
8966 __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8967 }
8968 }
8969
8970 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8971 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8972 vixl32::Register first,
8973 uint32_t value) {
8974 // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8975 if (value == 0u) {
8976 if (!out.Is(first)) {
8977 __ Mov(out, first);
8978 }
8979 return;
8980 }
8981 if (value == 0xffffffffu) {
8982 __ Mvn(out, 0);
8983 return;
8984 }
8985 if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8986 __ Orr(out, first, value);
8987 } else {
8988 DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
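// ORN computes `first | ~operand`, so ORN with ~value yields `first | value`.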
8989 __ Orn(out, first, ~value);
8990 }
8991 }
8992
8993 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8994 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8995 vixl32::Register first,
8996 uint32_t value) {
8997 // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
8998 if (value == 0u) {
8999 if (!out.Is(first)) {
9000 __ Mov(out, first);
9001 }
9002 return;
9003 }
9004 __ Eor(out, first, value);
9005 }
9006
9007 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
9008 Location first,
9009 uint64_t value) {
9010 vixl32::Register out_low = LowRegisterFrom(out);
9011 vixl32::Register out_high = HighRegisterFrom(out);
9012 vixl32::Register first_low = LowRegisterFrom(first);
9013 vixl32::Register first_high = HighRegisterFrom(first);
9014 uint32_t value_low = Low32Bits(value);
9015 uint32_t value_high = High32Bits(value);
9016 if (value_low == 0u) {
9017 if (!out_low.Is(first_low)) {
9018 __ Mov(out_low, first_low);
9019 }
9020 __ Add(out_high, first_high, value_high);
9021 return;
9022 }
9023 __ Adds(out_low, first_low, value_low);
9024 if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
9025 __ Adc(out_high, first_high, value_high);
9026 } else {
9027 DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
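// SBC computes `first - operand - 1 + carry`, so SBC with ~value_high is equivalent to
// ADC with value_high (first - ~v - 1 + C == first + v + C).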
9028 __ Sbc(out_high, first_high, ~value_high);
9029 }
9030 }
9031
9032 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
9033 LocationSummary* locations = instruction->GetLocations();
9034 Location first = locations->InAt(0);
9035 Location second = locations->InAt(1);
9036 Location out = locations->Out();
9037
9038 if (second.IsConstant()) {
9039 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
9040 uint32_t value_low = Low32Bits(value);
9041 if (instruction->GetResultType() == DataType::Type::kInt32) {
9042 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9043 vixl32::Register out_reg = OutputRegister(instruction);
9044 if (instruction->IsAnd()) {
9045 GenerateAndConst(out_reg, first_reg, value_low);
9046 } else if (instruction->IsOr()) {
9047 GenerateOrrConst(out_reg, first_reg, value_low);
9048 } else {
9049 DCHECK(instruction->IsXor());
9050 GenerateEorConst(out_reg, first_reg, value_low);
9051 }
9052 } else {
9053 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9054 uint32_t value_high = High32Bits(value);
9055 vixl32::Register first_low = LowRegisterFrom(first);
9056 vixl32::Register first_high = HighRegisterFrom(first);
9057 vixl32::Register out_low = LowRegisterFrom(out);
9058 vixl32::Register out_high = HighRegisterFrom(out);
9059 if (instruction->IsAnd()) {
9060 GenerateAndConst(out_low, first_low, value_low);
9061 GenerateAndConst(out_high, first_high, value_high);
9062 } else if (instruction->IsOr()) {
9063 GenerateOrrConst(out_low, first_low, value_low);
9064 GenerateOrrConst(out_high, first_high, value_high);
9065 } else {
9066 DCHECK(instruction->IsXor());
9067 GenerateEorConst(out_low, first_low, value_low);
9068 GenerateEorConst(out_high, first_high, value_high);
9069 }
9070 }
9071 return;
9072 }
9073
9074 if (instruction->GetResultType() == DataType::Type::kInt32) {
9075 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9076 vixl32::Register second_reg = InputRegisterAt(instruction, 1);
9077 vixl32::Register out_reg = OutputRegister(instruction);
9078 if (instruction->IsAnd()) {
9079 __ And(out_reg, first_reg, second_reg);
9080 } else if (instruction->IsOr()) {
9081 __ Orr(out_reg, first_reg, second_reg);
9082 } else {
9083 DCHECK(instruction->IsXor());
9084 __ Eor(out_reg, first_reg, second_reg);
9085 }
9086 } else {
9087 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9088 vixl32::Register first_low = LowRegisterFrom(first);
9089 vixl32::Register first_high = HighRegisterFrom(first);
9090 vixl32::Register second_low = LowRegisterFrom(second);
9091 vixl32::Register second_high = HighRegisterFrom(second);
9092 vixl32::Register out_low = LowRegisterFrom(out);
9093 vixl32::Register out_high = HighRegisterFrom(out);
9094 if (instruction->IsAnd()) {
9095 __ And(out_low, first_low, second_low);
9096 __ And(out_high, first_high, second_high);
9097 } else if (instruction->IsOr()) {
9098 __ Orr(out_low, first_low, second_low);
9099 __ Orr(out_high, first_high, second_high);
9100 } else {
9101 DCHECK(instruction->IsXor());
9102 __ Eor(out_low, first_low, second_low);
9103 __ Eor(out_high, first_high, second_high);
9104 }
9105 }
9106 }
9107
9108 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
9109 HInstruction* instruction,
9110 Location out,
9111 uint32_t offset,
9112 Location maybe_temp,
9113 ReadBarrierOption read_barrier_option) {
9114 vixl32::Register out_reg = RegisterFrom(out);
9115 if (read_barrier_option == kWithReadBarrier) {
9116 DCHECK(codegen_->EmitReadBarrier());
9117 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9118 if (kUseBakerReadBarrier) {
9119 // Load with fast path based Baker's read barrier.
9120 // /* HeapReference<Object> */ out = *(out + offset)
9121 codegen_->GenerateFieldLoadWithBakerReadBarrier(
9122 instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
9123 } else {
9124 // Load with slow path based read barrier.
9125 // Save the value of `out` into `maybe_temp` before overwriting it
9126 // in the following move operation, as we will need it for the
9127 // read barrier below.
9128 __ Mov(RegisterFrom(maybe_temp), out_reg);
9129 // /* HeapReference<Object> */ out = *(out + offset)
9130 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9131 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
9132 }
9133 } else {
9134 // Plain load with no read barrier.
9135 // /* HeapReference<Object> */ out = *(out + offset)
9136 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9137 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9138 }
9139 }
9140
9141 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
9142 HInstruction* instruction,
9143 Location out,
9144 Location obj,
9145 uint32_t offset,
9146 Location maybe_temp,
9147 ReadBarrierOption read_barrier_option) {
9148 vixl32::Register out_reg = RegisterFrom(out);
9149 vixl32::Register obj_reg = RegisterFrom(obj);
9150 if (read_barrier_option == kWithReadBarrier) {
9151 DCHECK(codegen_->EmitReadBarrier());
9152 if (kUseBakerReadBarrier) {
9153 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9154 // Load with fast path based Baker's read barrier.
9155 // /* HeapReference<Object> */ out = *(obj + offset)
9156 codegen_->GenerateFieldLoadWithBakerReadBarrier(
9157 instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
9158 } else {
9159 // Load with slow path based read barrier.
9160 // /* HeapReference<Object> */ out = *(obj + offset)
9161 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9162 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
9163 }
9164 } else {
9165 // Plain load with no read barrier.
9166 // /* HeapReference<Object> */ out = *(obj + offset)
9167 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9168 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9169 }
9170 }
9171
9172 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
9173 HInstruction* instruction,
9174 Location root,
9175 vixl32::Register obj,
9176 uint32_t offset,
9177 ReadBarrierOption read_barrier_option) {
9178 vixl32::Register root_reg = RegisterFrom(root);
9179 if (read_barrier_option == kWithReadBarrier) {
9180 DCHECK(EmitReadBarrier());
9181 if (kUseBakerReadBarrier) {
9182 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
9183 // Baker's read barriers are used.
9184
9185 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
9186 // the Marking Register) to decide whether we need to enter
9187 // the slow path to mark the GC root.
9188 //
9189 // We use shared thunks for the slow path; shared within the method
9190 // for JIT, across methods for AOT. That thunk checks the reference
9191 // and jumps to the entrypoint if needed.
9192 //
9193 // lr = &return_address;
9194 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
9195 // if (mr) { // Thread::Current()->GetIsGcMarking()
9196 // goto gc_root_thunk<root_reg>(lr)
9197 // }
9198 // return_address:
9199
9200 UseScratchRegisterScope temps(GetVIXLAssembler());
9201 temps.Exclude(ip);
9202 bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
9203 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
9204
9205 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
9206 size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
9207 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9208 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9209 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9210 vixl32::Label return_address;
9211 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9212 __ cmp(mr, Operand(0));
9213 // Currently the offset is always within range. If that changes,
9214 // we shall have to split the load the same way as for fields.
9215 DCHECK_LT(offset, kReferenceLoadMinFarOffset);
9216 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9217 __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
9218 EmitBakerReadBarrierBne(custom_data);
9219 __ bind(&return_address);
9220 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9221 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9222 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
9223 } else {
9224 // GC root loaded through a slow path for read barriers other
9225 // than Baker's.
9226 // /* GcRoot<mirror::Object>* */ root = obj + offset
9227 __ Add(root_reg, obj, offset);
9228 // /* mirror::Object* */ root = root->Read()
9229 GenerateReadBarrierForRootSlow(instruction, root, root);
9230 }
9231 } else {
9232 // Plain GC root load with no read barrier.
9233 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
9234 GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
9235 // Note that GC roots are not affected by heap poisoning, thus we
9236 // do not have to unpoison `root_reg` here.
9237 }
9238 MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
9239 }
9240
9241 void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier(
9242 vixl::aarch32::Register marked_old_value,
9243 vixl::aarch32::Register old_value) {
9244 DCHECK(EmitBakerReadBarrier());
9245
9246 // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
9247 // For low registers, we can reuse the GC root narrow entrypoint, for high registers
9248 // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
9249 bool narrow_mov = marked_old_value.IsLow();
9250 uint32_t custom_data = narrow_mov
9251 ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
9252 : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
9253
9254 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
9255 size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
9256 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9257 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9258 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9259 vixl32::Label return_address;
9260 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9261 __ cmp(mr, Operand(0));
9262 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9263 __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
9264 EmitBakerReadBarrierBne(custom_data);
9265 __ bind(&return_address);
9266 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9267 narrow_mov
9268 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9269 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
9270 }
9271
9272 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9273 Location ref,
9274 vixl32::Register obj,
9275 const vixl32::MemOperand& src,
9276 bool needs_null_check) {
9277 DCHECK(EmitBakerReadBarrier());
9278
9279 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9280 // Marking Register) to decide whether we need to enter the slow
9281 // path to mark the reference. Then, in the slow path, check the
9282 // gray bit in the lock word of the reference's holder (`obj`) to
9283 // decide whether to mark `ref` or not.
9284 //
9285 // We use shared thunks for the slow path; shared within the method
9286 // for JIT, across methods for AOT. That thunk checks the holder
9287 // and jumps to the entrypoint if needed. If the holder is not gray,
9288 // it creates a fake dependency and returns to the LDR instruction.
9289 //
9290 // lr = &gray_return_address;
9291 // if (mr) { // Thread::Current()->GetIsGcMarking()
9292 // goto field_thunk<holder_reg, base_reg>(lr)
9293 // }
9294 // not_gray_return_address:
9295 // // Original reference load. If the offset is too large to fit
9296 // // into LDR, we use an adjusted base register here.
9297 // HeapReference<mirror::Object> reference = *(obj+offset);
9298 // gray_return_address:
9299
9300 DCHECK(src.GetAddrMode() == vixl32::Offset);
9301 DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
9302 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9303 bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
9304
9305 UseScratchRegisterScope temps(GetVIXLAssembler());
9306 temps.Exclude(ip);
9307 uint32_t custom_data =
9308 EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
9309
9310 {
9311 size_t narrow_instructions =
9312 /* CMP */ (mr.IsLow() ? 1u : 0u) +
9313 /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
9314 size_t wide_instructions =
9315 /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9316 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9317 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9318 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9319 vixl32::Label return_address;
9320 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9321 __ cmp(mr, Operand(0));
9322 EmitBakerReadBarrierBne(custom_data);
9323 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9324 __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
9325 if (needs_null_check) {
9326 MaybeRecordImplicitNullCheck(instruction);
9327 }
9328 // Note: We need a specific width for the unpoisoning NEG.
9329 if (kPoisonHeapReferences) {
9330 if (narrow) {
9331 // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
9332 __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
9333 } else {
9334 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9335 }
9336 }
9337 __ bind(&return_address);
9338 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9339 narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
9340 : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
9341 }
9342 MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
9343 }
9344
9345 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9346 Location ref,
9347 vixl32::Register obj,
9348 uint32_t offset,
9349 Location maybe_temp,
9350 bool needs_null_check) {
9351 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
9352 vixl32::Register base = obj;
9353 if (offset >= kReferenceLoadMinFarOffset) {
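// Adjust the base so that the remaining offset is below kReferenceLoadMinFarOffset and
// therefore fits the LDR immediate used by the Baker read barrier thunk.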
9354 base = RegisterFrom(maybe_temp);
9355 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
9356 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
9357 offset &= (kReferenceLoadMinFarOffset - 1u);
9358 }
9359 GenerateFieldLoadWithBakerReadBarrier(
9360 instruction, ref, obj, MemOperand(base, offset), needs_null_check);
9361 }
9362
9363 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
9364 vixl32::Register obj,
9365 uint32_t data_offset,
9366 Location index,
9367 Location temp,
9368 bool needs_null_check) {
9369 DCHECK(EmitBakerReadBarrier());
9370
9371 static_assert(
9372 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
9373 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
9374 ScaleFactor scale_factor = TIMES_4;
9375
9376 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9377 // Marking Register) to decide whether we need to enter the slow
9378 // path to mark the reference. Then, in the slow path, check the
9379 // gray bit in the lock word of the reference's holder (`obj`) to
9380 // decide whether to mark `ref` or not.
9381 //
9382 // We use shared thunks for the slow path; shared within the method
9383 // for JIT, across methods for AOT. That thunk checks the holder
9384 // and jumps to the entrypoint if needed. If the holder is not gray,
9385 // it creates a fake dependency and returns to the LDR instruction.
9386 //
9387 // lr = &gray_return_address;
9388 // if (mr) { // Thread::Current()->GetIsGcMarking()
9389 // goto array_thunk<base_reg>(lr)
9390 // }
9391 // not_gray_return_address:
9392 // // Original reference load. If the offset is too large to fit
9393 // // into LDR, we use an adjusted base register here.
9394 // HeapReference<mirror::Object> reference = data[index];
9395 // gray_return_address:
9396
9397 DCHECK(index.IsValid());
9398 vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9399 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9400 vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
9401
9402 UseScratchRegisterScope temps(GetVIXLAssembler());
9403 temps.Exclude(ip);
9404 uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9405
9406 __ Add(data_reg, obj, Operand(data_offset));
9407 {
9408 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9409 size_t wide_instructions =
9410 /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9411 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9412 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9413 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9414 vixl32::Label return_address;
9415 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9416 __ cmp(mr, Operand(0));
9417 EmitBakerReadBarrierBne(custom_data);
9418 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9419 __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9420 DCHECK(!needs_null_check); // The thunk cannot handle the null check.
9421 // Note: We need a Wide NEG for the unpoisoning.
9422 if (kPoisonHeapReferences) {
9423 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9424 }
9425 __ bind(&return_address);
9426 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9427 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9428 }
9429 MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9430 }
9431
9432 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9433 // The following condition is a compile-time one, so it does not have a run-time cost.
9434 if (kIsDebugBuild && EmitBakerReadBarrier()) {
9435 // The following condition is a run-time one; it is executed after the
9436 // previous compile-time test, to avoid penalizing non-debug builds.
9437 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9438 UseScratchRegisterScope temps(GetVIXLAssembler());
9439 vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9440 GetAssembler()->GenerateMarkingRegisterCheck(temp,
9441 kMarkingRegisterCheckBreakCodeBaseCode + code);
9442 }
9443 }
9444 }
9445
9446 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9447 Location out,
9448 Location ref,
9449 Location obj,
9450 uint32_t offset,
9451 Location index) {
9452 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9453 ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9454 AddSlowPath(slow_path);
9455 return slow_path;
9456 }
9457
9458 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9459 Location out,
9460 Location ref,
9461 Location obj,
9462 uint32_t offset,
9463 Location index) {
9464 DCHECK(EmitReadBarrier());
9465
9466 // Insert a slow path based read barrier *after* the reference load.
9467 //
9468 // If heap poisoning is enabled, the unpoisoning of the loaded
9469 // reference will be carried out by the runtime within the slow
9470 // path.
9471 //
9472 // Note that `ref` currently does not get unpoisoned (when heap
9473 // poisoning is enabled), which is alright as the `ref` argument is
9474 // not used by the artReadBarrierSlow entry point.
9475 //
9476 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9477 SlowPathCodeARMVIXL* slow_path =
9478 AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9479
9480 __ B(slow_path->GetEntryLabel());
9481 __ Bind(slow_path->GetExitLabel());
9482 }
9483
9484 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9485 Location out,
9486 Location ref,
9487 Location obj,
9488 uint32_t offset,
9489 Location index) {
9490 if (EmitReadBarrier()) {
9491 // Baker's read barriers shall be handled by the fast path
9492 // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9493 DCHECK(!kUseBakerReadBarrier);
9494 // If heap poisoning is enabled, unpoisoning will be taken care of
9495 // by the runtime within the slow path.
9496 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9497 } else if (kPoisonHeapReferences) {
9498 GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9499 }
9500 }
9501
9502 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9503 Location out,
9504 Location root) {
9505 DCHECK(EmitReadBarrier());
9506
9507 // Insert a slow path based read barrier *after* the GC root load.
9508 //
9509 // Note that GC roots are not affected by heap poisoning, so we do
9510 // not need to do anything special for this here.
9511 SlowPathCodeARMVIXL* slow_path =
9512 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9513 AddSlowPath(slow_path);
9514
9515 __ B(slow_path->GetEntryLabel());
9516 __ Bind(slow_path->GetExitLabel());
9517 }
9518
9519 // Check if the desired_dispatch_info is supported. If it is, return it,
9520 // otherwise return a fall-back info that should be used instead.
9521 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9522 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9523 ArtMethod* method) {
9524 if (method->IsIntrinsic() &&
9525 desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9526 // As a work-around for soft-float native ABI interfering with type checks, we are
9527 // inserting fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9528 // when a float or double argument is passed in core registers but we cannot do that
9529 // for actual intrinsic implementations that expect them in FP registers. Therefore
9530 // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9531 // properly intrinsified, the dispatch type does not matter anyway.
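    // Illustrative example (hypothetical method, not from this code base): for a native method
    //   static int compute(long l, double d);
    // the shorty is "IJD"; the 'D' at index 2 triggers the fallback below, so the call uses
    // CodePtrLocation::kCallArtMethod instead of kCallCriticalNative.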
9532 ScopedObjectAccess soa(Thread::Current());
9533 uint32_t shorty_len;
9534 const char* shorty = method->GetShorty(&shorty_len);
9535 for (uint32_t i = 1; i != shorty_len; ++i) {
9536 if (shorty[i] == 'D' || shorty[i] == 'F') {
9537 HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9538 dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9539 return dispatch_info;
9540 }
9541 }
9542 }
9543 return desired_dispatch_info;
9544 }
9545
9546
9547 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9548 switch (load_kind) {
9549 case MethodLoadKind::kBootImageLinkTimePcRelative: {
9550 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9551 PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9552 vixl32::Register temp_reg = RegisterFrom(temp);
9553 EmitMovwMovtPlaceholder(labels, temp_reg);
9554 break;
9555 }
9556 case MethodLoadKind::kBootImageRelRo: {
9557 uint32_t boot_image_offset = GetBootImageOffset(invoke);
9558 LoadBootImageRelRoEntry(RegisterFrom(temp), boot_image_offset);
9559 break;
9560 }
9561 case MethodLoadKind::kAppImageRelRo: {
9562 DCHECK(GetCompilerOptions().IsAppImage());
9563 PcRelativePatchInfo* labels = NewAppImageMethodPatch(invoke->GetResolvedMethodReference());
9564 vixl32::Register temp_reg = RegisterFrom(temp);
9565 EmitMovwMovtPlaceholder(labels, temp_reg);
9566 __ Ldr(temp_reg, MemOperand(temp_reg, /*offset=*/ 0));
9567 break;
9568 }
9569 case MethodLoadKind::kBssEntry: {
9570 PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9571 vixl32::Register temp_reg = RegisterFrom(temp);
9572 EmitMovwMovtPlaceholder(labels, temp_reg);
9573 // All aligned loads are implicitly atomic consume operations on ARM.
9574       GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9575 break;
9576 }
9577 case MethodLoadKind::kJitDirectAddress: {
9578 __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9579 break;
9580 }
9581 case MethodLoadKind::kRuntimeCall: {
9582 // Test situation, don't do anything.
9583 break;
9584 }
9585 default: {
9586 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9587 UNREACHABLE();
9588 }
9589 }
9590 }
9591
9592 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9593 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9594 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
9595 switch (invoke->GetMethodLoadKind()) {
9596 case MethodLoadKind::kStringInit: {
9597 uint32_t offset =
9598 GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9599 // temp = thread->string_init_entrypoint
9600 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9601 break;
9602 }
9603 case MethodLoadKind::kRecursive: {
9604 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9605 break;
9606 }
9607 case MethodLoadKind::kRuntimeCall: {
9608 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9609 return; // No code pointer retrieval; the runtime performs the call directly.
9610 }
9611 case MethodLoadKind::kBootImageLinkTimePcRelative:
9612 // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9613 // pointer for kCallCriticalNative because it would not save us an instruction from
9614 // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9615 // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
9616 FALLTHROUGH_INTENDED;
9617 default: {
9618 LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9619 break;
9620 }
9621 }
9622
9623 auto call_code_pointer_member = [&](MemberOffset offset) {
9624 // LR = callee_method->member;
9625 GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9626 {
9627 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9628       // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
9629 ExactAssemblyScope aas(GetVIXLAssembler(),
9630 vixl32::k16BitT32InstructionSizeInBytes,
9631 CodeBufferCheckScope::kExactSize);
9632 // LR()
9633 __ blx(lr);
9634 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9635 }
9636 };
9637 switch (invoke->GetCodePtrLocation()) {
9638 case CodePtrLocation::kCallSelf:
9639 {
9640 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
9641 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9642 ExactAssemblyScope aas(GetVIXLAssembler(),
9643 vixl32::k32BitT32InstructionSizeInBytes,
9644 CodeBufferCheckScope::kMaximumSize);
9645 __ bl(GetFrameEntryLabel());
9646 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9647 }
9648 break;
9649 case CodePtrLocation::kCallCriticalNative: {
9650 size_t out_frame_size =
9651 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9652 kAapcsStackAlignment,
9653 GetCriticalNativeDirectCallFrameSize>(invoke);
9654 call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9655 // Move the result when needed due to native and managed ABI mismatch.
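      // Note (background, not from the original comments): the soft-float native AAPCS returns
      // float in r0 and double in r0/r1, while the managed ABI expects them in s0/d0, so the
      // VMOVs below move the value across. Integral and void results already match and need no
      // fix-up.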
9656 switch (invoke->GetType()) {
9657 case DataType::Type::kFloat32:
9658 __ Vmov(s0, r0);
9659 break;
9660 case DataType::Type::kFloat64:
9661 __ Vmov(d0, r0, r1);
9662 break;
9663 case DataType::Type::kBool:
9664 case DataType::Type::kInt8:
9665 case DataType::Type::kUint16:
9666 case DataType::Type::kInt16:
9667 case DataType::Type::kInt32:
9668 case DataType::Type::kInt64:
9669 case DataType::Type::kVoid:
9670 break;
9671 default:
9672 DCHECK(false) << invoke->GetType();
9673 break;
9674 }
9675 if (out_frame_size != 0u) {
9676 DecreaseFrame(out_frame_size);
9677 }
9678 break;
9679 }
9680 case CodePtrLocation::kCallArtMethod:
9681 call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9682 break;
9683 }
9684
9685 DCHECK(!IsLeafMethod());
9686 }
9687
9688 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9689 HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9690 vixl32::Register temp = RegisterFrom(temp_location);
9691 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9692 invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9693
9694 // Use the calling convention instead of the location of the receiver, as
9695 // intrinsics may have put the receiver in a different register. In the intrinsics
9696 // slow path, the arguments have been moved to the right place, so here we are
9697 // guaranteed that the receiver is the first register of the calling convention.
9698 InvokeDexCallingConventionARMVIXL calling_convention;
9699 vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9700 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9701 {
9702 // Make sure the pc is recorded immediately after the `ldr` instruction.
9703 ExactAssemblyScope aas(GetVIXLAssembler(),
9704 vixl32::kMaxInstructionSizeInBytes,
9705 CodeBufferCheckScope::kMaximumSize);
9706 // /* HeapReference<Class> */ temp = receiver->klass_
9707 __ ldr(temp, MemOperand(receiver, class_offset));
9708 MaybeRecordImplicitNullCheck(invoke);
9709 }
9710   // Instead of simply (possibly) unpoisoning `temp` here, we should
9711   // emit a read barrier for the previous class reference load.
9712   // However, this is not required in practice, as this is an
9713   // intermediate/temporary reference and because the current
9714   // concurrent copying collector keeps the from-space memory
9715   // intact/accessible until the end of the marking phase (though the
9716   // concurrent copying collector may not do so in the future).
9717 GetAssembler()->MaybeUnpoisonHeapReference(temp);
9718
9719 // If we're compiling baseline, update the inline cache.
9720 MaybeGenerateInlineCacheCheck(invoke, temp);
9721
9722 // temp = temp->GetMethodAt(method_offset);
9723 uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9724 kArmPointerSize).Int32Value();
9725 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9726 // LR = temp->GetEntryPoint();
9727 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9728 {
9729 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9730     // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
9731 ExactAssemblyScope aas(GetVIXLAssembler(),
9732 vixl32::k16BitT32InstructionSizeInBytes,
9733 CodeBufferCheckScope::kExactSize);
9734 // LR();
9735 __ blx(lr);
9736 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9737 }
9738 }
9739
9740 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9741 uint32_t intrinsic_data) {
9742 return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9743 }
9744
9745 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9746 uint32_t boot_image_offset) {
9747 return NewPcRelativePatch(/* dex_file= */ nullptr,
9748 boot_image_offset,
9749 &boot_image_other_patches_);
9750 }
9751
9752 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9753 MethodReference target_method) {
9754 return NewPcRelativePatch(
9755 target_method.dex_file, target_method.index, &boot_image_method_patches_);
9756 }
9757
9758 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageMethodPatch(
9759 MethodReference target_method) {
9760 return NewPcRelativePatch(
9761 target_method.dex_file, target_method.index, &app_image_method_patches_);
9762 }
9763
9764 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9765 MethodReference target_method) {
9766 return NewPcRelativePatch(
9767 target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9768 }
9769
9770 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9771 const DexFile& dex_file, dex::TypeIndex type_index) {
9772 return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9773 }
9774
9775 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageTypePatch(
9776 const DexFile& dex_file, dex::TypeIndex type_index) {
9777 return NewPcRelativePatch(&dex_file, type_index.index_, &app_image_type_patches_);
9778 }
9779
9780 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9781 HLoadClass* load_class) {
9782 const DexFile& dex_file = load_class->GetDexFile();
9783 dex::TypeIndex type_index = load_class->GetTypeIndex();
9784 ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9785 switch (load_class->GetLoadKind()) {
9786 case HLoadClass::LoadKind::kBssEntry:
9787 patches = &type_bss_entry_patches_;
9788 break;
9789 case HLoadClass::LoadKind::kBssEntryPublic:
9790 patches = &public_type_bss_entry_patches_;
9791 break;
9792 case HLoadClass::LoadKind::kBssEntryPackage:
9793 patches = &package_type_bss_entry_patches_;
9794 break;
9795 default:
9796 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9797 UNREACHABLE();
9798 }
9799 return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9800 }
9801
9802 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9803 const DexFile& dex_file, dex::StringIndex string_index) {
9804 return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9805 }
9806
9807 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9808 const DexFile& dex_file, dex::StringIndex string_index) {
9809 return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9810 }
9811
9812 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9813 const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9814 patches->emplace_back(dex_file, offset_or_index);
9815 return &patches->back();
9816 }
9817
9818 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9819 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9820 DCHECK(!GetCompilerOptions().IsJitCompiler());
9821 call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9822 vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9823 __ bind(bl_label);
9824 vixl32::Label placeholder_label;
9825 __ bl(&placeholder_label); // Placeholder, patched at link-time.
9826 __ bind(&placeholder_label);
9827 }
9828
9829 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9830 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9831 if (GetCompilerOptions().IsJitCompiler()) {
9832 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9833 vixl::aarch32::Label* slow_path_entry = &it->second.label;
9834 __ b(ne, EncodingSize(Wide), slow_path_entry);
9835 } else {
9836 baker_read_barrier_patches_.emplace_back(custom_data);
9837 vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9838 __ bind(patch_label);
9839 vixl32::Label placeholder_label;
9840 __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
9841 __ bind(&placeholder_label);
9842 }
9843 }
9844
9845 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9846 return DeduplicateUint32Literal(address, &uint32_literals_);
9847 }
9848
9849 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9850 const DexFile& dex_file,
9851 dex::StringIndex string_index,
9852 Handle<mirror::String> handle) {
9853 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9854 return jit_string_patches_.GetOrCreate(
9855 StringReference(&dex_file, string_index),
9856 [this]() {
9857 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9858 });
9859 }
9860
9861 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9862 dex::TypeIndex type_index,
9863 Handle<mirror::Class> handle) {
9864 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9865 return jit_class_patches_.GetOrCreate(
9866 TypeReference(&dex_file, type_index),
9867 [this]() {
9868 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9869 });
9870 }
9871
9872 void CodeGeneratorARMVIXL::LoadBootImageRelRoEntry(vixl32::Register reg,
9873 uint32_t boot_image_offset) {
9874 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9875 EmitMovwMovtPlaceholder(labels, reg);
9876 __ Ldr(reg, MemOperand(reg, /*offset=*/ 0));
9877 }
9878
9879 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9880 uint32_t boot_image_reference) {
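  // Rough summary of the three cases below: when compiling the boot image itself, emit a
  // link-time MOVW/MOVT/ADD(pc) placeholder; for PIC AOT app code, load the address from a
  // boot-image RelRo entry; for JIT, the boot image is already mapped, so the absolute
  // address can be embedded as a literal.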
9881 if (GetCompilerOptions().IsBootImage()) {
9882 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9883 NewBootImageIntrinsicPatch(boot_image_reference);
9884 EmitMovwMovtPlaceholder(labels, reg);
9885 } else if (GetCompilerOptions().GetCompilePic()) {
9886 LoadBootImageRelRoEntry(reg, boot_image_reference);
9887 } else {
9888 DCHECK(GetCompilerOptions().IsJitCompiler());
9889 gc::Heap* heap = Runtime::Current()->GetHeap();
9890 DCHECK(!heap->GetBootImageSpaces().empty());
9891 uintptr_t address =
9892 reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9893 __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9894 }
9895 }
9896
9897 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9898 TypeReference target_type) {
9899 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9900 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9901 PcRelativePatchInfo* labels =
9902 NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9903 EmitMovwMovtPlaceholder(labels, reg);
9904 }
9905
9906 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9907 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9908 if (GetCompilerOptions().IsBootImage()) {
9909 MethodReference target_method = invoke->GetResolvedMethodReference();
9910 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9911 LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9912 } else {
9913 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9914 LoadBootImageAddress(reg, boot_image_offset);
9915 }
9916 }
9917
9918 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9919 ClassRoot class_root) {
9920 if (GetCompilerOptions().IsBootImage()) {
9921 ScopedObjectAccess soa(Thread::Current());
9922 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9923 TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9924 LoadTypeForBootImageIntrinsic(reg, target_type);
9925 } else {
9926 uint32_t boot_image_offset = GetBootImageOffset(class_root);
9927 LoadBootImageAddress(reg, boot_image_offset);
9928 }
9929 }
9930
9931 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
9932 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9933 const ArenaDeque<PcRelativePatchInfo>& infos,
9934 ArenaVector<linker::LinkerPatch>* linker_patches) {
9935 for (const PcRelativePatchInfo& info : infos) {
9936 const DexFile* dex_file = info.target_dex_file;
9937 size_t offset_or_index = info.offset_or_index;
9938 DCHECK(info.add_pc_label.IsBound());
9939 uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9940 // Add MOVW patch.
9941 DCHECK(info.movw_label.IsBound());
9942 uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9943 linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9944 // Add MOVT patch.
9945 DCHECK(info.movt_label.IsBound());
9946 uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9947 linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9948 }
9949 }
9950
9951 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
9952 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9953 const DexFile* target_dex_file,
9954 uint32_t pc_insn_offset,
9955 uint32_t boot_image_offset) {
9956 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
9957 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9958 }
9959
9960 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9961 DCHECK(linker_patches->empty());
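  // Each PC-relative entry is emitted as a MOVW+MOVT pair and therefore contributes two
  // linker patches; that is the reason for the 2u factors in the size computation below.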
9962 size_t size =
9963 /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9964 /* MOVW+MOVT for each entry */ 2u * app_image_method_patches_.size() +
9965 /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9966 /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9967 /* MOVW+MOVT for each entry */ 2u * app_image_type_patches_.size() +
9968 /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9969 /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9970 /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9971 /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9972 /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9973 /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9974 call_entrypoint_patches_.size() +
9975 baker_read_barrier_patches_.size();
9976 linker_patches->reserve(size);
9977 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9978 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9979 boot_image_method_patches_, linker_patches);
9980 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9981 boot_image_type_patches_, linker_patches);
9982 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9983 boot_image_string_patches_, linker_patches);
9984 } else {
9985 DCHECK(boot_image_method_patches_.empty());
9986 DCHECK(boot_image_type_patches_.empty());
9987 DCHECK(boot_image_string_patches_.empty());
9988 }
9989 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
9990 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
9991 if (GetCompilerOptions().IsBootImage()) {
9992 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9993 boot_image_other_patches_, linker_patches);
9994 } else {
9995 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
9996 boot_image_other_patches_, linker_patches);
9997 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
9998 app_image_method_patches_, linker_patches);
9999 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
10000 app_image_type_patches_, linker_patches);
10001 }
10002 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
10003 method_bss_entry_patches_, linker_patches);
10004 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
10005 type_bss_entry_patches_, linker_patches);
10006 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
10007 public_type_bss_entry_patches_, linker_patches);
10008 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
10009 package_type_bss_entry_patches_, linker_patches);
10010 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
10011 string_bss_entry_patches_, linker_patches);
10012 for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
10013 DCHECK(info.target_dex_file == nullptr);
10014 linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
10015 info.label.GetLocation(), info.offset_or_index));
10016 }
10017 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
10018 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
10019 info.label.GetLocation(), info.custom_data));
10020 }
10021 DCHECK_EQ(size, linker_patches->size());
10022 }
10023
10024 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
10025 return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
10026 patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
10027 patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
10028 }
10029
10030 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
10031 /*out*/ ArenaVector<uint8_t>* code,
10032 /*out*/ std::string* debug_name) {
10033 arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
10034 switch (patch.GetType()) {
10035 case linker::LinkerPatch::Type::kCallRelative: {
10036 // The thunk just uses the entry point in the ArtMethod. This works even for calls
10037 // to the generic JNI and interpreter trampolines.
10038 MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
10039 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
10040 assembler.GetVIXLAssembler()->Bkpt(0);
10041 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10042 *debug_name = "MethodCallThunk";
10043 }
10044 break;
10045 }
10046 case linker::LinkerPatch::Type::kCallEntrypoint: {
10047 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
10048 assembler.GetVIXLAssembler()->Bkpt(0);
10049 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10050 *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
10051 }
10052 break;
10053 }
10054 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
10055 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
10056 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
10057 break;
10058 }
10059 default:
10060 LOG(FATAL) << "Unexpected patch type " << patch.GetType();
10061 UNREACHABLE();
10062 }
10063
10064 // Ensure we emit the literal pool if any.
10065 assembler.FinalizeCode();
10066 code->resize(assembler.CodeSize());
10067 MemoryRegion code_region(code->data(), code->size());
10068 assembler.CopyInstructions(code_region);
10069 }
10070
10071 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
10072 uint32_t value,
10073 Uint32ToLiteralMap* map) {
10074 return map->GetOrCreate(
10075 value,
10076 [this, value]() {
10077 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
10078 });
10079 }
10080
10081 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10082 LocationSummary* locations =
10083 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
10084 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
10085 Location::RequiresRegister());
10086 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
10087 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
10088 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
10089 }
10090
10091 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10092 vixl32::Register res = OutputRegister(instr);
10093 vixl32::Register accumulator =
10094 InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
10095 vixl32::Register mul_left =
10096 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
10097 vixl32::Register mul_right =
10098 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
10099
10100 if (instr->GetOpKind() == HInstruction::kAdd) {
10101 __ Mla(res, mul_left, mul_right, accumulator);
10102 } else {
10103 __ Mls(res, mul_left, mul_right, accumulator);
10104 }
10105 }
10106
10107 void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10108 // Nothing to do, this should be removed during prepare for register allocator.
10109 LOG(FATAL) << "Unreachable";
10110 }
10111
10112 void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10113 // Nothing to do, this should be removed during prepare for register allocator.
10114 LOG(FATAL) << "Unreachable";
10115 }
10116
10117 // Simple implementation of packed switch - generate cascaded compare/jumps.
10118 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10119 LocationSummary* locations =
10120 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
10121 locations->SetInAt(0, Location::RequiresRegister());
10122 if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
10123 codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10124 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
10125 if (switch_instr->GetStartValue() != 0) {
10126 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the bias.
10127 }
10128 }
10129 }
10130
10131 // TODO(VIXL): Investigate and reach the parity with old arm codegen.
10132 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10133 int32_t lower_bound = switch_instr->GetStartValue();
10134 uint32_t num_entries = switch_instr->GetNumEntries();
10135 LocationSummary* locations = switch_instr->GetLocations();
10136 vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
10137 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
10138
10139 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
10140 !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10141 // Create a series of compare/jumps.
10142 UseScratchRegisterScope temps(GetVIXLAssembler());
10143 vixl32::Register temp_reg = temps.Acquire();
10144     // Note: It is fine for the AddConstantSetFlags() below to use the IP register to
10145     // temporarily store the immediate, because IP is used as the destination register.
10146     // For the other AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate
10147     // values are constants and can be encoded in the instruction without using the IP register.
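    // Illustrative example (hypothetical values, not from the source): for entries {10, 11, 12, 13}
    // (lower_bound = 10, num_entries = 4) the cascade below is roughly
    //   ADDS temp, value, #-10 ; BEQ succ[0]
    //   ADDS temp, temp, #-2   ; BLO succ[1] ; BEQ succ[2]
    //   CMP  temp, #1          ; BEQ succ[3]
    //   B    default           (unless the default block is the next block)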
10148 __ Adds(temp_reg, value_reg, -lower_bound);
10149
10150 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
10151 // Jump to successors[0] if value == lower_bound.
10152 __ B(eq, codegen_->GetLabelOf(successors[0]));
10153 int32_t last_index = 0;
10154 for (; num_entries - last_index > 2; last_index += 2) {
10155 __ Adds(temp_reg, temp_reg, -2);
10156 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
10157 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
10158 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
10159 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
10160 }
10161 if (num_entries - last_index == 2) {
10162 // The last missing case_value.
10163 __ Cmp(temp_reg, 1);
10164 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
10165 }
10166
10167 // And the default for any other value.
10168 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
10169 __ B(codegen_->GetLabelOf(default_block));
10170 }
10171 } else {
10172 // Create a table lookup.
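    // The table stores one 32-bit offset per case, relative to the table base. The sequence
    // below materializes the table address with ADR, loads the offset indexed by the
    // (bias-adjusted) key, and branches to table_base + offset; the table itself is emitted
    // immediately after the BX.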
10173 vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
10174
10175 JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
10176
10177 // Remove the bias.
10178 vixl32::Register key_reg;
10179 if (lower_bound != 0) {
10180 key_reg = RegisterFrom(locations->GetTemp(1));
10181 __ Sub(key_reg, value_reg, lower_bound);
10182 } else {
10183 key_reg = value_reg;
10184 }
10185
10186 // Check whether the value is in the table, jump to default block if not.
10187 __ Cmp(key_reg, num_entries - 1);
10188 __ B(hi, codegen_->GetLabelOf(default_block));
10189
10190 UseScratchRegisterScope temps(GetVIXLAssembler());
10191 vixl32::Register jump_offset = temps.Acquire();
10192
10193 // Load jump offset from the table.
10194 {
10195 const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
10196 ExactAssemblyScope aas(GetVIXLAssembler(),
10197 (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
10198 CodeBufferCheckScope::kMaximumSize);
10199 __ adr(table_base, jump_table->GetTableStartLabel());
10200 __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
10201
10202       // Jump to the target block by branching to table_base (PC-relative) + offset.
10203 vixl32::Register target_address = table_base;
10204 __ add(target_address, table_base, jump_offset);
10205 __ bx(target_address);
10206
10207 jump_table->EmitTable(codegen_);
10208 }
10209 }
10210 }
10211
10212 // Copy the result of a call into the given target.
10213 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
10214 if (!trg.IsValid()) {
10215 DCHECK_EQ(type, DataType::Type::kVoid);
10216 return;
10217 }
10218
10219 DCHECK_NE(type, DataType::Type::kVoid);
10220
10221 Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
10222 if (return_loc.Equals(trg)) {
10223 return;
10224 }
10225
10226 // Let the parallel move resolver take care of all of this.
10227 HParallelMove parallel_move(GetGraph()->GetAllocator());
10228 parallel_move.AddMove(return_loc, trg, type, nullptr);
10229   GetMoveResolver()->EmitNativeCode(&parallel_move);
10230 }
10231
10232 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10233 LocationSummary* locations =
10234 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
10235 locations->SetInAt(0, Location::RequiresRegister());
10236 locations->SetOut(Location::RequiresRegister());
10237 }
10238
10239 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10240 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
10241 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
10242 instruction->GetIndex(), kArmPointerSize).SizeValue();
10243 GetAssembler()->LoadFromOffset(kLoadWord,
10244 OutputRegister(instruction),
10245 InputRegisterAt(instruction, 0),
10246 method_offset);
10247 } else {
10248 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
10249 instruction->GetIndex(), kArmPointerSize));
10250 GetAssembler()->LoadFromOffset(kLoadWord,
10251 OutputRegister(instruction),
10252 InputRegisterAt(instruction, 0),
10253 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
10254 GetAssembler()->LoadFromOffset(kLoadWord,
10255 OutputRegister(instruction),
10256 OutputRegister(instruction),
10257 method_offset);
10258 }
10259 }
10260
10261 static void PatchJitRootUse(uint8_t* code,
10262 const uint8_t* roots_data,
10263 VIXLUInt32Literal* literal,
10264 uint64_t index_in_table) {
10265 DCHECK(literal->IsBound());
10266 uint32_t literal_offset = literal->GetLocation();
10267 uintptr_t address =
10268 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
10269 uint8_t* data = code + literal_offset;
10270 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
10271 }
10272
10273 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
10274 for (const auto& entry : jit_string_patches_) {
10275 const StringReference& string_reference = entry.first;
10276 VIXLUInt32Literal* table_entry_literal = entry.second;
10277 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
10278 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10279 }
10280 for (const auto& entry : jit_class_patches_) {
10281 const TypeReference& type_reference = entry.first;
10282 VIXLUInt32Literal* table_entry_literal = entry.second;
10283 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
10284 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10285 }
10286 }
10287
10288 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
10289 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
10290 vixl32::Register out) {
10291 ExactAssemblyScope aas(GetVIXLAssembler(),
10292 3 * vixl32::kMaxInstructionSizeInBytes,
10293 CodeBufferCheckScope::kMaximumSize);
10294 // TODO(VIXL): Think about using mov instead of movw.
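  // The placeholder expands to
  //   movw out, #0   @ patched with the low 16 bits of the PC-relative offset
  //   movt out, #0   @ patched with the high 16 bits
  //   add  out, out, pc
  // so after link-time patching `out` holds the absolute address of the target.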
10295 __ bind(&labels->movw_label);
10296 __ movw(out, /* operand= */ 0u);
10297 __ bind(&labels->movt_label);
10298 __ movt(out, /* operand= */ 0u);
10299 __ bind(&labels->add_pc_label);
10300 __ add(out, out, pc);
10301 }
10302
10303 #undef __
10304 #undef QUICK_ENTRY_POINT
10305 #undef TODO_VIXL32
10306
10307 #define __ assembler.GetVIXLAssembler()->
10308
10309 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
10310 vixl32::Register base_reg,
10311 vixl32::MemOperand& lock_word,
10312 vixl32::Label* slow_path,
10313 int32_t raw_ldr_offset,
10314 vixl32::Label* throw_npe = nullptr) {
10315 // Load the lock word containing the rb_state.
10316 __ Ldr(ip, lock_word);
10317 // Given the numeric representation, it's enough to check the low bit of the rb_state.
10318 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
10319 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
10320 __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
10321 __ B(ne, slow_path, /* is_far_target= */ false);
10322 // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
10323 if (throw_npe != nullptr) {
10324 __ Bind(throw_npe);
10325 }
10326 __ Add(lr, lr, raw_ldr_offset);
10327 // Introduce a dependency on the lock_word including rb_state,
10328 // to prevent load-load reordering, and without using
10329 // a memory barrier (which would be more expensive).
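  // Note (inferred detail): with a shift amount of 32, LSR produces 0, so the ADD below leaves
  // `base_reg` unchanged; its only purpose is to make the subsequent reference load
  // address-dependent on the lock word loaded into ip above.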
10330 __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
10331 __ Bx(lr); // And return back to the function.
10332 // Note: The fake dependency is unnecessary for the slow path.
10333 }
10334
10335 // Load the read barrier introspection entrypoint in register `entrypoint`
10336 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
10337 // The register where the read barrier introspection entrypoint is loaded
10338 // is the marking register. We clobber it here and the entrypoint restores it to 1.
10339 vixl32::Register entrypoint = mr;
10340 // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
10341 DCHECK_EQ(ip.GetCode(), 12u);
10342 const int32_t entry_point_offset =
10343 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
10344 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
10345 return entrypoint;
10346 }
10347
10348 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
10349 uint32_t encoded_data,
10350 /*out*/ std::string* debug_name) {
10351 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
10352 switch (kind) {
10353 case BakerReadBarrierKind::kField: {
10354 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10355 CheckValidReg(base_reg.GetCode());
10356 vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
10357 CheckValidReg(holder_reg.GetCode());
10358 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10359 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10360 temps.Exclude(ip);
10361 // In the case of a field load, if `base_reg` differs from
10362 // `holder_reg`, the offset was too large and we must have emitted (during the construction
10363 // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
10364 // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
10365 // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
10366 // not necessarily do that check before going to the thunk.
10367 vixl32::Label throw_npe_label;
10368 vixl32::Label* throw_npe = nullptr;
10369 if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
10370 throw_npe = &throw_npe_label;
10371 __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
10372 }
10373 // Check if the holder is gray and, if not, add fake dependency to the base register
10374 // and return to the LDR instruction to load the reference. Otherwise, use introspection
10375 // to load the reference and call the entrypoint that performs further checks on the
10376 // reference and marks it if needed.
10377 vixl32::Label slow_path;
10378 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
10379 const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
10380 ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
10381 : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
10382 EmitGrayCheckAndFastPath(
10383 assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
10384 __ Bind(&slow_path);
10385 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10386 raw_ldr_offset;
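      // Decoding sketch (based on the Thumb-2 LDR encodings): a wide LDR (encoding T3) keeps
      // its 12-bit immediate offset in the low bits of the second halfword, hence the LDRH at
      // ldr_offset + 2 and UBFX #0, #12; a narrow LDR (encoding T1) keeps a 5-bit immediate in
      // bits [10:6], scaled by 4, hence UBFX #6, #5 followed by the LSL #2 addressing mode.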
10387 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10388 if (width == BakerReadBarrierWidth::kWide) {
10389 MemOperand ldr_half_address(lr, ldr_offset + 2);
10390 __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
10391 __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
10392 __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
10393 } else {
10394 MemOperand ldr_address(lr, ldr_offset);
10395 __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
10396 __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
10397 ep_reg, // for narrow LDR.
10398 Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
10399 __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
10400 __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
10401 }
10402 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
10403 __ Bx(ep_reg); // Jump to the entrypoint.
10404 break;
10405 }
10406 case BakerReadBarrierKind::kArray: {
10407 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10408 CheckValidReg(base_reg.GetCode());
10409 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10410 BakerReadBarrierSecondRegField::Decode(encoded_data));
10411 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10412 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10413 temps.Exclude(ip);
10414 vixl32::Label slow_path;
10415 int32_t data_offset =
10416 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10417 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10418 DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10419 const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10420 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10421 __ Bind(&slow_path);
10422 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10423 raw_ldr_offset;
10424 MemOperand ldr_address(lr, ldr_offset + 2);
10425 __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
10426 // i.e. Rm+32 because the scale in imm2 is 2.
10427 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10428 __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create
10429 // a switch case target based on the index register.
10430       __ Mov(ip, base_reg);                         // Move the base register to ip.
10431 __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
10432 break;
10433 }
10434 case BakerReadBarrierKind::kGcRoot:
10435 case BakerReadBarrierKind::kIntrinsicCas: {
10436 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10437 // and it does not have a forwarding address), call the correct introspection entrypoint;
10438 // otherwise return the reference (or the extracted forwarding address).
10439 // There is no gray bit check for GC roots.
10440 vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10441 CheckValidReg(root_reg.GetCode());
10442 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10443 BakerReadBarrierSecondRegField::Decode(encoded_data));
10444 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10445 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10446 temps.Exclude(ip);
10447 vixl32::Label return_label, not_marked, forwarding_address;
10448 __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10449 MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10450 __ Ldr(ip, lock_word);
10451 __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10452       __ B(eq, &not_marked);
10453 __ Bind(&return_label);
10454 __ Bx(lr);
10455       __ Bind(&not_marked);
10456 static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10457 "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10458 " the highest bits and the 'forwarding address' state to have all bits set");
10459 __ Cmp(ip, Operand(0xc0000000));
10460 __ B(hs, &forwarding_address);
10461 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10462 // Adjust the art_quick_read_barrier_mark_introspection address
10463 // in kBakerCcEntrypointRegister to one of
10464 // art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10465 if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10466 DCHECK(width == BakerReadBarrierWidth::kWide);
10467 DCHECK(!root_reg.IsLow());
10468 }
10469 int32_t entrypoint_offset =
10470 (kind == BakerReadBarrierKind::kGcRoot)
10471 ? (width == BakerReadBarrierWidth::kWide)
10472 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10473 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10474 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10475 __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10476 __ Mov(ip, root_reg);
10477 __ Bx(ep_reg);
10478 __ Bind(&forwarding_address);
10479 __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10480 __ Bx(lr);
10481 break;
10482 }
10483 default:
10484 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10485 UNREACHABLE();
10486 }
10487
10488 // For JIT, the slow path is considered part of the compiled method,
10489 // so JIT should pass null as `debug_name`.
10490 DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
10491 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10492 std::ostringstream oss;
10493 oss << "BakerReadBarrierThunk";
10494 switch (kind) {
10495 case BakerReadBarrierKind::kField:
10496 oss << "Field";
10497 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10498 oss << "Wide";
10499 }
10500 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10501 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10502 break;
10503 case BakerReadBarrierKind::kArray:
10504 oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10505 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10506 BakerReadBarrierSecondRegField::Decode(encoded_data));
10507 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10508 break;
10509 case BakerReadBarrierKind::kGcRoot:
10510 oss << "GcRoot";
10511 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10512 oss << "Wide";
10513 }
10514 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10515 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10516 BakerReadBarrierSecondRegField::Decode(encoded_data));
10517 break;
10518 case BakerReadBarrierKind::kIntrinsicCas:
10519 oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10520 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10521 BakerReadBarrierSecondRegField::Decode(encoded_data));
10522 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10523 break;
10524 }
10525 *debug_name = oss.str();
10526 }
10527 }
10528
10529 #undef __
10530
10531 } // namespace arm
10532 } // namespace art
10533