xref: /aosp_15_r20/art/compiler/optimizing/intrinsics_arm64.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/callee_save_frame_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/method_handle_impl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"
#include "well_known_classes.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile cleanly with -Wshadow, -Wdeprecated-declarations.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art HIDDEN {

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::Int64FromLocation;
using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

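// Builds a MemOperand for an absolute address held in `location` as a 64-bit value.
// Used by the Memory peek/poke intrinsics below, which operate on raw native addresses.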
ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    DCHECK(codegen_in->EmitBakerReadBarrier());
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    // The source range and destination pointer were initialized before entering the slow-path.
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};

// The MethodHandle.invokeExact intrinsic sets up arguments to match the target method call. If we
// need to go to the slow path, we call art_quick_invoke_polymorphic_with_hidden_receiver, which
// expects the MethodHandle object in w0 (in place of the actual ArtMethod).
class InvokePolymorphicSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  InvokePolymorphicSlowPathARM64(HInstruction* instruction, Register method_handle)
      : SlowPathCodeARM64(instruction), method_handle_(method_handle) {
    DCHECK(instruction->IsInvokePolymorphic());
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, instruction_->GetLocations());
    // Passing `MethodHandle` object as hidden argument.
    __ Mov(w0, method_handle_.W());
    codegen->InvokeRuntime(QuickEntrypointEnum::kQuickInvokePolymorphicWithHiddenReceiver,
                           instruction_,
                           instruction_->GetDexPc());

    RestoreLiveRegisters(codegen, instruction_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "InvokePolymorphicSlowPathARM64"; }

 private:
  const Register method_handle_;
  DISALLOW_COPY_AND_ASSIGN(InvokePolymorphicSlowPathARM64);
};

#undef __

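// Runs the intrinsic locations builder on `invoke` and reports whether the invoke was
// recognized and given intrinsified locations.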
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

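// Emits a byte reversal of `in` into `out` for the given type. For kInt16 the reversed value is
// sign-extended; for the float types the reversal is done in a general-purpose register
// (clobbering `in`) before the bits are moved into the FP output register.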
static void GenerateReverseBytes(MacroAssembler* masm,
                                 DataType::Type type,
                                 CPURegister in,
                                 CPURegister out) {
  switch (type) {
    case DataType::Type::kUint16:
      __ Rev16(out.W(), in.W());
      break;
    case DataType::Type::kInt16:
      __ Rev16(out.W(), in.W());
      __ Sxth(out.W(), out.W());
      break;
    case DataType::Type::kInt32:
      __ Rev(out.W(), in.W());
      break;
    case DataType::Type::kInt64:
      __ Rev(out.X(), in.X());
      break;
    case DataType::Type::kFloat32:
      __ Rev(in.W(), in.W());  // Note: Clobbers `in`.
      __ Fmov(out.S(), in.W());
      break;
    case DataType::Type::kFloat64:
      __ Rev(in.X(), in.X());  // Note: Clobbers `in`.
      __ Fmov(out.D(), in.X());
      break;
    default:
      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();
  GenerateReverseBytes(masm, type, CPURegisterFrom(in, type), CPURegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

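  // There is no count-trailing-zeros instruction for general registers here, so reverse the
  // bits and count leading zeros instead: CLZ(RBIT(x)) == CTZ(x).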
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

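  // Integer.reverse/Long.reverse map directly onto the RBIT instruction.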
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

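  // There is no scalar popcount instruction, so move the value to a SIMD register, count the
  // set bits per byte with CNT, sum the byte counts with ADDV, and move the result back.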
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

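  // Compute 1 << (high_bit - CLZ(src)). When src == 0, CLZ returns the register width, which has
  // bit `clz_high_bit` set; the BIC below relies on that to clear the result to zero.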
  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

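  // lowestOneBit(x) == x & -x: in two's complement, negation isolates the lowest set bit.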
  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here, because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS when the input is a negative half value.
  //
  // We did not choose FCVTPS because, although it rounds toward positive infinity,
  // it does not round to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we used that instruction, more handling code would be needed for most inputs.
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

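// Returns whether the Baker read barrier field load in GenUnsafeGet needs a temporary register:
// it does for volatile loads and whenever the offset is not a small constant.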
static bool ReadBarrierNeedsTemp(bool is_volatile, HInvoke* invoke) {
  return is_volatile ||
      !invoke->InputAt(2)->IsLongConstant() ||
      invoke->InputAt(2)->AsLongConstant()->GetValue() >= kReferenceLoadMinFarOffset;
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt8) ||
         (type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    if (offset_loc.IsConstant()) {
      uint32_t offset = Int64FromLocation(offset_loc);
      Location maybe_temp = ReadBarrierNeedsTemp(is_volatile, invoke)
          ? locations->GetTemp(0) : Location::NoLocation();
      DCHECK_EQ(locations->GetTempCount(), ReadBarrierNeedsTemp(is_volatile, invoke));
      codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                     trg_loc,
                                                     base.W(),
                                                     offset,
                                                     maybe_temp,
                                                     /* needs_null_check= */ false,
                                                     is_volatile);
    } else {
      __ Add(temp, base, WRegisterFrom(offset_loc));  // Offset should not exceed 32 bits.
      codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                     trg_loc,
                                                     base,
                                                     MemOperand(temp.X()),
                                                     /* needs_null_check= */ false,
                                                     is_volatile);
    }
  } else {
    // Other cases.
    MemOperand mem_op;
    if (offset_loc.IsConstant()) {
      mem_op = MemOperand(base.X(), Int64FromLocation(offset_loc));
    } else {
      mem_op = MemOperand(base.X(), XRegisterFrom(offset_loc));
    }
    if (is_volatile) {
      codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

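// Absolute (native-memory) variant: InAt(1) holds a raw 64-bit address. Only primitive types
// are handled here, so no read barrier is involved.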
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorARM64 * codegen)806 static void GenUnsafeGetAbsolute(HInvoke* invoke,
807                                  DataType::Type type,
808                                  bool is_volatile,
809                                  CodeGeneratorARM64* codegen) {
810   LocationSummary* locations = invoke->GetLocations();
811   DCHECK((type == DataType::Type::kInt8) ||
812          (type == DataType::Type::kInt32) ||
813          (type == DataType::Type::kInt64));
814   Location address_loc = locations->InAt(1);
815   MemOperand mem_op = MemOperand(XRegisterFrom(address_loc));
816   Location trg_loc = locations->Out();
817   Register trg = RegisterFrom(trg_loc, type);
818 
819   if (is_volatile) {
820     codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
821   } else {
822     codegen->Load(type, trg, mem_op);
823   }
824 }
825 
CreateUnsafeGetLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorARM64 * codegen,bool is_volatile=false)826 static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
827                                      HInvoke* invoke,
828                                      CodeGeneratorARM64* codegen,
829                                      bool is_volatile = false) {
830   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
831   LocationSummary* locations =
832       new (allocator) LocationSummary(invoke,
833                                       can_call
834                                           ? LocationSummary::kCallOnSlowPath
835                                           : LocationSummary::kNoCall,
836                                       kIntrinsified);
837   if (can_call && kUseBakerReadBarrier) {
838     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
839     if (ReadBarrierNeedsTemp(is_volatile, invoke)) {
840       // We need a temporary register for the read barrier load in order to use
841       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
842       locations->AddTemp(FixedTempLocation());
843     }
844   }
845   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
846   locations->SetInAt(1, Location::RequiresRegister());
847   locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
848   locations->SetOut(Location::RequiresRegister(),
849                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
850 }
851 
CreateUnsafeGetAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)852 static void CreateUnsafeGetAbsoluteLocations(ArenaAllocator* allocator,
853                                              HInvoke* invoke) {
854   LocationSummary* locations =
855       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
856   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
857   locations->SetInAt(1, Location::RequiresRegister());
858   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
859 }
860 
VisitUnsafeGet(HInvoke * invoke)861 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
862   VisitJdkUnsafeGet(invoke);
863 }
VisitUnsafeGetAbsolute(HInvoke * invoke)864 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
865   VisitJdkUnsafeGetAbsolute(invoke);
866 }
VisitUnsafeGetVolatile(HInvoke * invoke)867 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
868   VisitJdkUnsafeGetVolatile(invoke);
869 }
VisitUnsafeGetLong(HInvoke * invoke)870 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
871   VisitJdkUnsafeGetLong(invoke);
872 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)873 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
874   VisitJdkUnsafeGetLongVolatile(invoke);
875 }
VisitUnsafeGetObject(HInvoke * invoke)876 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
877   VisitJdkUnsafeGetReference(invoke);
878 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)879 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
880   VisitJdkUnsafeGetReferenceVolatile(invoke);
881 }
VisitUnsafeGetByte(HInvoke * invoke)882 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetByte(HInvoke* invoke) {
883   VisitJdkUnsafeGetByte(invoke);
884 }
VisitJdkUnsafeGet(HInvoke * invoke)885 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
886   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
887 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)888 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
889   CreateUnsafeGetAbsoluteLocations(allocator_, invoke);
890 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)891 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
892   CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
893 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)894 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
895   CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
896 }
VisitJdkUnsafeGetLong(HInvoke * invoke)897 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
898   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
899 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)900 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
901   CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
902 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)903 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
904   CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
905 }
VisitJdkUnsafeGetReference(HInvoke * invoke)906 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
907   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
908 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)909 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
910   CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
911 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)912 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
913   CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
914 }
VisitJdkUnsafeGetByte(HInvoke * invoke)915 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
916   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
917 }
918 
VisitUnsafeGet(HInvoke * invoke)919 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
920   VisitJdkUnsafeGet(invoke);
921 }
VisitUnsafeGetAbsolute(HInvoke * invoke)922 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
923   VisitJdkUnsafeGetAbsolute(invoke);
924 }
VisitUnsafeGetVolatile(HInvoke * invoke)925 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
926   VisitJdkUnsafeGetVolatile(invoke);
927 }
VisitUnsafeGetLong(HInvoke * invoke)928 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
929   VisitJdkUnsafeGetLong(invoke);
930 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)931 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
932   VisitJdkUnsafeGetLongVolatile(invoke);
933 }
VisitUnsafeGetObject(HInvoke * invoke)934 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
935   VisitJdkUnsafeGetReference(invoke);
936 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)937 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
938   VisitJdkUnsafeGetReferenceVolatile(invoke);
939 }
VisitUnsafeGetByte(HInvoke * invoke)940 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetByte(HInvoke* invoke) {
941   VisitJdkUnsafeGetByte(invoke);
942 }
943 
VisitJdkUnsafeGet(HInvoke * invoke)944 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
945   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
946 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)947 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
948   GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
949 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)950 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
951   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
952 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)953 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
954   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
955 }
VisitJdkUnsafeGetLong(HInvoke * invoke)956 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
957   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
958 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)959 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
960   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
961 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)962 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
963   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
964 }
VisitJdkUnsafeGetReference(HInvoke * invoke)965 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
966   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
967 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)968 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
969   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
970 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)971 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
972   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
973 }
VisitJdkUnsafeGetByte(HInvoke * invoke)974 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
975   GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
976 }
977 
CreateUnsafePutLocations(ArenaAllocator * allocator,HInvoke * invoke)978 static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
979   LocationSummary* locations =
980       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
981   static constexpr int kOffsetIndex = 2;
982   static constexpr int kValueIndex = 3;
983   // Unused receiver.
984   locations->SetInAt(0, Location::NoLocation());
985   // The object.
986   locations->SetInAt(1, Location::RequiresRegister());
987   // The offset.
988   locations->SetInAt(
989       kOffsetIndex, Location::RegisterOrConstant(invoke->InputAt(kOffsetIndex)));
990   // The value.
991   if (IsZeroBitPattern(invoke->InputAt(kValueIndex))) {
992     locations->SetInAt(kValueIndex, Location::ConstantLocation(invoke->InputAt(kValueIndex)));
993   } else {
994     locations->SetInAt(kValueIndex, Location::RequiresRegister());
995   }
996 }
997 
CreateUnsafePutAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)998 static void CreateUnsafePutAbsoluteLocations(ArenaAllocator* allocator, HInvoke* invoke) {
999   LocationSummary* locations =
1000       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1001   static constexpr int kAddressIndex = 1;
1002   static constexpr int kValueIndex = 2;
1003   // Unused receiver.
1004   locations->SetInAt(0, Location::NoLocation());
1005   // The address.
1006   locations->SetInAt(kAddressIndex, Location::RequiresRegister());
1007   // The value.
1008   if (IsZeroBitPattern(invoke->InputAt(kValueIndex))) {
1009     locations->SetInAt(kValueIndex, Location::ConstantLocation(invoke->InputAt(kValueIndex)));
1010   } else {
1011     locations->SetInAt(kValueIndex, Location::RequiresRegister());
1012   }
1013 }
1014 
VisitUnsafePut(HInvoke * invoke)1015 void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
1016   VisitJdkUnsafePut(invoke);
1017 }
VisitUnsafePutAbsolute(HInvoke * invoke)1018 void IntrinsicLocationsBuilderARM64::VisitUnsafePutAbsolute(HInvoke* invoke) {
1019   VisitJdkUnsafePutAbsolute(invoke);
1020 }
VisitUnsafePutOrdered(HInvoke * invoke)1021 void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
1022   VisitJdkUnsafePutOrdered(invoke);
1023 }
VisitUnsafePutVolatile(HInvoke * invoke)1024 void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
1025   VisitJdkUnsafePutVolatile(invoke);
1026 }
VisitUnsafePutObject(HInvoke * invoke)1027 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
1028   VisitJdkUnsafePutReference(invoke);
1029 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1030 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1031   VisitJdkUnsafePutObjectOrdered(invoke);
1032 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1033 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1034   VisitJdkUnsafePutReferenceVolatile(invoke);
1035 }
VisitUnsafePutLong(HInvoke * invoke)1036 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
1037   VisitJdkUnsafePutLong(invoke);
1038 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1039 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1040   VisitJdkUnsafePutLongOrdered(invoke);
1041 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1042 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1043   VisitJdkUnsafePutLongVolatile(invoke);
1044 }
VisitUnsafePutByte(HInvoke * invoke)1045 void IntrinsicLocationsBuilderARM64::VisitUnsafePutByte(HInvoke* invoke) {
1046   VisitJdkUnsafePutByte(invoke);
1047 }
1048 
VisitJdkUnsafePut(HInvoke * invoke)1049 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePut(HInvoke* invoke) {
1050   CreateUnsafePutLocations(allocator_, invoke);
1051 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)1052 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
1053   CreateUnsafePutAbsoluteLocations(allocator_, invoke);
1054 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)1055 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1056   CreateUnsafePutLocations(allocator_, invoke);
1057 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)1058 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1059   CreateUnsafePutLocations(allocator_, invoke);
1060 }
VisitJdkUnsafePutRelease(HInvoke * invoke)1061 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1062   CreateUnsafePutLocations(allocator_, invoke);
1063 }
VisitJdkUnsafePutReference(HInvoke * invoke)1064 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReference(HInvoke* invoke) {
1065   CreateUnsafePutLocations(allocator_, invoke);
1066 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)1067 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1068   CreateUnsafePutLocations(allocator_, invoke);
1069 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)1070 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
1071   CreateUnsafePutLocations(allocator_, invoke);
1072 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)1073 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
1074   CreateUnsafePutLocations(allocator_, invoke);
1075 }
VisitJdkUnsafePutLong(HInvoke * invoke)1076 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
1077   CreateUnsafePutLocations(allocator_, invoke);
1078 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)1079 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1080   CreateUnsafePutLocations(allocator_, invoke);
1081 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)1082 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1083   CreateUnsafePutLocations(allocator_, invoke);
1084 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)1085 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1086   CreateUnsafePutLocations(allocator_, invoke);
1087 }
VisitJdkUnsafePutByte(HInvoke * invoke)1088 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutByte(HInvoke* invoke) {
1089   CreateUnsafePutLocations(allocator_, invoke);
1090 }
1091 
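// Hedged summary of the put intrinsics below (illustrative; the Java signatures are quoted from
// the public Unsafe API rather than from this file): a call such as
//   UNSAFE.putInt(obj, offset, value)          // plain store
//   UNSAFE.putOrderedInt(obj, offset, value)   // ordered: emitted as a store-release
//   UNSAFE.putIntVolatile(obj, offset, value)  // volatile: also emitted as a store-release
// is lowered by GenUnsafePut() to a STR or STLR at `obj + offset`, with heap-reference
// poisoning and GC card marking added when a possibly non-null reference is stored.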
GenUnsafePut(HInvoke * invoke,DataType::Type type,bool is_volatile,bool is_ordered,CodeGeneratorARM64 * codegen)1092 static void GenUnsafePut(HInvoke* invoke,
1093                          DataType::Type type,
1094                          bool is_volatile,
1095                          bool is_ordered,
1096                          CodeGeneratorARM64* codegen) {
1097   LocationSummary* locations = invoke->GetLocations();
1098   MacroAssembler* masm = codegen->GetVIXLAssembler();
1099 
1100   static constexpr int kOffsetIndex = 2;
1101   static constexpr int kValueIndex = 3;
1102   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
1103   Location offset = locations->InAt(kOffsetIndex);      // Long offset.
1104   CPURegister value = InputCPURegisterOrZeroRegAt(invoke, kValueIndex);
1105   CPURegister source = value;
1106   MemOperand mem_op;
1107   if (offset.IsConstant()) {
1108     mem_op = MemOperand(base.X(), Int64FromLocation(offset));
1109   } else {
1110     mem_op = MemOperand(base.X(), XRegisterFrom(offset));
1111   }
1112 
1113   {
1114     // We use a block to end the scratch scope before the write barrier, thus
1115     // freeing the temporary registers so they can be used in `MarkGCCard`.
1116     UseScratchRegisterScope temps(masm);
1117 
1118     if (kPoisonHeapReferences &&
1119         type == DataType::Type::kReference &&
1120         !IsZeroBitPattern(invoke->InputAt(kValueIndex))) {
1121       DCHECK(value.IsW());
1122       Register temp = temps.AcquireW();
1123       __ Mov(temp.W(), value.W());
1124       codegen->GetAssembler()->PoisonHeapReference(temp.W());
1125       source = temp;
1126     }
1127 
1128     if (is_volatile || is_ordered) {
1129       codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
1130     } else {
1131       codegen->Store(type, source, mem_op);
1132     }
1133   }
1134 
1135   if (type == DataType::Type::kReference && !IsZeroBitPattern(invoke->InputAt(kValueIndex))) {
1136     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1137     codegen->MaybeMarkGCCard(base, Register(source), value_can_be_null);
1138   }
1139 }
1140 
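// Note: unlike GenUnsafePut() above, the "absolute" variant below stores through a raw 64-bit
// address (input 1) instead of an object-plus-offset pair, so there is no heap-reference
// poisoning and no GC card marking; only the plain store vs. store-release choice remains.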
GenUnsafePutAbsolute(HInvoke * invoke,DataType::Type type,bool is_volatile,bool is_ordered,CodeGeneratorARM64 * codegen)1141 static void GenUnsafePutAbsolute(HInvoke* invoke,
1142                                  DataType::Type type,
1143                                  bool is_volatile,
1144                                  bool is_ordered,
1145                                  CodeGeneratorARM64* codegen) {
1146   LocationSummary* locations = invoke->GetLocations();
1147 
1148   static constexpr int kAddressIndex = 1;
1149   static constexpr int kValueIndex = 2;
1150   Location address_loc = locations->InAt(kAddressIndex);
1151   MemOperand mem_op = MemOperand(WRegisterFrom(address_loc).X());
1152   CPURegister value = InputCPURegisterOrZeroRegAt(invoke, kValueIndex);
1153 
1154   if (is_volatile || is_ordered) {
1155     codegen->StoreRelease(invoke, type, value, mem_op, /* needs_null_check= */ false);
1156   } else {
1157     codegen->Store(type, value, mem_op);
1158   }
1159 }
1160 
VisitUnsafePut(HInvoke * invoke)1161 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
1162   VisitJdkUnsafePut(invoke);
1163 }
VisitUnsafePutAbsolute(HInvoke * invoke)1164 void IntrinsicCodeGeneratorARM64::VisitUnsafePutAbsolute(HInvoke* invoke) {
1165   VisitJdkUnsafePutAbsolute(invoke);
1166 }
VisitUnsafePutOrdered(HInvoke * invoke)1167 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
1168   VisitJdkUnsafePutOrdered(invoke);
1169 }
VisitUnsafePutVolatile(HInvoke * invoke)1170 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
1171   VisitJdkUnsafePutVolatile(invoke);
1172 }
VisitUnsafePutObject(HInvoke * invoke)1173 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
1174   VisitJdkUnsafePutReference(invoke);
1175 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1176 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1177   VisitJdkUnsafePutObjectOrdered(invoke);
1178 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1179 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1180   VisitJdkUnsafePutReferenceVolatile(invoke);
1181 }
VisitUnsafePutLong(HInvoke * invoke)1182 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
1183   VisitJdkUnsafePutLong(invoke);
1184 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1185 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1186   VisitJdkUnsafePutLongOrdered(invoke);
1187 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1188 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1189   VisitJdkUnsafePutLongVolatile(invoke);
1190 }
VisitUnsafePutByte(HInvoke * invoke)1191 void IntrinsicCodeGeneratorARM64::VisitUnsafePutByte(HInvoke* invoke) {
1192   VisitJdkUnsafePutByte(invoke);
1193 }
1194 
VisitJdkUnsafePut(HInvoke * invoke)1195 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePut(HInvoke* invoke) {
1196   GenUnsafePut(invoke,
1197                DataType::Type::kInt32,
1198                /*is_volatile=*/ false,
1199                /*is_ordered=*/ false,
1200                codegen_);
1201 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)1202 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
1203   GenUnsafePutAbsolute(invoke,
1204                        DataType::Type::kInt32,
1205                        /*is_volatile=*/ false,
1206                        /*is_ordered=*/ false,
1207                        codegen_);
1208 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)1209 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1210   GenUnsafePut(invoke,
1211                DataType::Type::kInt32,
1212                /*is_volatile=*/ false,
1213                /*is_ordered=*/ true,
1214                codegen_);
1215 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)1216 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1217   GenUnsafePut(invoke,
1218                DataType::Type::kInt32,
1219                /*is_volatile=*/ true,
1220                /*is_ordered=*/ false,
1221                codegen_);
1222 }
VisitJdkUnsafePutRelease(HInvoke * invoke)1223 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1224   GenUnsafePut(invoke,
1225                DataType::Type::kInt32,
1226                /*is_volatile=*/ true,
1227                /*is_ordered=*/ false,
1228                codegen_);
1229 }
VisitJdkUnsafePutReference(HInvoke * invoke)1230 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReference(HInvoke* invoke) {
1231   GenUnsafePut(invoke,
1232                DataType::Type::kReference,
1233                /*is_volatile=*/ false,
1234                /*is_ordered=*/ false,
1235                codegen_);
1236 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)1237 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1238   GenUnsafePut(invoke,
1239                DataType::Type::kReference,
1240                /*is_volatile=*/ false,
1241                /*is_ordered=*/ true,
1242                codegen_);
1243 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)1244 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
1245   GenUnsafePut(invoke,
1246                DataType::Type::kReference,
1247                /*is_volatile=*/ true,
1248                /*is_ordered=*/ false,
1249                codegen_);
1250 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)1251 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
1252   GenUnsafePut(invoke,
1253                DataType::Type::kReference,
1254                /*is_volatile=*/ true,
1255                /*is_ordered=*/ false,
1256                codegen_);
1257 }
VisitJdkUnsafePutLong(HInvoke * invoke)1258 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
1259   GenUnsafePut(invoke,
1260                DataType::Type::kInt64,
1261                /*is_volatile=*/ false,
1262                /*is_ordered=*/ false,
1263                codegen_);
1264 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)1265 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1266   GenUnsafePut(invoke,
1267                DataType::Type::kInt64,
1268                /*is_volatile=*/ false,
1269                /*is_ordered=*/ true,
1270                codegen_);
1271 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)1272 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1273   GenUnsafePut(invoke,
1274                DataType::Type::kInt64,
1275                /*is_volatile=*/ true,
1276                /*is_ordered=*/ false,
1277                codegen_);
1278 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)1279 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1280   GenUnsafePut(invoke,
1281                DataType::Type::kInt64,
1282                /*is_volatile=*/ true,
1283                /*is_ordered=*/ false,
1284                codegen_);
1285 }
VisitJdkUnsafePutByte(HInvoke * invoke)1286 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutByte(HInvoke* invoke) {
1287   GenUnsafePut(invoke,
1288                DataType::Type::kInt8,
1289                /*is_volatile=*/ false,
1290                /*is_ordered=*/ false,
1291                codegen_);
1292 }
1293 
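// Location layout shared by the compare-and-set intrinsics below: input 0 is the unused Unsafe
// receiver, input 1 the object, input 2 the long offset, input 3 the expected value and input 4
// the new value; the output register holds the success result. A reference CAS with read
// barriers uses kCallOnSlowPath, and with Baker read barriers no caller-save registers are
// spilled for the custom slow path.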
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorARM64 * codegen)1294 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
1295                                      HInvoke* invoke,
1296                                      CodeGeneratorARM64* codegen) {
1297   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
1298   LocationSummary* locations =
1299       new (allocator) LocationSummary(invoke,
1300                                       can_call
1301                                           ? LocationSummary::kCallOnSlowPath
1302                                           : LocationSummary::kNoCall,
1303                                       kIntrinsified);
1304   if (can_call && kUseBakerReadBarrier) {
1305     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1306   }
1307   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1308   locations->SetInAt(1, Location::RequiresRegister());
1309   locations->SetInAt(2, Location::RequiresRegister());
1310   locations->SetInAt(3, Location::RequiresRegister());
1311   locations->SetInAt(4, Location::RequiresRegister());
1312 
1313   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1314 }
1315 
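// Descriptive note for the helper below: EmitLoadExclusive() selects the load-exclusive
// instruction by type --
//   kBool/kUint8/kInt8       -> LDXRB / LDAXRB
//   kUint16/kInt16           -> LDXRH / LDAXRH
//   kInt32/kInt64/kReference -> LDXR  / LDAXR
// -- and then sign-extends kInt8/kInt16 results and unpoisons reference results.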
EmitLoadExclusive(CodeGeneratorARM64 * codegen,DataType::Type type,Register ptr,Register old_value,bool use_load_acquire)1316 static void EmitLoadExclusive(CodeGeneratorARM64* codegen,
1317                               DataType::Type type,
1318                               Register ptr,
1319                               Register old_value,
1320                               bool use_load_acquire) {
1321   Arm64Assembler* assembler = codegen->GetAssembler();
1322   MacroAssembler* masm = assembler->GetVIXLAssembler();
1323   switch (type) {
1324     case DataType::Type::kBool:
1325     case DataType::Type::kUint8:
1326     case DataType::Type::kInt8:
1327       if (use_load_acquire) {
1328         __ Ldaxrb(old_value, MemOperand(ptr));
1329       } else {
1330         __ Ldxrb(old_value, MemOperand(ptr));
1331       }
1332       break;
1333     case DataType::Type::kUint16:
1334     case DataType::Type::kInt16:
1335       if (use_load_acquire) {
1336         __ Ldaxrh(old_value, MemOperand(ptr));
1337       } else {
1338         __ Ldxrh(old_value, MemOperand(ptr));
1339       }
1340       break;
1341     case DataType::Type::kInt32:
1342     case DataType::Type::kInt64:
1343     case DataType::Type::kReference:
1344       if (use_load_acquire) {
1345         __ Ldaxr(old_value, MemOperand(ptr));
1346       } else {
1347         __ Ldxr(old_value, MemOperand(ptr));
1348       }
1349       break;
1350     default:
1351       LOG(FATAL) << "Unexpected type: " << type;
1352       UNREACHABLE();
1353   }
1354   switch (type) {
1355     case DataType::Type::kInt8:
1356       __ Sxtb(old_value, old_value);
1357       break;
1358     case DataType::Type::kInt16:
1359       __ Sxth(old_value, old_value);
1360       break;
1361     case DataType::Type::kReference:
1362       assembler->MaybeUnpoisonHeapReference(old_value);
1363       break;
1364     default:
1365       break;
1366   }
1367 }
1368 
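// Counterpart of EmitLoadExclusive(): selects STXRB/STLXRB, STXRH/STLXRH or STXR/STLXR by type,
// temporarily poisoning a reference `new_value` around the store when heap poisoning is enabled.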
EmitStoreExclusive(CodeGeneratorARM64 * codegen,DataType::Type type,Register ptr,Register store_result,Register new_value,bool use_store_release)1369 static void EmitStoreExclusive(CodeGeneratorARM64* codegen,
1370                                DataType::Type type,
1371                                Register ptr,
1372                                Register store_result,
1373                                Register new_value,
1374                                bool use_store_release) {
1375   Arm64Assembler* assembler = codegen->GetAssembler();
1376   MacroAssembler* masm = assembler->GetVIXLAssembler();
1377   if (type == DataType::Type::kReference) {
1378     assembler->MaybePoisonHeapReference(new_value);
1379   }
1380   switch (type) {
1381     case DataType::Type::kBool:
1382     case DataType::Type::kUint8:
1383     case DataType::Type::kInt8:
1384       if (use_store_release) {
1385         __ Stlxrb(store_result, new_value, MemOperand(ptr));
1386       } else {
1387         __ Stxrb(store_result, new_value, MemOperand(ptr));
1388       }
1389       break;
1390     case DataType::Type::kUint16:
1391     case DataType::Type::kInt16:
1392       if (use_store_release) {
1393         __ Stlxrh(store_result, new_value, MemOperand(ptr));
1394       } else {
1395         __ Stxrh(store_result, new_value, MemOperand(ptr));
1396       }
1397       break;
1398     case DataType::Type::kInt32:
1399     case DataType::Type::kInt64:
1400     case DataType::Type::kReference:
1401       if (use_store_release) {
1402         __ Stlxr(store_result, new_value, MemOperand(ptr));
1403       } else {
1404         __ Stxr(store_result, new_value, MemOperand(ptr));
1405       }
1406       break;
1407     default:
1408       LOG(FATAL) << "Unexpected type: " << type;
1409       UNREACHABLE();
1410   }
1411   if (type == DataType::Type::kReference) {
1412     assembler->MaybeUnpoisonHeapReference(new_value);
1413   }
1414 }
1415 
GenerateCompareAndSet(CodeGeneratorARM64 * codegen,DataType::Type type,std::memory_order order,bool strong,vixl::aarch64::Label * cmp_failure,Register ptr,Register new_value,Register old_value,Register store_result,Register expected,Register expected2=Register ())1416 static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
1417                                   DataType::Type type,
1418                                   std::memory_order order,
1419                                   bool strong,
1420                                   vixl::aarch64::Label* cmp_failure,
1421                                   Register ptr,
1422                                   Register new_value,
1423                                   Register old_value,
1424                                   Register store_result,
1425                                   Register expected,
1426                                   Register expected2 = Register()) {
1427   // `expected2` is valid only for the reference slow path and represents the unmarked old value
1428   // from the main path's CAS attempt when the marked old value matched `expected`.
1429   DCHECK_IMPLIES(expected2.IsValid(), type == DataType::Type::kReference);
1430 
1431   DCHECK(ptr.IsX());
1432   DCHECK_EQ(new_value.IsX(), type == DataType::Type::kInt64);
1433   DCHECK_EQ(old_value.IsX(), type == DataType::Type::kInt64);
1434   DCHECK(store_result.IsW());
1435   DCHECK_EQ(expected.IsX(), type == DataType::Type::kInt64);
1436   DCHECK_IMPLIES(expected2.IsValid(), expected2.IsW());
1437 
1438   Arm64Assembler* assembler = codegen->GetAssembler();
1439   MacroAssembler* masm = assembler->GetVIXLAssembler();
1440 
1441   bool use_load_acquire =
1442       (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1443   bool use_store_release =
1444       (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1445   DCHECK(use_load_acquire || use_store_release || order == std::memory_order_relaxed);
1446 
1447   // repeat: {
1448   //   old_value = [ptr];  // Load exclusive.
1449   //   if (old_value != expected && old_value != expected2) goto cmp_failure;
1450   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
1451   // }
1452   // if (strong) {
1453   //   if (store_result) goto repeat;  // Repeat until compare fails or store exclusive succeeds.
1454   // } else {
1455   //   store_result = store_result ^ 1;  // Report success as 1, failure as 0.
1456   // }
1457   //
1458   // Flag Z indicates whether `old_value == expected || old_value == expected2`.
1459   // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.)
1460 
1461   vixl::aarch64::Label loop_head;
1462   if (strong) {
1463     __ Bind(&loop_head);
1464   }
1465   EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
1466   __ Cmp(old_value, expected);
1467   if (expected2.IsValid()) {
1468     __ Ccmp(old_value, expected2, ZFlag, ne);
1469   }
1470   // If the comparison failed, the Z flag is cleared as we branch to the `cmp_failure` label.
1471   // If the comparison succeeded, the Z flag is set and remains set after the end of the
1472   // code emitted here, unless we retry the whole operation.
1473   __ B(cmp_failure, ne);
1474   EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
1475   if (strong) {
1476     __ Cbnz(store_result, &loop_head);
1477   } else {
1478     // Flip the `store_result` register to indicate success by 1 and failure by 0.
1479     __ Eor(store_result, store_result, 1);
1480   }
1481 }
1482 
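// Slow path for the reference CAS when the main-path comparison fails under read barriers: the
// loaded `old_value` may be an unmarked from-space reference to the `expected` object. The slow
// path marks it, re-compares against `expected`, and on a match repeats the CAS loop accepting
// both the to-space (`expected`) and from-space (`old_value`) references; otherwise it reports
// failure (and, when `update_old_value_` is set, updates the old value it reports).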
1483 class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1484  public:
ReadBarrierCasSlowPathARM64(HInvoke * invoke,std::memory_order order,bool strong,Register base,Register offset,Register expected,Register new_value,Register old_value,Register old_value_temp,Register store_result,bool update_old_value,CodeGeneratorARM64 * arm64_codegen)1485   ReadBarrierCasSlowPathARM64(HInvoke* invoke,
1486                               std::memory_order order,
1487                               bool strong,
1488                               Register base,
1489                               Register offset,
1490                               Register expected,
1491                               Register new_value,
1492                               Register old_value,
1493                               Register old_value_temp,
1494                               Register store_result,
1495                               bool update_old_value,
1496                               CodeGeneratorARM64* arm64_codegen)
1497       : SlowPathCodeARM64(invoke),
1498         order_(order),
1499         strong_(strong),
1500         base_(base),
1501         offset_(offset),
1502         expected_(expected),
1503         new_value_(new_value),
1504         old_value_(old_value),
1505         old_value_temp_(old_value_temp),
1506         store_result_(store_result),
1507         update_old_value_(update_old_value),
1508         mark_old_value_slow_path_(nullptr),
1509         update_old_value_slow_path_(nullptr) {
1510     if (!kUseBakerReadBarrier) {
1511       // We need to add the slow path now, it is too late when emitting slow path code.
1512       mark_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1513           invoke,
1514           Location::RegisterLocation(old_value_temp.GetCode()),
1515           Location::RegisterLocation(old_value.GetCode()),
1516           Location::RegisterLocation(base.GetCode()),
1517           /*offset=*/ 0u,
1518           /*index=*/ Location::RegisterLocation(offset.GetCode()));
1519       if (update_old_value_) {
1520         update_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1521             invoke,
1522             Location::RegisterLocation(old_value.GetCode()),
1523             Location::RegisterLocation(old_value_temp.GetCode()),
1524             Location::RegisterLocation(base.GetCode()),
1525             /*offset=*/ 0u,
1526             /*index=*/ Location::RegisterLocation(offset.GetCode()));
1527       }
1528     }
1529   }
1530 
GetDescription() const1531   const char* GetDescription() const override { return "ReadBarrierCasSlowPathARM64"; }
1532 
EmitNativeCode(CodeGenerator * codegen)1533   void EmitNativeCode(CodeGenerator* codegen) override {
1534     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1535     Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1536     MacroAssembler* masm = assembler->GetVIXLAssembler();
1537     __ Bind(GetEntryLabel());
1538 
1539     // Mark the `old_value_` from the main path and compare with `expected_`.
1540     if (kUseBakerReadBarrier) {
1541       DCHECK(mark_old_value_slow_path_ == nullptr);
1542       arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
1543     } else {
1544       DCHECK(mark_old_value_slow_path_ != nullptr);
1545       __ B(mark_old_value_slow_path_->GetEntryLabel());
1546       __ Bind(mark_old_value_slow_path_->GetExitLabel());
1547     }
1548     __ Cmp(old_value_temp_, expected_);
1549     if (update_old_value_) {
1550       // Update the old value if we're going to return from the slow path.
1551       __ Csel(old_value_, old_value_temp_, old_value_, ne);
1552     }
1553     __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.
1554 
1555     // The `old_value` we have read did not match `expected` (which is always a to-space
1556     // reference) but after the read barrier the marked to-space value matched, so the
1557     // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1558     // as the main path but check for both `expected` and the unmarked old value
1559     // representing the to-space and from-space references for the same object.
1560 
1561     UseScratchRegisterScope temps(masm);
1562     DCHECK_IMPLIES(store_result_.IsValid(), !temps.IsAvailable(store_result_));
1563     Register tmp_ptr = temps.AcquireX();
1564     Register store_result = store_result_.IsValid() ? store_result_ : temps.AcquireW();
1565 
1566     // Recalculate the main path's `tmp_ptr`, which was clobbered by the read barrier above.
1567     __ Add(tmp_ptr, base_.X(), Operand(offset_));
1568 
1569     vixl::aarch64::Label mark_old_value;
1570     GenerateCompareAndSet(arm64_codegen,
1571                           DataType::Type::kReference,
1572                           order_,
1573                           strong_,
1574                           /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1575                           tmp_ptr,
1576                           new_value_,
1577                           /*old_value=*/ old_value_temp_,
1578                           store_result,
1579                           expected_,
1580                           /*expected2=*/ old_value_);
1581     if (update_old_value_) {
1582       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1583       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1584       __ Mov(old_value_, expected_);
1585     }
1586 
1587     // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
1588     // For strong CAS, that's the overall success. For weak CAS, the code also needs
1589     // to check the `store_result` after returning from the slow path.
1590     __ B(GetExitLabel());
1591 
1592     if (update_old_value_) {
1593       __ Bind(&mark_old_value);
1594       if (kUseBakerReadBarrier) {
1595         DCHECK(update_old_value_slow_path_ == nullptr);
1596         arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
1597       } else {
1598         // Note: We could redirect the `failure` above directly to the entry label and bind
1599         // the exit label in the main path, but the main path would need to access the
1600         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1601         DCHECK(update_old_value_slow_path_ != nullptr);
1602         __ B(update_old_value_slow_path_->GetEntryLabel());
1603         __ Bind(update_old_value_slow_path_->GetExitLabel());
1604       }
1605       __ B(GetExitLabel());
1606     }
1607   }
1608 
1609  private:
1610   std::memory_order order_;
1611   bool strong_;
1612   Register base_;
1613   Register offset_;
1614   Register expected_;
1615   Register new_value_;
1616   Register old_value_;
1617   Register old_value_temp_;
1618   Register store_result_;
1619   bool update_old_value_;
1620   SlowPathCodeARM64* mark_old_value_slow_path_;
1621   SlowPathCodeARM64* update_old_value_slow_path_;
1622 };
1623 
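// Rough shape of the code GenUnsafeCas() emits for a strong, sequentially consistent CAS
// (illustrative pseudocode; actual register assignment comes from the register allocator):
//   tmp_ptr = base + offset;
//   repeat: old_value = load-acquire-exclusive [tmp_ptr];
//           if (old_value != expected) goto cmp_failure;      // With read barriers: slow path.
//           if (store-release-exclusive([tmp_ptr] <- new_value) failed) goto repeat;
//   exit_loop: out = (Z flag set) ? 1 : 0;                    // Cset out, eq.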
GenUnsafeCas(HInvoke * invoke,DataType::Type type,CodeGeneratorARM64 * codegen)1624 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1625   MacroAssembler* masm = codegen->GetVIXLAssembler();
1626   LocationSummary* locations = invoke->GetLocations();
1627 
1628   Register out = WRegisterFrom(locations->Out());                 // Boolean result.
1629   Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
1630   Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
1631   Register expected = RegisterFrom(locations->InAt(3), type);     // Expected.
1632   Register new_value = RegisterFrom(locations->InAt(4), type);    // New value.
1633 
1634   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1635   if (type == DataType::Type::kReference) {
1636     // Mark card for object assuming new value is stored.
1637     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
1638     codegen->MaybeMarkGCCard(base, new_value, new_value_can_be_null);
1639   }
1640 
1641   UseScratchRegisterScope temps(masm);
1642   Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
1643   Register old_value;                                              // Value in memory.
1644 
1645   vixl::aarch64::Label exit_loop_label;
1646   vixl::aarch64::Label* exit_loop = &exit_loop_label;
1647   vixl::aarch64::Label* cmp_failure = &exit_loop_label;
1648 
1649   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
1650     // We need to store the `old_value` in a non-scratch register to make sure
1651     // the read barrier in the slow path does not clobber it.
1652     old_value = WRegisterFrom(locations->GetTemp(0));  // The old value from main path.
1653     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
1654     // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
1655     Register old_value_temp = WRegisterFrom(locations->GetTemp(1));
1656     ReadBarrierCasSlowPathARM64* slow_path =
1657         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
1658             invoke,
1659             std::memory_order_seq_cst,
1660             /*strong=*/ true,
1661             base,
1662             offset,
1663             expected,
1664             new_value,
1665             old_value,
1666             old_value_temp,
1667             /*store_result=*/ Register(),  // Use a scratch register.
1668             /*update_old_value=*/ false,
1669             codegen);
1670     codegen->AddSlowPath(slow_path);
1671     exit_loop = slow_path->GetExitLabel();
1672     cmp_failure = slow_path->GetEntryLabel();
1673   } else {
1674     old_value = temps.AcquireSameSizeAs(new_value);
1675   }
1676 
1677   __ Add(tmp_ptr, base.X(), Operand(offset));
1678 
1679   GenerateCompareAndSet(codegen,
1680                         type,
1681                         std::memory_order_seq_cst,
1682                         /*strong=*/ true,
1683                         cmp_failure,
1684                         tmp_ptr,
1685                         new_value,
1686                         old_value,
1687                         /*store_result=*/ old_value.W(),  // Reuse `old_value` for ST*XR* result.
1688                         expected);
1689   __ Bind(exit_loop);
1690   __ Cset(out, eq);
1691 }
1692 
VisitUnsafeCASInt(HInvoke * invoke)1693 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1694   VisitJdkUnsafeCASInt(invoke);
1695 }
VisitUnsafeCASLong(HInvoke * invoke)1696 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1697   VisitJdkUnsafeCASLong(invoke);
1698 }
VisitUnsafeCASObject(HInvoke * invoke)1699 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1700   VisitJdkUnsafeCASObject(invoke);
1701 }
1702 
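// For reference, a hedged example of the Java-level API being intrinsified here (signature taken
// from the public Unsafe API, not from this file):
//   boolean ok = UNSAFE.compareAndSwapInt(obj, offset, expected, newValue);
// Despite the "swap" in the name this has compare-and-set semantics: it reports whether the
// update happened and does not return the old value.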
VisitJdkUnsafeCASInt(HInvoke * invoke)1703 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
1704   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
1705   VisitJdkUnsafeCompareAndSetInt(invoke);
1706 }
VisitJdkUnsafeCASLong(HInvoke * invoke)1707 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
1708   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
1709   VisitJdkUnsafeCompareAndSetLong(invoke);
1710 }
VisitJdkUnsafeCASObject(HInvoke * invoke)1711 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
1712   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
1713   VisitJdkUnsafeCompareAndSetReference(invoke);
1714 }
1715 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)1716 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1717   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
1718 }
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)1719 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1720   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
1721 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)1722 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
1723   // The only supported read barrier implementation is the Baker-style read barriers.
1724   if (codegen_->EmitNonBakerReadBarrier()) {
1725     return;
1726   }
1727 
1728   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
1729   if (codegen_->EmitReadBarrier()) {
1730     // We need two non-scratch temporary registers for read barrier.
1731     LocationSummary* locations = invoke->GetLocations();
1732     if (kUseBakerReadBarrier) {
1733       locations->AddRegisterTemps(2);
1734     } else {
1735       // To preserve the old value across the non-Baker read barrier
1736       // slow path, use a fixed callee-save register.
1737       constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
1738       locations->AddTemp(Location::RegisterLocation(first_callee_save));
1739       // To reduce the number of moves, request x0 as the second temporary.
1740       DCHECK(InvokeRuntimeCallingConvention().GetReturnLocation(DataType::Type::kReference).Equals(
1741                  Location::RegisterLocation(x0.GetCode())));
1742       locations->AddTemp(Location::RegisterLocation(x0.GetCode()));
1743     }
1744   }
1745 }
1746 
VisitUnsafeCASInt(HInvoke * invoke)1747 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1748   VisitJdkUnsafeCASInt(invoke);
1749 }
VisitUnsafeCASLong(HInvoke * invoke)1750 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1751   VisitJdkUnsafeCASLong(invoke);
1752 }
VisitUnsafeCASObject(HInvoke * invoke)1753 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1754   VisitJdkUnsafeCASObject(invoke);
1755 }
1756 
VisitJdkUnsafeCASInt(HInvoke * invoke)1757 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
1758   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
1759   VisitJdkUnsafeCompareAndSetInt(invoke);
1760 }
VisitJdkUnsafeCASLong(HInvoke * invoke)1761 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
1762   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
1763   VisitJdkUnsafeCompareAndSetLong(invoke);
1764 }
VisitJdkUnsafeCASObject(HInvoke * invoke)1765 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
1766   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
1767   VisitJdkUnsafeCompareAndSetReference(invoke);
1768 }
1769 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)1770 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1771   GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1772 }
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)1773 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1774   GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1775 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)1776 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
1777   // The only supported read barrier implementation is the Baker-style read barriers.
1778   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1779 
1780   GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
1781 }
1782 
1783 enum class GetAndUpdateOp {
1784   kSet,
1785   kAdd,
1786   kAddWithByteSwap,
1787   kAnd,
1788   kOr,
1789   kXor
1790 };
1791 
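// Illustrative pseudocode for the LL/SC loop emitted below, in the style of the
// GenerateCompareAndSet() comment above:
//   repeat: {
//     old_value = [ptr];                           // Load exclusive (acquire if requested).
//     new_value = op(old_value, arg);              // kSet/kAdd/kAnd/kOr/kXor; FP adds via FMOV+FADD,
//                                                  // kAddWithByteSwap byte-swaps around the addition.
//     store_result = failed([ptr] <- new_value);   // Store exclusive (release if requested).
//   }
//   if (store_result) goto repeat;                 // Retry until the store exclusive succeeds.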
GenerateGetAndUpdate(CodeGeneratorARM64 * codegen,GetAndUpdateOp get_and_update_op,DataType::Type load_store_type,std::memory_order order,Register ptr,CPURegister arg,CPURegister old_value)1792 static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
1793                                  GetAndUpdateOp get_and_update_op,
1794                                  DataType::Type load_store_type,
1795                                  std::memory_order order,
1796                                  Register ptr,
1797                                  CPURegister arg,
1798                                  CPURegister old_value) {
1799   MacroAssembler* masm = codegen->GetVIXLAssembler();
1800   UseScratchRegisterScope temps(masm);
1801   Register store_result = temps.AcquireW();
1802 
1803   DCHECK_EQ(old_value.GetSizeInBits(), arg.GetSizeInBits());
1804   Register old_value_reg;
1805   Register new_value;
1806   switch (get_and_update_op) {
1807     case GetAndUpdateOp::kSet:
1808       old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1809       new_value = arg.IsX() ? arg.X() : arg.W();
1810       break;
1811     case GetAndUpdateOp::kAddWithByteSwap:
1812     case GetAndUpdateOp::kAdd:
1813       if (arg.IsVRegister()) {
1814         old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
1815         new_value = old_value_reg;  // Use the same temporary.
1816         break;
1817       }
1818       FALLTHROUGH_INTENDED;
1819     case GetAndUpdateOp::kAnd:
1820     case GetAndUpdateOp::kOr:
1821     case GetAndUpdateOp::kXor:
1822       old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1823       new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
1824       break;
1825   }
1826 
1827   bool use_load_acquire =
1828       (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1829   bool use_store_release =
1830       (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1831   DCHECK(use_load_acquire || use_store_release);
1832 
1833   vixl::aarch64::Label loop_label;
1834   __ Bind(&loop_label);
1835   EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, use_load_acquire);
1836   switch (get_and_update_op) {
1837     case GetAndUpdateOp::kSet:
1838       break;
1839     case GetAndUpdateOp::kAddWithByteSwap:
1840       // To avoid unnecessary sign extension before REV16, the caller must specify `kUint16`
1841       // instead of `kInt16` and do the sign-extension explicitly afterwards.
1842       DCHECK_NE(load_store_type, DataType::Type::kInt16);
1843       GenerateReverseBytes(masm, load_store_type, old_value_reg, old_value_reg);
1844       FALLTHROUGH_INTENDED;
1845     case GetAndUpdateOp::kAdd:
1846       if (arg.IsVRegister()) {
1847         VRegister old_value_vreg = old_value.IsD() ? old_value.D() : old_value.S();
1848         VRegister sum = temps.AcquireSameSizeAs(old_value_vreg);
1849         __ Fmov(old_value_vreg, old_value_reg);
1850         __ Fadd(sum, old_value_vreg, arg.IsD() ? arg.D() : arg.S());
1851         __ Fmov(new_value, sum);
1852       } else {
1853         __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1854       }
1855       if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
1856         GenerateReverseBytes(masm, load_store_type, new_value, new_value);
1857       }
1858       break;
1859     case GetAndUpdateOp::kAnd:
1860       __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1861       break;
1862     case GetAndUpdateOp::kOr:
1863       __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1864       break;
1865     case GetAndUpdateOp::kXor:
1866       __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1867       break;
1868   }
1869   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
1870   __ Cbnz(store_result, &loop_label);
1871 }
1872 
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorARM64 * codegen)1873 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
1874                                               HInvoke* invoke,
1875                                               CodeGeneratorARM64* codegen) {
1876   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
1877   LocationSummary* locations =
1878       new (allocator) LocationSummary(invoke,
1879                                       can_call
1880                                           ? LocationSummary::kCallOnSlowPath
1881                                           : LocationSummary::kNoCall,
1882                                       kIntrinsified);
1883   if (can_call && kUseBakerReadBarrier) {
1884     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1885   }
1886   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1887   locations->SetInAt(1, Location::RequiresRegister());
1888   locations->SetInAt(2, Location::RequiresRegister());
1889   locations->SetInAt(3, Location::RequiresRegister());
1890   locations->AddTemp(Location::RequiresRegister());
1891 
1892   // Request another temporary register for methods that don't return a value.
1893   DataType::Type return_type = invoke->GetType();
1894   const bool is_void = return_type == DataType::Type::kVoid;
1895   if (is_void) {
1896     locations->AddTemp(Location::RequiresRegister());
1897   } else {
1898     locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1899   }
1900 }
1901 
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorARM64 * codegen,GetAndUpdateOp get_and_update_op)1902 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
1903                                   DataType::Type type,
1904                                   CodeGeneratorARM64* codegen,
1905                                   GetAndUpdateOp get_and_update_op) {
1906   // Currently only used for GetAndUpdateOp::kAdd and GetAndUpdateOp::kSet. Other ops might also
1907   // work, but double-check before using.
1908   DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
1909 
1910   MacroAssembler* masm = codegen->GetVIXLAssembler();
1911   LocationSummary* locations = invoke->GetLocations();
1912 
1913   DataType::Type return_type = invoke->GetType();
1914   const bool is_void = return_type == DataType::Type::kVoid;
1915   // We use a temporary for void methods, as we don't return the value.
1916   Location out_or_temp_loc =
1917       is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
1918   Register out_or_temp = RegisterFrom(out_or_temp_loc, type);     // Result.
1919   Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
1920   Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
1921   Register arg = RegisterFrom(locations->InAt(3), type);          // New value or addend.
1922   Register tmp_ptr = XRegisterFrom(locations->GetTemp(0));        // Pointer to actual memory.
1923 
1924   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1925   if (type == DataType::Type::kReference) {
1926     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
1927     // Mark card for object, assuming the new value is stored.
1928     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
1929     codegen->MaybeMarkGCCard(base, /*value=*/arg, new_value_can_be_null);
1930   }
1931 
1932   __ Add(tmp_ptr, base.X(), Operand(offset));
1933   GenerateGetAndUpdate(codegen,
1934                        get_and_update_op,
1935                        type,
1936                        std::memory_order_seq_cst,
1937                        tmp_ptr,
1938                        arg,
1939                        /*old_value=*/ out_or_temp);
1940 
1941   if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
1942     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
1943     if (kUseBakerReadBarrier) {
1944       codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out_or_temp.W(), out_or_temp.W());
1945     } else {
1946       codegen->GenerateReadBarrierSlow(invoke,
1947                                        Location::RegisterLocation(out_or_temp.GetCode()),
1948                                        Location::RegisterLocation(out_or_temp.GetCode()),
1949                                        Location::RegisterLocation(base.GetCode()),
1950                                        /*offset=*/ 0u,
1951                                        /*index=*/ Location::RegisterLocation(offset.GetCode()));
1952     }
1953   }
1954 }
1955 
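// For reference, hedged examples of the Java-level calls handled by the get-and-update
// intrinsics below (signatures from the public Unsafe API, not from this file):
//   int prev = UNSAFE.getAndAddInt(obj, offset, delta);    // Atomic add, returns the old value.
//   Object prev = UNSAFE.getAndSetObject(obj, offset, x);  // Atomic swap, returns the old value.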
VisitUnsafeGetAndAddInt(HInvoke * invoke)1956 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
1957   VisitJdkUnsafeGetAndAddInt(invoke);
1958 }
VisitUnsafeGetAndAddLong(HInvoke * invoke)1959 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
1960   VisitJdkUnsafeGetAndAddLong(invoke);
1961 }
VisitUnsafeGetAndSetInt(HInvoke * invoke)1962 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
1963   VisitJdkUnsafeGetAndSetInt(invoke);
1964 }
VisitUnsafeGetAndSetLong(HInvoke * invoke)1965 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
1966   VisitJdkUnsafeGetAndSetLong(invoke);
1967 }
VisitUnsafeGetAndSetObject(HInvoke * invoke)1968 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
1969   VisitJdkUnsafeGetAndSetReference(invoke);
1970 }
1971 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)1972 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
1973   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1974 }
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)1975 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
1976   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1977 }
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)1978 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
1979   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1980 }
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)1981 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
1982   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1983 }
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)1984 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
1985   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
1986 }
1987 
VisitUnsafeGetAndAddInt(HInvoke * invoke)1988 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
1989   VisitJdkUnsafeGetAndAddInt(invoke);
1990 }
VisitUnsafeGetAndAddLong(HInvoke * invoke)1991 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
1992   VisitJdkUnsafeGetAndAddLong(invoke);
1993 }
VisitUnsafeGetAndSetInt(HInvoke * invoke)1994 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
1995   VisitJdkUnsafeGetAndSetInt(invoke);
1996 }
VisitUnsafeGetAndSetLong(HInvoke * invoke)1997 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
1998   VisitJdkUnsafeGetAndSetLong(invoke);
1999 }
VisitUnsafeGetAndSetObject(HInvoke * invoke)2000 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2001   VisitJdkUnsafeGetAndSetReference(invoke);
2002 }
2003 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2004 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2005   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2006 }
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2007 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2008   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2009 }
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2010 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2011   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2012 }
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2013 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2014   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2015 }
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2016 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2017   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2018 }
2019 
VisitStringCompareTo(HInvoke * invoke)2020 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
2021   LocationSummary* locations =
2022       new (allocator_) LocationSummary(invoke,
2023                                        invoke->InputAt(1)->CanBeNull()
2024                                            ? LocationSummary::kCallOnSlowPath
2025                                            : LocationSummary::kNoCall,
2026                                        kIntrinsified);
2027   locations->SetInAt(0, Location::RequiresRegister());
2028   locations->SetInAt(1, Location::RequiresRegister());
2029   locations->AddRegisterTemps(3);
2030   // Need an extra temporary register for the String compression feature.
2031   if (mirror::kUseStringCompression) {
2032     locations->AddTemp(Location::RequiresRegister());
2033   }
2034   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2035 }
2036 
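// High-level outline of the String.compareTo() code emitted below:
//   1. If both inputs are the same reference, return 0.
//   2. Load both `count` fields (stripping the compression flag when enabled), compute the length
//      difference, and return it if the shorter string is empty.
//   3. If both strings use the same compression, compare the common prefix 8 bytes at a time and,
//      on a mismatch, locate the first differing character with EOR+RBIT+CLZ.
//   4. Otherwise fall through to the byte-vs-halfword loop for the mixed-compression case.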
VisitStringCompareTo(HInvoke * invoke)2037 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
2038   MacroAssembler* masm = GetVIXLAssembler();
2039   LocationSummary* locations = invoke->GetLocations();
2040 
2041   Register str = InputRegisterAt(invoke, 0);
2042   Register arg = InputRegisterAt(invoke, 1);
2043   DCHECK(str.IsW());
2044   DCHECK(arg.IsW());
2045   Register out = OutputRegister(invoke);
2046 
2047   Register temp0 = WRegisterFrom(locations->GetTemp(0));
2048   Register temp1 = WRegisterFrom(locations->GetTemp(1));
2049   Register temp2 = WRegisterFrom(locations->GetTemp(2));
2050   Register temp3;
2051   if (mirror::kUseStringCompression) {
2052     temp3 = WRegisterFrom(locations->GetTemp(3));
2053   }
2054 
2055   vixl::aarch64::Label loop;
2056   vixl::aarch64::Label find_char_diff;
2057   vixl::aarch64::Label end;
2058   vixl::aarch64::Label different_compression;
2059 
2060   // Get offsets of count and value fields within a string object.
2061   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
2062   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
2063 
2064   // Note that the null check must have been done earlier.
2065   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2066 
2067   // Take the slow path and throw if the argument can be null and actually is null.
2068   SlowPathCodeARM64* slow_path = nullptr;
2069   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
2070   if (can_slow_path) {
2071     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2072     codegen_->AddSlowPath(slow_path);
2073     __ Cbz(arg, slow_path->GetEntryLabel());
2074   }
2075 
2076   // Reference equality check, return 0 if same reference.
2077   __ Subs(out, str, arg);
2078   __ B(&end, eq);
2079 
2080   if (mirror::kUseStringCompression) {
2081     // Load `count` fields of this and argument strings.
2082     __ Ldr(temp3, HeapOperand(str, count_offset));
2083     __ Ldr(temp2, HeapOperand(arg, count_offset));
2084     // Clean out compression flag from lengths.
2085     __ Lsr(temp0, temp3, 1u);
2086     __ Lsr(temp1, temp2, 1u);
2087   } else {
2088     // Load lengths of this and argument strings.
2089     __ Ldr(temp0, HeapOperand(str, count_offset));
2090     __ Ldr(temp1, HeapOperand(arg, count_offset));
2091   }
2092   // out = length diff.
2093   __ Subs(out, temp0, temp1);
2094   // temp0 = min(len(str), len(arg)).
2095   __ Csel(temp0, temp1, temp0, ge);
2096   // Shorter string is empty?
2097   __ Cbz(temp0, &end);
2098 
2099   if (mirror::kUseStringCompression) {
2100     // Check that both strings use the same compression style before using this comparison loop.
2101     __ Eor(temp2, temp2, Operand(temp3));
2102     // Interleave with the compression flag extraction, which is needed for both paths,
2103     // and also set the flags, which is needed only for the different-compression path.
2104     __ Ands(temp3.W(), temp3.W(), Operand(1));
2105     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
2106   }
2107   // Store offset of string value in preparation for comparison loop.
2108   __ Mov(temp1, value_offset);
2109   if (mirror::kUseStringCompression) {
2110     // For string compression, calculate the number of bytes to compare (not chars).
2111     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
2112     __ Lsl(temp0, temp0, temp3);
2113   }
2114 
2115   UseScratchRegisterScope scratch_scope(masm);
2116   Register temp4 = scratch_scope.AcquireX();
2117 
2118   // Assertions that must hold in order to compare strings 8 bytes at a time.
2119   DCHECK_ALIGNED(value_offset, 8);
2120   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
2121 
2122   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2123   DCHECK_EQ(char_size, 2u);
2124 
2125   // Promote temp2 to an X reg, ready for LDR.
2126   temp2 = temp2.X();
2127 
2128   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
2129   __ Bind(&loop);
2130   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
2131   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
2132   __ Cmp(temp4, temp2);
2133   __ B(ne, &find_char_diff);
2134   __ Add(temp1, temp1, char_size * 4);
2135   // With string compression, we have compared 8 bytes, otherwise 4 chars.
2136   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
2137   __ B(&loop, hi);
2138   __ B(&end);
2139 
2140   // Promote temp1 to an X reg, ready for EOR.
2141   temp1 = temp1.X();
2142 
2143   // Find the single character difference.
2144   __ Bind(&find_char_diff);
2145   // Get the bit position of the first character that differs.
2146   __ Eor(temp1, temp2, temp4);
2147   __ Rbit(temp1, temp1);
2148   __ Clz(temp1, temp1);
2149 
2150   // If the number of chars remaining <= the index where the difference occurs (0-3), then
2151   // the difference occurs outside the remaining string data, so just return length diff (out).
2152   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
2153   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
2154   // unsigned when string compression is disabled.
2155   // When it's enabled, the comparison must be unsigned.
2156   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
2157   __ B(ls, &end);
2158 
2159   // Extract the characters and calculate the difference.
2160   if (mirror::kUseStringCompression) {
2161     __ Bic(temp1, temp1, 0x7);
2162     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
2163   } else {
2164     __ Bic(temp1, temp1, 0xf);
2165   }
2166   __ Lsr(temp2, temp2, temp1);
2167   __ Lsr(temp4, temp4, temp1);
2168   if (mirror::kUseStringCompression) {
2169     // Prioritize the case of compressed strings and calculate such result first.
2170     __ Uxtb(temp1, temp4);
2171     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
2172     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
2173   }
2174   __ Uxth(temp4, temp4);
2175   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
2176 
2177   if (mirror::kUseStringCompression) {
2178     __ B(&end);
2179     __ Bind(&different_compression);
2180 
2181     // Comparison for different compression style.
2182     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2183     DCHECK_EQ(c_char_size, 1u);
2184     temp1 = temp1.W();
2185     temp2 = temp2.W();
2186     temp4 = temp4.W();
2187 
2188     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
2189     // Note that flags have been set by the `str` compression flag extraction to `temp3`
2190     // before branching to the `different_compression` label.
2191     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
2192     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
2193 
2194     // We want to free up temp3, which currently holds the `str` compression flag, for the
2195     // comparison. So we move the flag to the bottom bit of the iteration count `temp0`, which we
2196     // then treat as unsigned. Start by freeing the bit with an LSL and finish further down with a
2197     // SUB, which lets `subs temp0, #2; bhi different_compression_loop` serve as the loop condition.
2198     __ Lsl(temp0, temp0, 1u);
2199 
2200     // Adjust temp1 and temp2 from string pointers to data pointers.
2201     __ Add(temp1, temp1, Operand(value_offset));
2202     __ Add(temp2, temp2, Operand(value_offset));
2203 
2204     // Complete the move of the compression flag.
2205     __ Sub(temp0, temp0, Operand(temp3));
2206 
2207     vixl::aarch64::Label different_compression_loop;
2208     vixl::aarch64::Label different_compression_diff;
2209 
2210     __ Bind(&different_compression_loop);
2211     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
2212     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
2213     __ Subs(temp4, temp4, Operand(temp3));
2214     __ B(&different_compression_diff, ne);
2215     __ Subs(temp0, temp0, 2);
2216     __ B(&different_compression_loop, hi);
2217     __ B(&end);
2218 
2219     // Calculate the difference.
2220     __ Bind(&different_compression_diff);
2221     __ Tst(temp0, Operand(1));
2222     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2223                   "Expecting 0=compressed, 1=uncompressed");
2224     __ Cneg(out, temp4, ne);
2225   }
2226 
2227   __ Bind(&end);
2228 
2229   if (can_slow_path) {
2230     __ Bind(slow_path->GetExitLabel());
2231   }
2232 }
2233 
2234 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
2235 // The normal loop plus the pre-header is 9 instructions without string compression and 12
2236 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
2237 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
2238 // to 10 instructions for the unrolled loop.
2239 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
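     // For illustration (derived from the code generation below): a 32-byte compare is exactly two
     // LDP+LDP+CMP+CCMP+BNE blocks, i.e. 10 instructions, so the cutoff corresponds to 32 compressed
     // (8-bit) chars or 16 uncompressed (16-bit) chars.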
2240 
GetConstString(HInstruction * candidate,uint32_t * utf16_length)2241 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
2242   if (candidate->IsLoadString()) {
2243     HLoadString* load_string = candidate->AsLoadString();
2244     const DexFile& dex_file = load_string->GetDexFile();
2245     return dex_file.GetStringDataAndUtf16Length(load_string->GetStringIndex(), utf16_length);
2246   }
2247   return nullptr;
2248 }
2249 
VisitStringEquals(HInvoke * invoke)2250 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
2251   LocationSummary* locations =
2252       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2253   locations->SetInAt(0, Location::RequiresRegister());
2254   locations->SetInAt(1, Location::RequiresRegister());
2255 
2256   // For the generic implementation and for long const strings we need a temporary.
2257   // We do not need it for short const strings, up to 8 bytes, see code generation below.
2258   uint32_t const_string_length = 0u;
2259   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
2260   if (const_string == nullptr) {
2261     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
2262   }
2263   bool is_compressed =
2264       mirror::kUseStringCompression &&
2265       const_string != nullptr &&
2266       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
2267   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
2268     locations->AddTemp(Location::RequiresRegister());
2269   }
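       // (Short const strings of at most 8 bytes, i.e. up to 8 compressed or 4 uncompressed chars,
       // are compared with a single LDR+LDR+CMP sequence using only the two scratch registers, so
       // no extra temporary is needed; see the code generation below.)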
2270 
2271   // TODO: If the String.equals() is used only for an immediately following HIf, we can
2272   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
2273   // Then we shall need an extra temporary register instead of the output register.
2274   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2275 }
2276 
VisitStringEquals(HInvoke * invoke)2277 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
2278   MacroAssembler* masm = GetVIXLAssembler();
2279   LocationSummary* locations = invoke->GetLocations();
2280 
2281   Register str = WRegisterFrom(locations->InAt(0));
2282   Register arg = WRegisterFrom(locations->InAt(1));
2283   Register out = XRegisterFrom(locations->Out());
2284 
2285   UseScratchRegisterScope scratch_scope(masm);
2286   Register temp = scratch_scope.AcquireW();
2287   Register temp1 = scratch_scope.AcquireW();
2288 
2289   vixl::aarch64::Label loop;
2290   vixl::aarch64::Label end;
2291   vixl::aarch64::Label return_true;
2292   vixl::aarch64::Label return_false;
2293 
2294   // Get offsets of count, value, and class fields within a string object.
2295   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
2296   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
2297   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2298 
2299   // Note that the null check must have been done earlier.
2300   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2301 
2302   StringEqualsOptimizations optimizations(invoke);
2303   if (!optimizations.GetArgumentNotNull()) {
2304     // Check if input is null, return false if it is.
2305     __ Cbz(arg, &return_false);
2306   }
2307 
2308   // Reference equality check, return true if same reference.
2309   __ Cmp(str, arg);
2310   __ B(&return_true, eq);
2311 
2312   if (!optimizations.GetArgumentIsString()) {
2313     // Instanceof check for the argument by comparing class fields.
2314     // All string objects must have the same type since String cannot be subclassed.
2315     // Receiver must be a string object, so its class field is equal to all strings' class fields.
2316     // If the argument is a string object, its class field must be equal to receiver's class field.
2317     //
2318     // As the String class is expected to be non-movable, we can read the class
2319     // field from String.equals' arguments without read barriers.
2320     AssertNonMovableStringClass();
2321     // /* HeapReference<Class> */ temp = str->klass_
2322     __ Ldr(temp, MemOperand(str.X(), class_offset));
2323     // /* HeapReference<Class> */ temp1 = arg->klass_
2324     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
2325     // Also, because we use the previously loaded class references only in the
2326     // following comparison, we don't need to unpoison them.
2327     __ Cmp(temp, temp1);
2328     __ B(&return_false, ne);
2329   }
2330 
2331   // Check if one of the inputs is a const string. Do not special-case both strings
2332   // being const; such cases should be handled by constant folding if needed.
2333   uint32_t const_string_length = 0u;
2334   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
2335   if (const_string == nullptr) {
2336     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
2337     if (const_string != nullptr) {
2338       std::swap(str, arg);  // Make sure the const string is in `str`.
2339     }
2340   }
2341   bool is_compressed =
2342       mirror::kUseStringCompression &&
2343       const_string != nullptr &&
2344       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
2345 
2346   if (const_string != nullptr) {
2347     // Load `count` field of the argument string and check if it matches the const string.
2348     // Also compare the compression style; if it differs, return false.
2349     __ Ldr(temp, MemOperand(arg.X(), count_offset));
2350     // Temporarily release temp1 as we may not be able to embed the flagged count in the CMP immediate.
2351     scratch_scope.Release(temp1);
2352     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
2353     temp1 = scratch_scope.AcquireW();
2354     __ B(&return_false, ne);
2355   } else {
2356     // Load `count` fields of this and argument strings.
2357     __ Ldr(temp, MemOperand(str.X(), count_offset));
2358     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
2359     // Check if `count` fields are equal, return false if they're not.
2360     // Also compare the compression style; if it differs, return false.
2361     __ Cmp(temp, temp1);
2362     __ B(&return_false, ne);
2363   }
2364 
2365   // Assertions that must hold in order to compare strings 8 bytes at a time.
2366   // Ok to do this because strings are zero-padded to kObjectAlignment.
2367   DCHECK_ALIGNED(value_offset, 8);
2368   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
2369 
2370   if (const_string != nullptr &&
2371       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
2372                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
2373     // Load and compare the contents. Though we know the contents of the short const string
2374     // at compile time, materializing constants may be more code than loading from memory.
2375     int32_t offset = value_offset;
2376     size_t remaining_bytes =
2377         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
2378     temp = temp.X();
2379     temp1 = temp1.X();
2380     while (remaining_bytes > sizeof(uint64_t)) {
2381       Register temp2 = XRegisterFrom(locations->GetTemp(0));
2382       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
2383       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
2384       __ Cmp(temp, temp2);
2385       __ Ccmp(temp1, out, NoFlag, eq);
2386       __ B(&return_false, ne);
2387       offset += 2u * sizeof(uint64_t);
2388       remaining_bytes -= 2u * sizeof(uint64_t);
2389     }
2390     if (remaining_bytes != 0u) {
2391       __ Ldr(temp, MemOperand(str.X(), offset));
2392       __ Ldr(temp1, MemOperand(arg.X(), offset));
2393       __ Cmp(temp, temp1);
2394       __ B(&return_false, ne);
2395     }
2396   } else {
2397     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
2398     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2399                   "Expecting 0=compressed, 1=uncompressed");
2400     __ Cbz(temp, &return_true);
2401 
2402     if (mirror::kUseStringCompression) {
2403       // For string compression, calculate the number of bytes to compare (not chars).
2404       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
2405       __ And(temp1, temp, Operand(1));    // Extract compression flag.
2406       __ Lsr(temp, temp, 1u);             // Extract length.
2407       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
2408     }
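         // Worked example of the `count` encoding implied by the extraction above: count is
         // (utf16 length << 1) | flag, with flag 0 for compressed and 1 for uncompressed, so an
         // uncompressed 3-char string (count == 7) compares 3 << 1 == 6 bytes while a compressed
         // 3-char string (count == 6) compares 3 bytes.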
2409 
2410     // Store the offset of the string value in preparation for the comparison loop.
2411     __ Mov(temp1, value_offset);
2412 
2413     temp1 = temp1.X();
2414     Register temp2 = XRegisterFrom(locations->GetTemp(0));
2415     // Loop to compare strings 8 bytes at a time starting at the front of the string.
2416     __ Bind(&loop);
2417     __ Ldr(out, MemOperand(str.X(), temp1));
2418     __ Ldr(temp2, MemOperand(arg.X(), temp1));
2419     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
2420     __ Cmp(out, temp2);
2421     __ B(&return_false, ne);
2422     // With string compression, we have compared 8 bytes, otherwise 4 chars.
2423     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
2424     __ B(&loop, hi);
2425   }
2426 
2427   // Return true and exit the function.
2428   // If the loop does not result in returning false, we return true.
2429   __ Bind(&return_true);
2430   __ Mov(out, 1);
2431   __ B(&end);
2432 
2433   // Return false and exit the function.
2434   __ Bind(&return_false);
2435   __ Mov(out, 0);
2436   __ Bind(&end);
2437 }
2438 
GenerateVisitStringIndexOf(HInvoke * invoke,MacroAssembler * masm,CodeGeneratorARM64 * codegen,bool start_at_zero)2439 static void GenerateVisitStringIndexOf(HInvoke* invoke,
2440                                        MacroAssembler* masm,
2441                                        CodeGeneratorARM64* codegen,
2442                                        bool start_at_zero) {
2443   LocationSummary* locations = invoke->GetLocations();
2444 
2445   // Note that the null check must have been done earlier.
2446   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2447 
2448   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
2449   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
2450   SlowPathCodeARM64* slow_path = nullptr;
2451   HInstruction* code_point = invoke->InputAt(1);
2452   if (code_point->IsIntConstant()) {
2453     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
2454       // Always needs the slow-path. We could directly dispatch to it, but this case should be
2455       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
2456       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2457       codegen->AddSlowPath(slow_path);
2458       __ B(slow_path->GetEntryLabel());
2459       __ Bind(slow_path->GetExitLabel());
2460       return;
2461     }
2462   } else if (code_point->GetType() != DataType::Type::kUint16) {
2463     Register char_reg = WRegisterFrom(locations->InAt(1));
2464     __ Tst(char_reg, 0xFFFF0000);
2465     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2466     codegen->AddSlowPath(slow_path);
2467     __ B(ne, slow_path->GetEntryLabel());
2468   }
2469 
2470   if (start_at_zero) {
2471     // Start-index = 0.
2472     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
2473     __ Mov(tmp_reg, 0);
2474   }
2475 
2476   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
2477   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
2478 
2479   if (slow_path != nullptr) {
2480     __ Bind(slow_path->GetExitLabel());
2481   }
2482 }
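     // In summary (from the entrypoint check above): the IndexOf stub takes the string, the code
     // point and the start index, and returns an int32_t index. For IndexOf without an explicit
     // start index, the locations builder below routes a constant 0 into the third argument
     // register ("Need to send start_index=0").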
2483 
VisitStringIndexOf(HInvoke * invoke)2484 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
2485   LocationSummary* locations = new (allocator_) LocationSummary(
2486       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2487   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2488   // best to align the inputs accordingly.
2489   InvokeRuntimeCallingConvention calling_convention;
2490   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2491   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2492   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
2493 
2494   // Need to send start_index=0.
2495   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2496 }
2497 
VisitStringIndexOf(HInvoke * invoke)2498 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
2499   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
2500 }
2501 
VisitStringIndexOfAfter(HInvoke * invoke)2502 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
2503   LocationSummary* locations = new (allocator_) LocationSummary(
2504       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2505   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2506   // best to align the inputs accordingly.
2507   InvokeRuntimeCallingConvention calling_convention;
2508   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2509   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2510   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2511   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
2512 }
2513 
VisitStringIndexOfAfter(HInvoke * invoke)2514 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
2515   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
2516 }
2517 
VisitStringNewStringFromBytes(HInvoke * invoke)2518 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
2519   LocationSummary* locations = new (allocator_) LocationSummary(
2520       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2521   InvokeRuntimeCallingConvention calling_convention;
2522   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2523   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2524   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2525   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
2526   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2527 }
2528 
VisitStringNewStringFromBytes(HInvoke * invoke)2529 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
2530   MacroAssembler* masm = GetVIXLAssembler();
2531   LocationSummary* locations = invoke->GetLocations();
2532 
2533   Register byte_array = WRegisterFrom(locations->InAt(0));
2534   __ Cmp(byte_array, 0);
2535   SlowPathCodeARM64* slow_path =
2536       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2537   codegen_->AddSlowPath(slow_path);
2538   __ B(eq, slow_path->GetEntryLabel());
2539 
2540   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
2541   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
2542   __ Bind(slow_path->GetExitLabel());
2543 }
2544 
VisitStringNewStringFromChars(HInvoke * invoke)2545 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2546   LocationSummary* locations =
2547       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2548   InvokeRuntimeCallingConvention calling_convention;
2549   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2550   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2551   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2552   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2553 }
2554 
VisitStringNewStringFromChars(HInvoke * invoke)2555 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2556   // No need to emit code checking whether `locations->InAt(2)` is a null
2557   // pointer, as callers of the native method
2558   //
2559   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2560   //
2561   // all include a null check on `data` before calling that method.
2562   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2563   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2564 }
2565 
VisitStringNewStringFromString(HInvoke * invoke)2566 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2567   LocationSummary* locations = new (allocator_) LocationSummary(
2568       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2569   InvokeRuntimeCallingConvention calling_convention;
2570   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2571   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2572 }
2573 
VisitStringNewStringFromString(HInvoke * invoke)2574 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2575   MacroAssembler* masm = GetVIXLAssembler();
2576   LocationSummary* locations = invoke->GetLocations();
2577 
2578   Register string_to_copy = WRegisterFrom(locations->InAt(0));
2579   __ Cmp(string_to_copy, 0);
2580   SlowPathCodeARM64* slow_path =
2581       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2582   codegen_->AddSlowPath(slow_path);
2583   __ B(eq, slow_path->GetEntryLabel());
2584 
2585   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2586   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2587   __ Bind(slow_path->GetExitLabel());
2588 }
2589 
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)2590 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2591   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2592   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2593   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2594 
2595   LocationSummary* const locations =
2596       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2597   InvokeRuntimeCallingConvention calling_convention;
2598 
2599   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2600   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2601 }
2602 
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)2603 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2604   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2605   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2606   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2607   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2608 
2609   LocationSummary* const locations =
2610       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2611   InvokeRuntimeCallingConvention calling_convention;
2612 
2613   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2614   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
2615   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2616 }
2617 
CreateFPFPFPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)2618 static void CreateFPFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2619   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
2620   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2621   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2622   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
2623   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2624 
2625   LocationSummary* const locations =
2626       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2627 
2628   locations->SetInAt(0, Location::RequiresFpuRegister());
2629   locations->SetInAt(1, Location::RequiresFpuRegister());
2630   locations->SetInAt(2, Location::RequiresFpuRegister());
2631   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2632 }
2633 
GenFPToFPCall(HInvoke * invoke,CodeGeneratorARM64 * codegen,QuickEntrypointEnum entry)2634 static void GenFPToFPCall(HInvoke* invoke,
2635                           CodeGeneratorARM64* codegen,
2636                           QuickEntrypointEnum entry) {
2637   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2638 }
2639 
VisitMathCos(HInvoke * invoke)2640 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
2641   CreateFPToFPCallLocations(allocator_, invoke);
2642 }
2643 
VisitMathCos(HInvoke * invoke)2644 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
2645   GenFPToFPCall(invoke, codegen_, kQuickCos);
2646 }
2647 
VisitMathSin(HInvoke * invoke)2648 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
2649   CreateFPToFPCallLocations(allocator_, invoke);
2650 }
2651 
VisitMathSin(HInvoke * invoke)2652 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
2653   GenFPToFPCall(invoke, codegen_, kQuickSin);
2654 }
2655 
VisitMathAcos(HInvoke * invoke)2656 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
2657   CreateFPToFPCallLocations(allocator_, invoke);
2658 }
2659 
VisitMathAcos(HInvoke * invoke)2660 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
2661   GenFPToFPCall(invoke, codegen_, kQuickAcos);
2662 }
2663 
VisitMathAsin(HInvoke * invoke)2664 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
2665   CreateFPToFPCallLocations(allocator_, invoke);
2666 }
2667 
VisitMathAsin(HInvoke * invoke)2668 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
2669   GenFPToFPCall(invoke, codegen_, kQuickAsin);
2670 }
2671 
VisitMathAtan(HInvoke * invoke)2672 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
2673   CreateFPToFPCallLocations(allocator_, invoke);
2674 }
2675 
VisitMathAtan(HInvoke * invoke)2676 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
2677   GenFPToFPCall(invoke, codegen_, kQuickAtan);
2678 }
2679 
VisitMathCbrt(HInvoke * invoke)2680 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
2681   CreateFPToFPCallLocations(allocator_, invoke);
2682 }
2683 
VisitMathCbrt(HInvoke * invoke)2684 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
2685   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
2686 }
2687 
VisitMathCosh(HInvoke * invoke)2688 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
2689   CreateFPToFPCallLocations(allocator_, invoke);
2690 }
2691 
VisitMathCosh(HInvoke * invoke)2692 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
2693   GenFPToFPCall(invoke, codegen_, kQuickCosh);
2694 }
2695 
VisitMathExp(HInvoke * invoke)2696 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
2697   CreateFPToFPCallLocations(allocator_, invoke);
2698 }
2699 
VisitMathExp(HInvoke * invoke)2700 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
2701   GenFPToFPCall(invoke, codegen_, kQuickExp);
2702 }
2703 
VisitMathExpm1(HInvoke * invoke)2704 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
2705   CreateFPToFPCallLocations(allocator_, invoke);
2706 }
2707 
VisitMathExpm1(HInvoke * invoke)2708 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
2709   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
2710 }
2711 
VisitMathLog(HInvoke * invoke)2712 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
2713   CreateFPToFPCallLocations(allocator_, invoke);
2714 }
2715 
VisitMathLog(HInvoke * invoke)2716 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
2717   GenFPToFPCall(invoke, codegen_, kQuickLog);
2718 }
2719 
VisitMathLog10(HInvoke * invoke)2720 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
2721   CreateFPToFPCallLocations(allocator_, invoke);
2722 }
2723 
VisitMathLog10(HInvoke * invoke)2724 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
2725   GenFPToFPCall(invoke, codegen_, kQuickLog10);
2726 }
2727 
VisitMathSinh(HInvoke * invoke)2728 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
2729   CreateFPToFPCallLocations(allocator_, invoke);
2730 }
2731 
VisitMathSinh(HInvoke * invoke)2732 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
2733   GenFPToFPCall(invoke, codegen_, kQuickSinh);
2734 }
2735 
VisitMathTan(HInvoke * invoke)2736 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
2737   CreateFPToFPCallLocations(allocator_, invoke);
2738 }
2739 
VisitMathTan(HInvoke * invoke)2740 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
2741   GenFPToFPCall(invoke, codegen_, kQuickTan);
2742 }
2743 
VisitMathTanh(HInvoke * invoke)2744 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
2745   CreateFPToFPCallLocations(allocator_, invoke);
2746 }
2747 
VisitMathTanh(HInvoke * invoke)2748 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
2749   GenFPToFPCall(invoke, codegen_, kQuickTanh);
2750 }
2751 
VisitMathAtan2(HInvoke * invoke)2752 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
2753   CreateFPFPToFPCallLocations(allocator_, invoke);
2754 }
2755 
VisitMathAtan2(HInvoke * invoke)2756 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
2757   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
2758 }
2759 
VisitMathPow(HInvoke * invoke)2760 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
2761   CreateFPFPToFPCallLocations(allocator_, invoke);
2762 }
2763 
VisitMathPow(HInvoke * invoke)2764 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
2765   GenFPToFPCall(invoke, codegen_, kQuickPow);
2766 }
2767 
VisitMathHypot(HInvoke * invoke)2768 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
2769   CreateFPFPToFPCallLocations(allocator_, invoke);
2770 }
2771 
VisitMathHypot(HInvoke * invoke)2772 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
2773   GenFPToFPCall(invoke, codegen_, kQuickHypot);
2774 }
2775 
VisitMathNextAfter(HInvoke * invoke)2776 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
2777   CreateFPFPToFPCallLocations(allocator_, invoke);
2778 }
2779 
VisitMathNextAfter(HInvoke * invoke)2780 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
2781   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
2782 }
2783 
VisitStringGetCharsNoCheck(HInvoke * invoke)2784 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2785   LocationSummary* locations =
2786       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2787   locations->SetInAt(0, Location::RequiresRegister());
2788   locations->SetInAt(1, Location::RequiresRegister());
2789   locations->SetInAt(2, Location::RequiresRegister());
2790   locations->SetInAt(3, Location::RequiresRegister());
2791   locations->SetInAt(4, Location::RequiresRegister());
2792 
2793   locations->AddRegisterTemps(3);
2794 }
2795 
VisitStringGetCharsNoCheck(HInvoke * invoke)2796 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2797   MacroAssembler* masm = GetVIXLAssembler();
2798   LocationSummary* locations = invoke->GetLocations();
2799 
2800   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2801   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2802   DCHECK_EQ(char_size, 2u);
2803 
2804   // Location of data in char array buffer.
2805   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2806 
2807   // Location of char array data in string.
2808   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2809 
2810   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2811   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2812   Register srcObj = XRegisterFrom(locations->InAt(0));
2813   Register srcBegin = XRegisterFrom(locations->InAt(1));
2814   Register srcEnd = XRegisterFrom(locations->InAt(2));
2815   Register dstObj = XRegisterFrom(locations->InAt(3));
2816   Register dstBegin = XRegisterFrom(locations->InAt(4));
2817 
2818   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2819   Register num_chr = XRegisterFrom(locations->GetTemp(1));
2820   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2821 
2822   UseScratchRegisterScope temps(masm);
2823   Register dst_ptr = temps.AcquireX();
2824   Register tmp2 = temps.AcquireX();
2825 
2826   vixl::aarch64::Label done;
2827   vixl::aarch64::Label compressed_string_vector_loop;
2828   vixl::aarch64::Label compressed_string_remainder;
2829   __ Sub(num_chr, srcEnd, srcBegin);
2830   // Early out for valid zero-length retrievals.
2831   __ Cbz(num_chr, &done);
2832 
2833   // dst address start to copy to.
2834   __ Add(dst_ptr, dstObj, Operand(data_offset));
2835   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2836 
2837   // src address to copy from.
2838   __ Add(src_ptr, srcObj, Operand(value_offset));
2839   vixl::aarch64::Label compressed_string_preloop;
2840   if (mirror::kUseStringCompression) {
2841     // Location of count in string.
2842     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2843     // String's length.
2844     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2845     __ Tbz(tmp2, 0, &compressed_string_preloop);
2846   }
2847   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2848 
2849   // Do the copy.
2850   vixl::aarch64::Label loop;
2851   vixl::aarch64::Label remainder;
2852 
2853   // Subtract into tmp1 to avoid having to repair the value of num_chr on the < 8 character path.
2854   __ Subs(tmp1, num_chr, 8);
2855   __ B(lt, &remainder);
2856 
2857   // Keep the result of the earlier subs; we are going to fetch at least 8 characters.
2858   __ Mov(num_chr, tmp1);
2859 
2860   // Main loop, used for longer fetches, loads and stores 8 x 16-bit characters at a time.
2861   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2862   __ Bind(&loop);
2863   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2864   __ Subs(num_chr, num_chr, 8);
2865   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2866   __ B(ge, &loop);
2867 
2868   __ Adds(num_chr, num_chr, 8);
2869   __ B(eq, &done);
2870 
2871   // Main loop for < 8 character case and remainder handling. Loads and stores one
2872   // 16-bit Java character at a time.
2873   __ Bind(&remainder);
2874   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2875   __ Subs(num_chr, num_chr, 1);
2876   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2877   __ B(gt, &remainder);
2878   __ B(&done);
2879 
2880   if (mirror::kUseStringCompression) {
2881     // For compressed strings, acquire a SIMD temporary register.
2882     VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
2883     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2884     DCHECK_EQ(c_char_size, 1u);
2885     __ Bind(&compressed_string_preloop);
2886     __ Add(src_ptr, src_ptr, Operand(srcBegin));
2887 
2888     // Subtract into tmp1 to avoid having to repair the value of num_chr on the < 8 character path.
2889     __ Subs(tmp1, num_chr, 8);
2890     __ B(lt, &compressed_string_remainder);
2891 
2892     // Keep the result of the earlier subs; we are going to fetch at least 8 characters.
2893     __ Mov(num_chr, tmp1);
2894 
2895     // Main loop for a compressed src, widening 8 characters from 8-bit to 16-bit at a time.
2896     // Uses SIMD instructions.
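         // Each iteration loads 8 compressed bytes into the low half of vtmp1 (V8B), zero-extends
         // every byte to a 16-bit lane with UXTL (V8H), and stores the resulting 16 bytes.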
2897     __ Bind(&compressed_string_vector_loop);
2898     __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
2899     __ Subs(num_chr, num_chr, 8);
2900     __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
2901     __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
2902     __ B(ge, &compressed_string_vector_loop);
2903 
2904     __ Adds(num_chr, num_chr, 8);
2905     __ B(eq, &done);
2906 
2907     // Loop for the < 8 character case and remainder handling with a compressed src.
2908     // Widens one character from 8-bit to 16-bit at a time.
2909     __ Bind(&compressed_string_remainder);
2910     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2911     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2912     __ Subs(num_chr, num_chr, Operand(1));
2913     __ B(gt, &compressed_string_remainder);
2914   }
2915 
2916   __ Bind(&done);
2917 }
2918 
2919 // This value is greater than ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore,
2920 // so if we choose to jump to the slow path we will end up in the native implementation.
2921 static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
2922 
LocationForSystemArrayCopyInput(HInstruction * input)2923 static Location LocationForSystemArrayCopyInput(HInstruction* input) {
2924   HIntConstant* const_input = input->AsIntConstantOrNull();
2925   if (const_input != nullptr && vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2926     return Location::ConstantLocation(const_input);
2927   } else {
2928     return Location::RequiresRegister();
2929   }
2930 }
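     // (A constant position accepted by IsImmAddSub is kept as a constant location so that the
     // address computation in GenArrayAddress below can add it directly instead of occupying a
     // register; other positions are materialized in registers.)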
2931 
VisitSystemArrayCopyChar(HInvoke * invoke)2932 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2933   // Check to see if we have known failures that will cause us to have to bail out
2934   // to the runtime, and just generate the runtime call directly.
2935   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
2936   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
2937 
2938   // The positions must be non-negative.
2939   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2940       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2941     // We will have to fail anyway.
2942     return;
2943   }
2944 
2945   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2946   // native implementation.
2947   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
2948   if (length != nullptr) {
2949     int32_t len = length->GetValue();
2950     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2951       // Just call as normal.
2952       return;
2953     }
2954   }
2955 
2956   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2957   LocationSummary* locations =
2958       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2959   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2960   locations->SetInAt(0, Location::RequiresRegister());
2961   locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
2962   locations->SetInAt(2, Location::RequiresRegister());
2963   locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
2964   locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
2965 
2966   locations->AddRegisterTemps(3);
2967 }
2968 
CheckSystemArrayCopyPosition(MacroAssembler * masm,Register array,Location pos,Location length,SlowPathCodeARM64 * slow_path,Register temp,bool length_is_array_length,bool position_sign_checked)2969 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2970                                          Register array,
2971                                          Location pos,
2972                                          Location length,
2973                                          SlowPathCodeARM64* slow_path,
2974                                          Register temp,
2975                                          bool length_is_array_length,
2976                                          bool position_sign_checked) {
2977   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2978   if (pos.IsConstant()) {
2979     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2980     if (pos_const == 0) {
2981       if (!length_is_array_length) {
2982         // Check that length(array) >= length.
2983         __ Ldr(temp, MemOperand(array, length_offset));
2984         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2985         __ B(slow_path->GetEntryLabel(), lt);
2986       }
2987     } else {
2988       // Calculate length(array) - pos.
2989       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
2990       // as `int32_t`. If the result is negative, the B.LT below shall go to the slow path.
2991       __ Ldr(temp, MemOperand(array, length_offset));
2992       __ Sub(temp, temp, pos_const);
2993 
2994       // Check that (length(array) - pos) >= length.
2995       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2996       __ B(slow_path->GetEntryLabel(), lt);
2997     }
2998   } else if (length_is_array_length) {
2999     // The only way the copy can succeed is if pos is zero.
3000     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
3001   } else {
3002     // Check that pos >= 0.
3003     Register pos_reg = WRegisterFrom(pos);
3004     if (!position_sign_checked) {
3005       __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
3006     }
3007 
3008     // Calculate length(array) - pos.
3009     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
3010     // as `int32_t`. If the result is negative, the B.LT below shall go to the slow path.
3011     __ Ldr(temp, MemOperand(array, length_offset));
3012     __ Sub(temp, temp, pos_reg);
3013 
3014     // Check that (length(array) - pos) >= length.
3015     __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
3016     __ B(slow_path->GetEntryLabel(), lt);
3017   }
3018 }
3019 
GenArrayAddress(MacroAssembler * masm,Register dest,Register base,Location pos,DataType::Type type,int32_t data_offset)3020 static void GenArrayAddress(MacroAssembler* masm,
3021                             Register dest,
3022                             Register base,
3023                             Location pos,
3024                             DataType::Type type,
3025                             int32_t data_offset) {
3026   if (pos.IsConstant()) {
3027     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
3028     __ Add(dest, base, DataType::Size(type) * constant + data_offset);
3029   } else {
3030     if (data_offset != 0) {
3031       __ Add(dest, base, data_offset);
3032       base = dest;
3033     }
3034     __ Add(dest, base, Operand(XRegisterFrom(pos), LSL, DataType::SizeShift(type)));
3035   }
3036 }
3037 
3038 // Compute base source address, base destination address, and end
3039 // source address for System.arraycopy* intrinsics in `src_base`,
3040 // `dst_base` and `src_end` respectively.
GenSystemArrayCopyAddresses(MacroAssembler * masm,DataType::Type type,Register src,Location src_pos,Register dst,Location dst_pos,Location copy_length,Register src_base,Register dst_base,Register src_end)3041 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
3042                                         DataType::Type type,
3043                                         Register src,
3044                                         Location src_pos,
3045                                         Register dst,
3046                                         Location dst_pos,
3047                                         Location copy_length,
3048                                         Register src_base,
3049                                         Register dst_base,
3050                                         Register src_end) {
3051   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
3052   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
3053       << "Unexpected element type: " << type;
3054   const int32_t element_size = DataType::Size(type);
3055   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
3056 
3057   GenArrayAddress(masm, src_base, src, src_pos, type, data_offset);
3058   GenArrayAddress(masm, dst_base, dst, dst_pos, type, data_offset);
3059   if (src_end.IsValid()) {
3060     GenArrayAddress(masm, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
3061   }
3062 }
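     // In other words, for element size S and data offset D:
     //   src_base = src + D + src_pos * S
     //   dst_base = dst + D + dst_pos * S
     //   src_end  = src_base + copy_length * S   (only when a valid `src_end` register is given)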
3063 
VisitSystemArrayCopyChar(HInvoke * invoke)3064 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
3065   MacroAssembler* masm = GetVIXLAssembler();
3066   LocationSummary* locations = invoke->GetLocations();
3067   Register src = XRegisterFrom(locations->InAt(0));
3068   Location src_pos = locations->InAt(1);
3069   Register dst = XRegisterFrom(locations->InAt(2));
3070   Location dst_pos = locations->InAt(3);
3071   Location length = locations->InAt(4);
3072 
3073   SlowPathCodeARM64* slow_path =
3074       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3075   codegen_->AddSlowPath(slow_path);
3076 
3077   // If source and destination are the same, take the slow path. Overlapping copy regions must be
3078   // copied in reverse, and we cannot know in all cases whether that is needed.
3079   __ Cmp(src, dst);
3080   __ B(slow_path->GetEntryLabel(), eq);
3081 
3082   // Bail out if the source is null.
3083   __ Cbz(src, slow_path->GetEntryLabel());
3084 
3085   // Bail out if the destination is null.
3086   __ Cbz(dst, slow_path->GetEntryLabel());
3087 
3088   if (!length.IsConstant()) {
3089     // Merge the following two comparisons into one:
3090     //   If the length is negative, bail out (delegate to libcore's native implementation).
3091     //   If the length > kSystemArrayCopyCharThreshold then (currently) prefer libcore's
3092     //   native implementation.
3093     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
3094     __ B(slow_path->GetEntryLabel(), hi);
3095   } else {
3096     // We have already checked in the LocationsBuilder for the constant case.
3097     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
3098     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), kSystemArrayCopyCharThreshold);
3099   }
3100 
3101   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
3102   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
3103   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
3104 
3105   CheckSystemArrayCopyPosition(masm,
3106                                src,
3107                                src_pos,
3108                                length,
3109                                slow_path,
3110                                src_curr_addr,
3111                                /*length_is_array_length=*/ false,
3112                                /*position_sign_checked=*/ false);
3113 
3114   CheckSystemArrayCopyPosition(masm,
3115                                dst,
3116                                dst_pos,
3117                                length,
3118                                slow_path,
3119                                src_curr_addr,
3120                                /*length_is_array_length=*/ false,
3121                                /*position_sign_checked=*/ false);
3122 
3123   src_curr_addr = src_curr_addr.X();
3124   dst_curr_addr = dst_curr_addr.X();
3125   src_stop_addr = src_stop_addr.X();
3126 
3127   GenSystemArrayCopyAddresses(masm,
3128                               DataType::Type::kUint16,
3129                               src,
3130                               src_pos,
3131                               dst,
3132                               dst_pos,
3133                               length,
3134                               src_curr_addr,
3135                               dst_curr_addr,
3136                               Register());
3137 
3138   // Iterate over the arrays and do a raw copy of the chars.
3139   const int32_t char_size = DataType::Size(DataType::Type::kUint16);
3140   UseScratchRegisterScope temps(masm);
3141 
3142   // We split processing of the array into two parts: head and tail.
3143   // A first loop handles the head by copying a block of characters per
3144   // iteration (see: chars_per_block).
3145   // A second loop handles the tail by copying the remaining characters.
3146   // If the copy length is not constant, we copy them one-by-one.
3147   // If the copy length is constant, we optimize by always unrolling the tail
3148   // loop, and also unrolling the head loop when the copy length is small (see:
3149   // unroll_threshold).
3150   //
3151   // Both loops are inverted for better performance, meaning they are
3152   // implemented as conditional do-while loops.
3153   // Here, the loop condition is first checked to determine if there are
3154   // sufficient chars to run an iteration, then we enter the do-while: an
3155   // iteration is performed followed by a conditional branch only if another
3156   // iteration is necessary. As opposed to a standard while-loop, this inversion
3157   // can save some branching (e.g. we don't branch back to the initial condition
3158   // at the end of every iteration only to potentially immediately branch
3159   // again).
3160   //
3161   // A full block of chars is subtracted and added before and after the head
3162   // loop, respectively. This ensures that any remaining length after each
3163   // head loop iteration means there is a full block remaining, reducing the
3164   // number of conditional checks required on every iteration.
3165   constexpr int32_t chars_per_block = 4;
3166   constexpr int32_t unroll_threshold = 2 * chars_per_block;
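       // Worked example for a constant length of 11 chars: the head loop starts from
       // length_tmp == 11 - 4 and copies 4 chars per iteration, running twice (8 chars); the
       // unrolled tail then handles 11 % 4 == 3 chars with a 4-byte LDR/STR followed by a
       // 2-byte LDRH/STRH.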
3167   vixl::aarch64::Label loop1, loop2, pre_loop2, done;
3168 
3169   Register length_tmp = src_stop_addr.W();
3170   Register tmp = temps.AcquireRegisterOfSize(char_size * chars_per_block * kBitsPerByte);
3171 
3172   auto emitHeadLoop = [&]() {
3173     __ Bind(&loop1);
3174     __ Ldr(tmp, MemOperand(src_curr_addr, char_size * chars_per_block, PostIndex));
3175     __ Subs(length_tmp, length_tmp, chars_per_block);
3176     __ Str(tmp, MemOperand(dst_curr_addr, char_size * chars_per_block, PostIndex));
3177     __ B(&loop1, ge);
3178   };
3179 
3180   auto emitTailLoop = [&]() {
3181     __ Bind(&loop2);
3182     __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
3183     __ Subs(length_tmp, length_tmp, 1);
3184     __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
3185     __ B(&loop2, gt);
3186   };
3187 
3188   auto emitUnrolledTailLoop = [&](const int32_t tail_length) {
3189     DCHECK_LT(tail_length, 4);
3190 
3191     // Don't use post-index addressing, and instead add a constant offset later.
3192     if ((tail_length & 2) != 0) {
3193       __ Ldr(tmp.W(), MemOperand(src_curr_addr));
3194       __ Str(tmp.W(), MemOperand(dst_curr_addr));
3195     }
3196     if ((tail_length & 1) != 0) {
3197       const int32_t offset = (tail_length & ~1) * char_size;
3198       __ Ldrh(tmp, MemOperand(src_curr_addr, offset));
3199       __ Strh(tmp, MemOperand(dst_curr_addr, offset));
3200     }
3201   };
3202 
3203   if (length.IsConstant()) {
3204     const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
3205     if (constant_length >= unroll_threshold) {
3206       __ Mov(length_tmp, constant_length - chars_per_block);
3207       emitHeadLoop();
3208     } else {
3209       static_assert(unroll_threshold == 8, "The unroll_threshold must be 8.");
3210       // Fully unroll both the head and tail loops.
3211       if ((constant_length & 4) != 0) {
3212         __ Ldr(tmp, MemOperand(src_curr_addr, 4 * char_size, PostIndex));
3213         __ Str(tmp, MemOperand(dst_curr_addr, 4 * char_size, PostIndex));
3214       }
3215     }
3216     emitUnrolledTailLoop(constant_length % chars_per_block);
3217   } else {
3218     Register length_reg = WRegisterFrom(length);
3219     __ Subs(length_tmp, length_reg, chars_per_block);
3220     __ B(&pre_loop2, lt);
3221 
3222     emitHeadLoop();
3223 
3224     __ Bind(&pre_loop2);
3225     __ Adds(length_tmp, length_tmp, chars_per_block);
3226     __ B(&done, eq);
3227 
3228     emitTailLoop();
3229   }
3230 
3231   __ Bind(&done);
3232   __ Bind(slow_path->GetExitLabel());
3233 }
3234 
3235 // We choose to use the native implementation for longer copy lengths.
3236 static constexpr int32_t kSystemArrayCopyThreshold = 128;
3237 
VisitSystemArrayCopy(HInvoke * invoke)3238 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
3239   // The only read barrier implementation supporting the
3240   // SystemArrayCopy intrinsic is the Baker-style read barriers.
3241   if (codegen_->EmitNonBakerReadBarrier()) {
3242     return;
3243   }
3244 
3245   constexpr size_t kInitialNumTemps = 2u;  // We need at least two temps.
3246   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
3247       invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
3248   if (locations != nullptr) {
3249     locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
3250     locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
3251     locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
3252     if (codegen_->EmitBakerReadBarrier()) {
3253       // Temporary register IP0, obtained from the VIXL scratch register
3254       // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
3255       // (because that register is clobbered by ReadBarrierMarkRegX
3256       // entry points). It cannot be used in calls to
3257       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
3258       // either. For these reasons, get a third extra temporary register
3259       // from the register allocator.
3260       locations->AddTemp(Location::RequiresRegister());
3261     } else {
3262       // Cases other than Baker read barriers: the third temporary will
3263       // be acquired from the VIXL scratch register pool.
3264     }
3265   }
3266 }
3267 
VisitSystemArrayCopy(HInvoke * invoke)3268 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
3269   // The only read barrier implementation supporting the
3270   // SystemArrayCopy intrinsic is the Baker-style read barriers.
3271   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3272 
3273   MacroAssembler* masm = GetVIXLAssembler();
3274   LocationSummary* locations = invoke->GetLocations();
3275 
3276   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3277   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3278   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3279   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3280   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3281 
3282   Register src = XRegisterFrom(locations->InAt(0));
3283   Location src_pos = locations->InAt(1);
3284   Register dest = XRegisterFrom(locations->InAt(2));
3285   Location dest_pos = locations->InAt(3);
3286   Location length = locations->InAt(4);
3287   Register temp1 = WRegisterFrom(locations->GetTemp(0));
3288   Location temp1_loc = LocationFrom(temp1);
3289   Register temp2 = WRegisterFrom(locations->GetTemp(1));
3290   Location temp2_loc = LocationFrom(temp2);
3291 
3292   SlowPathCodeARM64* intrinsic_slow_path =
3293       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3294   codegen_->AddSlowPath(intrinsic_slow_path);
3295 
3296   vixl::aarch64::Label conditions_on_positions_validated;
3297   SystemArrayCopyOptimizations optimizations(invoke);
3298 
3299   // If source and destination are the same, we go to the slow path when the destination position is
3300   // greater than the source position, as the copy would then have to be done backward. We do not need this check if the positions are the same.
3301   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
3302     if (src_pos.IsConstant()) {
3303       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
3304       if (dest_pos.IsConstant()) {
3305         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3306         if (optimizations.GetDestinationIsSource()) {
3307           // Checked when building locations.
3308           DCHECK_GE(src_pos_constant, dest_pos_constant);
3309         } else if (src_pos_constant < dest_pos_constant) {
3310           __ Cmp(src, dest);
3311           __ B(intrinsic_slow_path->GetEntryLabel(), eq);
3312         }
3313       } else {
3314         if (!optimizations.GetDestinationIsSource()) {
3315           __ Cmp(src, dest);
3316           __ B(&conditions_on_positions_validated, ne);
3317         }
3318         __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
3319         __ B(intrinsic_slow_path->GetEntryLabel(), gt);
3320       }
3321     } else {
3322       if (!optimizations.GetDestinationIsSource()) {
3323         __ Cmp(src, dest);
3324         __ B(&conditions_on_positions_validated, ne);
3325       }
3326       __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
3327              OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
3328       __ B(intrinsic_slow_path->GetEntryLabel(), lt);
3329     }
3330   }
3331 
3332   __ Bind(&conditions_on_positions_validated);
3333 
3334   if (!optimizations.GetSourceIsNotNull()) {
3335     // Bail out if the source is null.
3336     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
3337   }
3338 
3339   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3340     // Bail out if the destination is null.
3341     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
3342   }
3343 
3344   // We have already checked in the LocationsBuilder for the constant case.
3345   if (!length.IsConstant()) {
3346     // Merge the following two comparisons into one:
3347     //   If the length is negative, bail out (delegate to libcore's native implementation).
3348     //   If the length >= 128 then (currently) prefer native implementation.
3349     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
3350     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
3351   }
3352   // Validity checks: source.
3353   CheckSystemArrayCopyPosition(masm,
3354                                src,
3355                                src_pos,
3356                                length,
3357                                intrinsic_slow_path,
3358                                temp1,
3359                                optimizations.GetCountIsSourceLength(),
3360                                /*position_sign_checked=*/ false);
3361 
3362   // Validity checks: dest.
3363   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
3364   CheckSystemArrayCopyPosition(masm,
3365                                dest,
3366                                dest_pos,
3367                                length,
3368                                intrinsic_slow_path,
3369                                temp1,
3370                                optimizations.GetCountIsDestinationLength(),
3371                                dest_position_sign_checked);
3372 
3373   auto check_non_primitive_array_class = [&](Register klass, Register temp) {
3374     // No read barrier is needed for reading a chain of constant references for comparing
3375     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3376     // /* HeapReference<Class> */ temp = klass->component_type_
3377     __ Ldr(temp, HeapOperand(klass, component_offset));
3378     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3379     // Check that the component type is not null.
3380     __ Cbz(temp, intrinsic_slow_path->GetEntryLabel());
3381     // Check that the component type is not a primitive.
3382     // /* uint16_t */ temp = static_cast<uint16_t>(klass->primitive_type_);
3383     __ Ldrh(temp, HeapOperand(temp, primitive_offset));
3384     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3385     __ Cbnz(temp, intrinsic_slow_path->GetEntryLabel());
3386   };
3387 
3388   if (!optimizations.GetDoesNotNeedTypeCheck()) {
3389     // Check whether all elements of the source array are assignable to the component
3390     // type of the destination array. We do two checks: the classes are the same,
3391     // or the destination is Object[]. If none of these checks succeed, we go to the
3392     // slow path.
3393 
3394     if (codegen_->EmitBakerReadBarrier()) {
3395       Location temp3_loc = locations->GetTemp(2);
3396       // /* HeapReference<Class> */ temp1 = dest->klass_
3397       codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3398                                                       temp1_loc,
3399                                                       dest.W(),
3400                                                       class_offset,
3401                                                       temp3_loc,
3402                                                       /* needs_null_check= */ false,
3403                                                       /* use_load_acquire= */ false);
3404       // Register `temp1` is not trashed by the read barrier emitted
3405       // by GenerateFieldLoadWithBakerReadBarrier below, as that
3406       // method produces a call to a ReadBarrierMarkRegX entry point,
3407       // which saves all potentially live registers, including
3408       // temporaries such as `temp1`.
3409       // /* HeapReference<Class> */ temp2 = src->klass_
3410       codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3411                                                       temp2_loc,
3412                                                       src.W(),
3413                                                       class_offset,
3414                                                       temp3_loc,
3415                                                       /* needs_null_check= */ false,
3416                                                       /* use_load_acquire= */ false);
3417     } else {
3418       // /* HeapReference<Class> */ temp1 = dest->klass_
3419       __ Ldr(temp1, MemOperand(dest, class_offset));
3420       codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3421       // /* HeapReference<Class> */ temp2 = src->klass_
3422       __ Ldr(temp2, MemOperand(src, class_offset));
3423       codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3424     }
3425 
3426     __ Cmp(temp1, temp2);
3427     if (optimizations.GetDestinationIsTypedObjectArray()) {
3428       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3429       vixl::aarch64::Label do_copy;
3430       // For class match, we can skip the source type check regardless of the optimization flag.
3431       __ B(&do_copy, eq);
3432       // No read barrier is needed for reading a chain of constant references
3433       // for comparing with null, see `ReadBarrierOption`.
3434       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3435       __ Ldr(temp1, HeapOperand(temp1, component_offset));
3436       codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3437       // /* HeapReference<Class> */ temp1 = temp1->super_class_
3438       __ Ldr(temp1, HeapOperand(temp1, super_offset));
3439       // No need to unpoison the result, we're comparing against null.
3440       __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
3441       // Bail out if the source is not a non-primitive array.
3442       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3443         check_non_primitive_array_class(temp2, temp2);
3444       }
3445       __ Bind(&do_copy);
3446     } else {
3447       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
3448       // For class match, we can skip the array type check completely if at least one of source
3449       // and destination is known to be a non-primitive array; otherwise one check is enough.
3450       __ B(intrinsic_slow_path->GetEntryLabel(), ne);
3451       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
3452           !optimizations.GetSourceIsNonPrimitiveArray()) {
3453         check_non_primitive_array_class(temp2, temp2);
3454       }
3455     }
3456   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3457     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3458     // Bail out if the source is not a non-primitive array.
3459     // No read barrier is needed for reading a chain of constant references for comparing
3460     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3461     // /* HeapReference<Class> */ temp2 = src->klass_
3462     __ Ldr(temp2, MemOperand(src, class_offset));
3463     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3464     check_non_primitive_array_class(temp2, temp2);
3465   }
3466 
3467   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3468     // Constant zero length: no need to emit the loop code at all.
3469   } else {
3470     vixl::aarch64::Label skip_copy_and_write_barrier;
3471     if (length.IsRegister()) {
3472       // Don't enter the copy loop if the length is zero.
3473       __ Cbz(WRegisterFrom(length), &skip_copy_and_write_barrier);
3474     }
3475 
3476     {
3477       // We use a block to end the scratch scope before the write barrier, thus
3478       // freeing the temporary registers so they can be used in `MarkGCCard`.
3479       UseScratchRegisterScope temps(masm);
3480       bool emit_rb = codegen_->EmitBakerReadBarrier();
3481       Register temp3;
3482       Register tmp;
3483       if (emit_rb) {
3484         temp3 = WRegisterFrom(locations->GetTemp(2));
3485         // Make sure `tmp` is not IP0, as it is clobbered by ReadBarrierMarkRegX entry points
3486         // in ReadBarrierSystemArrayCopySlowPathARM64. Explicitly allocate the register IP1.
3487         DCHECK(temps.IsAvailable(ip1));
3488         temps.Exclude(ip1);
3489         tmp = ip1.W();
3490       } else {
3491         temp3 = temps.AcquireW();
3492         tmp = temps.AcquireW();
3493       }
3494 
3495       Register src_curr_addr = temp1.X();
3496       Register dst_curr_addr = temp2.X();
3497       Register src_stop_addr = temp3.X();
3498       const DataType::Type type = DataType::Type::kReference;
3499       const int32_t element_size = DataType::Size(type);
3500 
3501       SlowPathCodeARM64* read_barrier_slow_path = nullptr;
3502       if (emit_rb) {
3503         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
3504 
3505         // SystemArrayCopy implementation for Baker read barriers (see
3506         // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
3507         //
3508         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3509         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
3510         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
3511         //   if (is_gray) {
3512         //     // Slow-path copy.
3513         //     do {
3514         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
3515         //     } while (src_ptr != end_ptr)
3516         //   } else {
3517         //     // Fast-path copy.
3518         //     do {
3519         //       *dest_ptr++ = *src_ptr++;
3520         //     } while (src_ptr != end_ptr)
3521         //   }
3522 
3523         // /* int32_t */ monitor = src->monitor_
3524         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
3525         // /* LockWord */ lock_word = LockWord(monitor)
3526         static_assert(sizeof(LockWord) == sizeof(int32_t),
3527                       "art::LockWord and int32_t have different sizes.");
3528 
3529         // Introduce a dependency on the lock_word including rb_state,
3530         // to prevent load-load reordering, and without using
3531         // a memory barrier (which would be more expensive).
3532         // `src` is unchanged by this operation, but its value now depends
3533         // on `tmp`.
3534         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
3535 
3536         // Slow path used to copy array when `src` is gray.
3537         read_barrier_slow_path =
3538             new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
3539                 invoke, LocationFrom(tmp));
3540         codegen_->AddSlowPath(read_barrier_slow_path);
3541       }
3542 
3543       // Compute base source address, base destination address, and end
3544       // source address for System.arraycopy* intrinsics in `src_curr_addr`,
3545       // `dst_curr_addr` and `src_stop_addr` respectively.
3546       // Note that `src_curr_addr` is computed from `src` (and
3547       // `src_pos`) here, and thus honors the artificial dependency
3548       // of `src` on `tmp`.
3549       GenSystemArrayCopyAddresses(masm,
3550                                   type,
3551                                   src,
3552                                   src_pos,
3553                                   dest,
3554                                   dest_pos,
3555                                   length,
3556                                   src_curr_addr,
3557                                   dst_curr_addr,
3558                                   src_stop_addr);
3559 
3560       if (emit_rb) {
3561         // Given the numeric representation, it's enough to check the low bit of the rb_state.
3562         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3563         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3564         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
3565       }
3566 
3567       // Iterate over the arrays and do a raw copy of the objects. We don't need to
3568       // poison/unpoison.
3569       vixl::aarch64::Label loop;
3570       __ Bind(&loop);
3571       __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3572       __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3573       __ Cmp(src_curr_addr, src_stop_addr);
3574       __ B(&loop, ne);
3575 
3576       if (emit_rb) {
3577         DCHECK(read_barrier_slow_path != nullptr);
3578         __ Bind(read_barrier_slow_path->GetExitLabel());
3579       }
3580     }
3581 
3582     // We only need one card marking on the destination array.
3583     codegen_->MarkGCCard(dest.W());
3584 
3585     __ Bind(&skip_copy_and_write_barrier);
3586   }
3587 
3588   __ Bind(intrinsic_slow_path->GetExitLabel());
3589 }
3590 
3591 static void GenIsInfinite(LocationSummary* locations,
3592                           bool is64bit,
3593                           MacroAssembler* masm) {
3594   Operand infinity(0);
3595   Operand tst_mask(0);
3596   Register out;
3597 
3598   if (is64bit) {
3599     infinity = Operand(kPositiveInfinityDouble);
3600     tst_mask = MaskLeastSignificant<uint64_t>(63);
3601     out = XRegisterFrom(locations->Out());
3602   } else {
3603     infinity = Operand(kPositiveInfinityFloat);
3604     tst_mask = MaskLeastSignificant<uint32_t>(31);
3605     out = WRegisterFrom(locations->Out());
3606   }
3607 
3608   MoveFPToInt(locations, is64bit, masm);
3609   // Checks whether exponent bits are all 1 and fraction bits are all 0.
3610   __ Eor(out, out, infinity);
3611   // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
3612   // depending on is64bit.
3613   __ Tst(out, tst_mask);
3614   __ Cset(out, eq);
3615 }
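// Illustrative sketch (not compiled; `IsInfiniteFloat` is a hypothetical helper): the Eor/Tst
// sequence above is the usual isInfinite bit trick. For the 32-bit case it is equivalent to:
//
//   bool IsInfiniteFloat(uint32_t bits) {
//     // +Inf is 0x7f800000; -Inf differs only in the sign bit, which the mask discards.
//     return ((bits ^ 0x7f800000u) & 0x7fffffffu) == 0;
//   }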
3616 
3617 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3618   CreateFPToIntLocations(allocator_, invoke);
3619 }
3620 
3621 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3622   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
3623 }
3624 
3625 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3626   CreateFPToIntLocations(allocator_, invoke);
3627 }
3628 
3629 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3630   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
3631 }
3632 
3633 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
3634   void IntrinsicLocationsBuilderARM64::Visit##name##ValueOf(HInvoke* invoke) {           \
3635     InvokeRuntimeCallingConvention calling_convention;                                   \
3636     IntrinsicVisitor::ComputeValueOfLocations(                                           \
3637         invoke,                                                                          \
3638         codegen_,                                                                        \
3639         low,                                                                             \
3640         (high) - (low) + 1,                                                              \
3641         calling_convention.GetReturnLocation(DataType::Type::kReference),                \
3642         Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));      \
3643   }                                                                                      \
3644   void IntrinsicCodeGeneratorARM64::Visit##name##ValueOf(HInvoke* invoke) {              \
3645     IntrinsicVisitor::ValueOfInfo info =                                                 \
3646         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
3647                                              codegen_->GetCompilerOptions(),             \
3648                                              WellKnownClasses::java_lang_##name##_value, \
3649                                              low,                                        \
3650                                              (high) - (low) + 1,                         \
3651                                              start_index);                               \
3652     HandleValueOf(invoke, info, type);                                                   \
3653   }
3654   BOXED_TYPES(VISIT_INTRINSIC)
3655 #undef VISIT_INTRINSIC
3656 
3657 void IntrinsicCodeGeneratorARM64::HandleValueOf(HInvoke* invoke,
3658                                                 const IntrinsicVisitor::ValueOfInfo& info,
3659                                                 DataType::Type type) {
3660   LocationSummary* locations = invoke->GetLocations();
3661   MacroAssembler* masm = GetVIXLAssembler();
3662 
3663   Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
3664   UseScratchRegisterScope temps(masm);
3665   Register temp = temps.AcquireW();
3666   auto allocate_instance = [&]() {
3667     DCHECK(out.X().Is(InvokeRuntimeCallingConvention().GetRegisterAt(0)));
3668     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
3669     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3670     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3671   };
3672   if (invoke->InputAt(0)->IsIntConstant()) {
3673     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3674     if (static_cast<uint32_t>(value - info.low) < info.length) {
3675       // Just embed the object in the code.
3676       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3677       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3678     } else {
3679       DCHECK(locations->CanCall());
3680       // Allocate and initialize a new object.
3681       // TODO: If we JIT, we could allocate the object now, and store it in the
3682       // JIT object table.
3683       allocate_instance();
3684       __ Mov(temp.W(), value);
3685       codegen_->Store(type, temp.W(), HeapOperand(out.W(), info.value_offset));
3686       // Class pointer and `value` final field stores require a barrier before publication.
3687       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3688     }
3689   } else {
3690     DCHECK(locations->CanCall());
3691     Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
3692     // Check bounds of our cache.
3693     __ Add(out.W(), in.W(), -info.low);
3694     __ Cmp(out.W(), info.length);
3695     vixl::aarch64::Label allocate, done;
3696     __ B(&allocate, hs);
3697     // If the value is within the bounds, load the object directly from the array.
3698     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
3699     MemOperand source = HeapOperand(
3700         temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3701     codegen_->Load(DataType::Type::kReference, out, source);
3702     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3703     __ B(&done);
3704     __ Bind(&allocate);
3705     // Otherwise allocate and initialize a new object.
3706     allocate_instance();
3707     codegen_->Store(type, in.W(), HeapOperand(out.W(), info.value_offset));
3708     // Class pointer and `value` final field stores require a barrier before publication.
3709     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3710     __ Bind(&done);
3711   }
3712 }
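// Illustrative sketch (pseudo code, not compiled): HandleValueOf mirrors the boxing cache of
// java.lang.Integer.valueOf and the other boxed types, roughly:
//
//   if ((unsigned)(value - low) < length) {
//     result = cache[value - low];   // Boot-image object: embedded or loaded by index.
//   } else {
//     result = new Boxed(value);     // Runtime allocation, then a StoreStore barrier
//   }                                // before publication of the final `value` field.
//
// The single unsigned comparison replaces the `low <= value && value <= high` range check,
// which is what the Add/Cmp/B(hs) sequence above does for the non-constant input.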
3713 
3714 void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3715   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3716 
3717   if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
3718     invoke->GetLocations()->AddTemp(Location::RequiresRegister());
3719   }
3720 }
3721 
3722 void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3723   MacroAssembler* masm = GetVIXLAssembler();
3724   LocationSummary* locations = invoke->GetLocations();
3725 
3726   Location obj = locations->InAt(0);
3727   Location out = locations->Out();
3728 
3729   SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
3730   codegen_->AddSlowPath(slow_path);
3731 
3732   if (codegen_->EmitReadBarrier()) {
3733     // Check self->GetWeakRefAccessEnabled().
3734     UseScratchRegisterScope temps(masm);
3735     Register temp = temps.AcquireW();
3736     __ Ldr(temp,
3737            MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value()));
3738     static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
3739     __ Cbnz(temp, slow_path->GetEntryLabel());
3740   }
3741 
3742   {
3743     // Load the java.lang.ref.Reference class.
3744     UseScratchRegisterScope temps(masm);
3745     Register temp = temps.AcquireW();
3746     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
3747 
3748     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3749     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3750     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3751     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3752               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3753     __ Ldrh(temp, HeapOperand(temp, disable_intrinsic_offset.Uint32Value()));
3754     __ Cbnz(temp, slow_path->GetEntryLabel());
3755   }
3756 
3757   // Load the value from the field.
3758   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3759   if (codegen_->EmitBakerReadBarrier()) {
3760     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3761                                                     out,
3762                                                     WRegisterFrom(obj),
3763                                                     referent_offset,
3764                                                     /*maybe_temp=*/ locations->GetTemp(0),
3765                                                     /*needs_null_check=*/ true,
3766                                                     /*use_load_acquire=*/ true);
3767   } else {
3768     MemOperand field = HeapOperand(WRegisterFrom(obj), referent_offset);
3769     codegen_->LoadAcquire(
3770         invoke, DataType::Type::kReference, WRegisterFrom(out), field, /*needs_null_check=*/ true);
3771     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3772   }
3773   __ Bind(slow_path->GetExitLabel());
3774 }
3775 
3776 void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3777   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3778 }
3779 
3780 void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3781   LocationSummary* locations = invoke->GetLocations();
3782   MacroAssembler* masm = codegen_->GetVIXLAssembler();
3783   UseScratchRegisterScope temps(masm);
3784 
3785   Register obj = WRegisterFrom(locations->InAt(0));
3786   Register other = WRegisterFrom(locations->InAt(1));
3787   Register out = WRegisterFrom(locations->Out());
3788   Register tmp = temps.AcquireW();
3789 
3790   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3791   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3792 
3793   MemOperand field = HeapOperand(obj, referent_offset);
3794   codegen_->LoadAcquire(invoke, DataType::Type::kReference, tmp, field, /*needs_null_check=*/ true);
3795   codegen_->GetAssembler()->MaybeUnpoisonHeapReference(tmp);
3796 
3797   __ Cmp(tmp, other);
3798 
3799   if (codegen_->EmitReadBarrier()) {
3800     DCHECK(kUseBakerReadBarrier);
3801 
3802     vixl::aarch64::Label calculate_result;
3803 
3804     // If the GC is not marking, the comparison result is final.
3805     __ Cbz(mr, &calculate_result);
3806 
3807     __ B(&calculate_result, eq);  // ZF set if taken.
3808 
3809     // Check if the loaded reference is null.
3810     __ Cbz(tmp, &calculate_result);  // ZF clear if taken.
3811 
3812     // For correct memory visibility, we need a barrier before loading the lock word.
3813     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3814 
3815     // Load the lockword and check if it is a forwarding address.
3816     static_assert(LockWord::kStateShift == 30u);
3817     static_assert(LockWord::kStateForwardingAddress == 3u);
3818     __ Ldr(tmp, HeapOperand(tmp, monitor_offset));
3819     __ Cmp(tmp, Operand(0xc0000000));
3820     __ B(&calculate_result, lo);   // ZF clear if taken.
3821 
3822     // Extract the forwarding address and compare with `other`.
3823     __ Cmp(other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
3824 
3825     __ Bind(&calculate_result);
3826   }
3827 
3828   // Convert ZF into the Boolean result.
3829   __ Cset(out, eq);
3830 }
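// Rough pseudo code for the read-barrier path above (illustrative, not compiled):
//
//   tmp = LoadAcquire(obj.referent);
//   if (!gc_is_marking || tmp == other || tmp == null) return tmp == other;
//   lw = tmp->monitor_;                                  // After a LoadAny barrier.
//   if ((lw >> kStateShift) != kStateForwardingAddress) return false;
//   return (lw << kForwardingAddressShift) == other;     // Compare the to-space address.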
3831 
3832 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
3833   LocationSummary* locations =
3834       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3835   locations->SetOut(Location::RequiresRegister());
3836 }
3837 
3838 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3839   MacroAssembler* masm = GetVIXLAssembler();
3840   Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
3841   UseScratchRegisterScope temps(masm);
3842   Register temp = temps.AcquireX();
3843 
3844   __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3845   __ Ldar(out.W(), MemOperand(temp));
3846 
3847   vixl::aarch64::Label done;
3848   __ Cbz(out.W(), &done);
3849   __ Stlr(wzr, MemOperand(temp));
3850   __ Bind(&done);
3851 }
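// Roughly equivalent logic (illustrative, not compiled): Thread.interrupted() must read the
// flag and clear it only if it was set, with acquire/release ordering:
//
//   int32_t value = interrupted.load(std::memory_order_acquire);   // Ldar
//   if (value != 0) {
//     interrupted.store(0, std::memory_order_release);             // Stlr
//   }
//   return value;
//
// Skipping the store when the flag is already clear avoids an unnecessary store-release on the
// common, uninterrupted path.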
3852 
3853 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3854   LocationSummary* locations =
3855       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3856   locations->SetInAt(0, Location::Any());
3857 }
3858 
3859 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3860 
3861 void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
3862   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3863     return;
3864   }
3865 
3866   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3867                                                                 LocationSummary::kNoCall,
3868                                                                 kIntrinsified);
3869 
3870   locations->SetInAt(0, Location::RequiresRegister());
3871   locations->SetInAt(1, Location::RequiresRegister());
3872   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3873 }
3874 
3875 // Lower the invoke of CRC32.update(int crc, int b).
3876 void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
3877   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3878 
3879   MacroAssembler* masm = GetVIXLAssembler();
3880 
3881   Register crc = InputRegisterAt(invoke, 0);
3882   Register val = InputRegisterAt(invoke, 1);
3883   Register out = OutputRegister(invoke);
3884 
3885   // The general algorithm of the CRC32 calculation is:
3886   //   crc = ~crc
3887   //   result = crc32_for_byte(crc, b)
3888   //   crc = ~result
3889   // It is directly lowered to three instructions.
3890 
3891   UseScratchRegisterScope temps(masm);
3892   Register tmp = temps.AcquireSameSizeAs(out);
3893 
3894   __ Mvn(tmp, crc);
3895   __ Crc32b(tmp, tmp, val);
3896   __ Mvn(out, tmp);
3897 }
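// Illustrative software equivalent (not compiled; assumes the standard reflected CRC-32
// polynomial 0xEDB88320, which is what CRC32B implements and java.util.zip.CRC32 expects):
//
//   uint32_t Crc32UpdateByte(uint32_t crc, uint8_t b) {
//     crc = ~crc;
//     crc ^= b;
//     for (int i = 0; i < 8; ++i) {
//       crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
//     }
//     return ~crc;
//   }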
3898 
3899 // Generate code using CRC32 instructions which calculates
3900 // a CRC32 value of a byte.
3901 //
3902 // Parameters:
3903 //   masm   - VIXL macro assembler
3904 //   crc    - a register holding an initial CRC value
3905 //   ptr    - a register holding a memory address of bytes
3906 //   length - a register holding a number of bytes to process
3907 //   out    - a register to put a result of calculation
3908 static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
3909                                                         const Register& crc,
3910                                                         const Register& ptr,
3911                                                         const Register& length,
3912                                                         const Register& out) {
3913   // The algorithm of CRC32 of bytes is:
3914   //   crc = ~crc
3915   //   process a few first bytes to make the array 8-byte aligned
3916   //   while array has 8 bytes do:
3917   //     crc = crc32_of_8bytes(crc, 8_bytes(array))
3918   //   if array has 4 bytes:
3919   //     crc = crc32_of_4bytes(crc, 4_bytes(array))
3920   //   if array has 2 bytes:
3921   //     crc = crc32_of_2bytes(crc, 2_bytes(array))
3922   //   if array has a byte:
3923   //     crc = crc32_of_byte(crc, 1_byte(array))
3924   //   crc = ~crc
3925 
3926   vixl::aarch64::Label loop, done;
3927   vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
3928   vixl::aarch64::Label aligned2, aligned4, aligned8;
3929 
3930   // Use VIXL scratch registers as the VIXL macro assembler won't use them in
3931   // instructions below.
3932   UseScratchRegisterScope temps(masm);
3933   Register len = temps.AcquireW();
3934   Register array_elem = temps.AcquireW();
3935 
3936   __ Mvn(out, crc);
3937   __ Mov(len, length);
3938 
3939   __ Tbz(ptr, 0, &aligned2);
3940   __ Subs(len, len, 1);
3941   __ B(&done, lo);
3942   __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3943   __ Crc32b(out, out, array_elem);
3944 
3945   __ Bind(&aligned2);
3946   __ Tbz(ptr, 1, &aligned4);
3947   __ Subs(len, len, 2);
3948   __ B(&process_1byte, lo);
3949   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3950   __ Crc32h(out, out, array_elem);
3951 
3952   __ Bind(&aligned4);
3953   __ Tbz(ptr, 2, &aligned8);
3954   __ Subs(len, len, 4);
3955   __ B(&process_2bytes, lo);
3956   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3957   __ Crc32w(out, out, array_elem);
3958 
3959   __ Bind(&aligned8);
3960   __ Subs(len, len, 8);
3961   // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
3962   __ B(&process_4bytes, lo);
3963 
3964   // The main loop processing data by 8 bytes.
3965   __ Bind(&loop);
3966   __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3967   __ Subs(len, len, 8);
3968   __ Crc32x(out, out, array_elem.X());
3969   // if len >= 8, process the next 8 bytes.
3970   __ B(&loop, hs);
3971 
3972   // Process the data which is less than 8 bytes.
3973   // The code generated below works with values of len
3974   // which come in the range [-8, 0].
3975   // The first three bits are used to detect whether 4 bytes or 2 bytes or
3976   // a byte can be processed.
3977   // The checking order is from bit 2 to bit 0:
3978   //  bit 2 is set: at least 4 bytes available
3979   //  bit 1 is set: at least 2 bytes available
3980   //  bit 0 is set: at least a byte available
3981   __ Bind(&process_4bytes);
3982   // Goto process_2bytes if less than four bytes available
3983   __ Tbz(len, 2, &process_2bytes);
3984   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3985   __ Crc32w(out, out, array_elem);
3986 
3987   __ Bind(&process_2bytes);
3988   // Goto process_1byte if less than two bytes available
3989   __ Tbz(len, 1, &process_1byte);
3990   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3991   __ Crc32h(out, out, array_elem);
3992 
3993   __ Bind(&process_1byte);
3994   // Goto done if no bytes available
3995   __ Tbz(len, 0, &done);
3996   __ Ldrb(array_elem, MemOperand(ptr));
3997   __ Crc32b(out, out, array_elem);
3998 
3999   __ Bind(&done);
4000   __ Mvn(out, out);
4001 }
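// Worked example for the tail handling above (illustrative): after the 8-byte loop exits, `len`
// holds `remaining - 8`, a negative value whose low three bits still equal the low three bits of
// `remaining` (subtracting 8 leaves bits 0-2 unchanged). With 5 bytes remaining, len = -3 =
// 0b...11111101: bit 2 set (process 4 bytes), bit 1 clear (skip the 2-byte step), bit 0 set
// (process the final byte), giving 4 + 1 = 5 bytes in total.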
4002 
4003 // The threshold for sizes of arrays to use the library provided implementation
4004 // of CRC32.updateBytes instead of the intrinsic.
4005 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
4006 
4007 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
4008   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
4009     return;
4010   }
4011 
4012   LocationSummary* locations =
4013       new (allocator_) LocationSummary(invoke,
4014                                        LocationSummary::kCallOnSlowPath,
4015                                        kIntrinsified);
4016 
4017   locations->SetInAt(0, Location::RequiresRegister());
4018   locations->SetInAt(1, Location::RequiresRegister());
4019   locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
4020   locations->SetInAt(3, Location::RequiresRegister());
4021   locations->AddTemp(Location::RequiresRegister());
4022   locations->SetOut(Location::RequiresRegister());
4023 }
4024 
4025 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
4026 //
4027 // Note: The intrinsic is not used if len exceeds a threshold.
4028 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
4029   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
4030 
4031   MacroAssembler* masm = GetVIXLAssembler();
4032   LocationSummary* locations = invoke->GetLocations();
4033 
4034   SlowPathCodeARM64* slow_path =
4035       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
4036   codegen_->AddSlowPath(slow_path);
4037 
4038   Register length = WRegisterFrom(locations->InAt(3));
4039   __ Cmp(length, kCRC32UpdateBytesThreshold);
4040   __ B(slow_path->GetEntryLabel(), hi);
4041 
4042   const uint32_t array_data_offset =
4043       mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
4044   Register ptr = XRegisterFrom(locations->GetTemp(0));
4045   Register array = XRegisterFrom(locations->InAt(1));
4046   Location offset = locations->InAt(2);
4047   if (offset.IsConstant()) {
4048     int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
4049     __ Add(ptr, array, array_data_offset + offset_value);
4050   } else {
4051     __ Add(ptr, array, array_data_offset);
4052     __ Add(ptr, ptr, XRegisterFrom(offset));
4053   }
4054 
4055   Register crc = WRegisterFrom(locations->InAt(0));
4056   Register out = WRegisterFrom(locations->Out());
4057 
4058   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
4059 
4060   __ Bind(slow_path->GetExitLabel());
4061 }
4062 
4063 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
4064   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
4065     return;
4066   }
4067 
4068   LocationSummary* locations =
4069       new (allocator_) LocationSummary(invoke,
4070                                        LocationSummary::kNoCall,
4071                                        kIntrinsified);
4072 
4073   locations->SetInAt(0, Location::RequiresRegister());
4074   locations->SetInAt(1, Location::RequiresRegister());
4075   locations->SetInAt(2, Location::RequiresRegister());
4076   locations->SetInAt(3, Location::RequiresRegister());
4077   locations->AddTemp(Location::RequiresRegister());
4078   locations->SetOut(Location::RequiresRegister());
4079 }
4080 
4081 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
4082 //
4083 // There is no need to generate code checking if addr is 0.
4084 // The method updateByteBuffer is a private method of java.util.zip.CRC32.
4085 // This guarantees no calls outside of the CRC32 class.
4086 // An address of DirectBuffer is always passed to the call of updateByteBuffer.
4087 // An empty DirectBuffer implementation may use a zero address, but then its
4088 // length must also be zero, and the generated code handles a zero length
4089 // correctly.
4090 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
4091   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
4092 
4093   MacroAssembler* masm = GetVIXLAssembler();
4094   LocationSummary* locations = invoke->GetLocations();
4095 
4096   Register addr = XRegisterFrom(locations->InAt(1));
4097   Register ptr = XRegisterFrom(locations->GetTemp(0));
4098   __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
4099 
4100   Register crc = WRegisterFrom(locations->InAt(0));
4101   Register length = WRegisterFrom(locations->InAt(3));
4102   Register out = WRegisterFrom(locations->Out());
4103   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
4104 }
4105 
4106 void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
4107   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4108     return;
4109   }
4110 
4111   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
4112                                                                 LocationSummary::kNoCall,
4113                                                                 kIntrinsified);
4114   locations->SetInAt(0, Location::RequiresRegister());
4115   locations->SetOut(Location::RequiresFpuRegister());
4116 }
4117 
4118 void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
4119   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4120   MacroAssembler* masm = GetVIXLAssembler();
4121   UseScratchRegisterScope scratch_scope(masm);
4122   Register bits = InputRegisterAt(invoke, 0);
4123   VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
4124   VRegister half = scratch_scope.AcquireH();
4125   __ Fmov(half, bits);  // ARMv8.2
4126   __ Fcvt(out, half);
4127 }
4128 
4129 void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
4130   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4131     return;
4132   }
4133 
4134   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
4135                                                                 LocationSummary::kNoCall,
4136                                                                 kIntrinsified);
4137   locations->SetInAt(0, Location::RequiresFpuRegister());
4138   locations->SetOut(Location::RequiresRegister());
4139 }
4140 
4141 void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
4142   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4143   MacroAssembler* masm = GetVIXLAssembler();
4144   UseScratchRegisterScope scratch_scope(masm);
4145   VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
4146   VRegister half = scratch_scope.AcquireH();
4147   Register out = WRegisterFrom(invoke->GetLocations()->Out());
4148   __ Fcvt(half, in);
4149   __ Fmov(out, half);
4150   __ Sxth(out, out);  // sign extend due to returning a short type.
4151 }
4152 
4153 template<typename OP>
4154 void GenerateFP16Round(HInvoke* invoke,
4155                        CodeGeneratorARM64* const codegen_,
4156                        MacroAssembler* masm,
4157                        OP&& roundOp) {
4158   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4159   LocationSummary* locations = invoke->GetLocations();
4160   UseScratchRegisterScope scratch_scope(masm);
4161   Register out = WRegisterFrom(locations->Out());
4162   VRegister half = scratch_scope.AcquireH();
4163   __ Fmov(half, WRegisterFrom(locations->InAt(0)));
4164   roundOp(half, half);
4165   __ Fmov(out, half);
4166   __ Sxth(out, out);
4167 }
4168 
4169 void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
4170   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4171     return;
4172   }
4173 
4174   CreateIntToIntLocations(allocator_, invoke);
4175 }
4176 
4177 void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
4178   MacroAssembler* masm = GetVIXLAssembler();
4179   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
4180     __ Frintm(out, in);  // Round towards Minus infinity
4181   };
4182   GenerateFP16Round(invoke, codegen_, masm, roundOp);
4183 }
4184 
4185 void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
4186   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4187     return;
4188   }
4189 
4190   CreateIntToIntLocations(allocator_, invoke);
4191 }
4192 
4193 void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
4194   MacroAssembler* masm = GetVIXLAssembler();
4195   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
4196     __ Frintp(out, in);  // Round towards Plus infinity
4197   };
4198   GenerateFP16Round(invoke, codegen_, masm, roundOp);
4199 }
4200 
4201 void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
4202   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4203     return;
4204   }
4205 
4206   CreateIntToIntLocations(allocator_, invoke);
4207 }
4208 
4209 void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
4210   MacroAssembler* masm = GetVIXLAssembler();
4211   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
4212     __ Frintn(out, in);  // Round to nearest, with ties to even
4213   };
4214   GenerateFP16Round(invoke, codegen_, masm, roundOp);
4215 }
4216 
4217 void FP16ComparisonLocations(HInvoke* invoke,
4218                              ArenaAllocator* allocator_,
4219                              CodeGeneratorARM64* codegen_,
4220                              int requiredTemps) {
4221   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
4222     return;
4223   }
4224 
4225   CreateIntIntToIntLocations(allocator_, invoke);
4226   for (int i = 0; i < requiredTemps; i++) {
4227     invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
4228   }
4229 }
4230 
4231 template<typename OP>
4232 void GenerateFP16Compare(HInvoke* invoke,
4233                          CodeGeneratorARM64* codegen,
4234                          MacroAssembler* masm,
4235                          const OP compareOp) {
4236   DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
4237   LocationSummary* locations = invoke->GetLocations();
4238   Register out = WRegisterFrom(locations->Out());
4239   VRegister half0 = HRegisterFrom(locations->GetTemp(0));
4240   VRegister half1 = HRegisterFrom(locations->GetTemp(1));
4241   __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
4242   __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
4243   compareOp(out, half0, half1);
4244 }
4245 
4246 static inline void GenerateFP16Compare(HInvoke* invoke,
4247                                        CodeGeneratorARM64* codegen,
4248                                        MacroAssembler* masm,
4249                                        vixl::aarch64::Condition cond) {
4250   auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
4251     __ Fcmp(in0, in1);
4252     __ Cset(out, cond);
4253   };
4254   GenerateFP16Compare(invoke, codegen, masm, compareOp);
4255 }
4256 
4257 void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
4258   FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4259 }
4260 
4261 void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
4262   MacroAssembler* masm = GetVIXLAssembler();
4263   GenerateFP16Compare(invoke, codegen_, masm, gt);
4264 }
4265 
4266 void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
4267   FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4268 }
4269 
4270 void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
4271   MacroAssembler* masm = GetVIXLAssembler();
4272   GenerateFP16Compare(invoke, codegen_, masm, ge);
4273 }
4274 
4275 void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
4276   FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4277 }
4278 
4279 void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
4280   MacroAssembler* masm = GetVIXLAssembler();
4281   GenerateFP16Compare(invoke, codegen_, masm, mi);
4282 }
4283 
4284 void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
4285   FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4286 }
4287 
4288 void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
4289   MacroAssembler* masm = GetVIXLAssembler();
4290   GenerateFP16Compare(invoke, codegen_, masm, ls);
4291 }
4292 
4293 void IntrinsicLocationsBuilderARM64::VisitFP16Compare(HInvoke* invoke) {
4294   FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
4295 }
4296 
4297 void IntrinsicCodeGeneratorARM64::VisitFP16Compare(HInvoke* invoke) {
4298   MacroAssembler* masm = GetVIXLAssembler();
4299   auto compareOp = [masm](const Register out,
4300                           const VRegister& in0,
4301                           const VRegister& in1) {
4302     vixl::aarch64::Label end;
4303     vixl::aarch64::Label equal;
4304     vixl::aarch64::Label normal;
4305 
4306     // The normal cases for this method are:
4307     // - in0 > in1 => out = 1
4308     // - in0 < in1 => out = -1
4309     // - in0 == in1 => out = 0
4310     // +/-Infinity are ordered by default so are handled by the normal case.
4311     // There are two special cases that Fcmp is insufficient for distinguishing:
4312     // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4313     // - in0 or in1 is NaN => manually compare with in0 and in1 separately
4314     __ Fcmp(in0, in1);
4315     __ B(eq, &equal);  // in0==in1 or +0 -0 case.
4316     __ B(vc, &normal);  // in0 and in1 are ordered (not NaN).
4317 
4318     // Either of the inputs is NaN.
4319     // NaN is equal to itself and greater than any other number so:
4320     // - if only in0 is NaN => return 1
4321     // - if only in1 is NaN => return -1
4322     // - if both in0 and in1 are NaN => return 0
4323     __ Fcmp(in0, 0.0);
4324     __ Mov(out, -1);
4325     __ B(vc, &end);  // in0 != NaN => out = -1.
4326     __ Fcmp(in1, 0.0);
4327     __ Cset(out, vc);  // if in1 != NaN => out = 1, otherwise both are NaNs => out = 0.
4328     __ B(&end);
4329 
4330     // in0 == in1 or if one of the inputs is +0 and the other is -0.
4331     __ Bind(&equal);
4332     // Compare encoding of in0 and in1 as the denormal fraction of single precision float.
4333     // Reverse operand order because -0 > +0 when compared as S registers.
4334     // The instruction Fmov(Hregister, Wregister) zero extends the Hregister.
4335     // Therefore the value of bits[127:16] will not matter when doing the
4336     // below Fcmp as they are set to 0.
4337     __ Fcmp(in1.S(), in0.S());
4338 
4339     __ Bind(&normal);
4340     __ Cset(out, gt);  // if in0 > in1 => out = 1, otherwise out = 0.
4341                        // Note: could be from equals path or original comparison
4342     __ Csinv(out, out, wzr, pl);  // if in0 >= in1 out=out, otherwise out=-1.
4343 
4344     __ Bind(&end);
4345   };
4346 
4347   GenerateFP16Compare(invoke, codegen_, masm, compareOp);
4348 }
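// Summary of the total order produced by the branches above (illustrative):
//
//   compare(NaN, x)     returns  1 for any non-NaN x, and compare(NaN, NaN) returns 0;
//   compare(-0.0, +0.0) returns -1 even though -0.0 == +0.0 numerically;
//   all other pairs, including +/-Infinity, follow the ordinary ordered comparison (-1, 0, 1).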
4349 
4350 const int kFP16NaN = 0x7e00;
4351 
4352 static inline void GenerateFP16MinMax(HInvoke* invoke,
4353                                        CodeGeneratorARM64* codegen,
4354                                        MacroAssembler* masm,
4355                                        vixl::aarch64::Condition cond) {
4356   DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
4357   LocationSummary* locations = invoke->GetLocations();
4358 
4359   vixl::aarch64::Label equal;
4360   vixl::aarch64::Label end;
4361 
4362   UseScratchRegisterScope temps(masm);
4363 
4364   Register out = WRegisterFrom(locations->Out());
4365   Register in0 = WRegisterFrom(locations->InAt(0));
4366   Register in1 = WRegisterFrom(locations->InAt(1));
4367   VRegister half0 = HRegisterFrom(locations->GetTemp(0));
4368   VRegister half1 = temps.AcquireH();
4369 
4370   // The normal cases for this method are:
4371   // - in0.h == in1.h => out = in0 or in1
4372   // - in0.h <cond> in1.h => out = in0
4373   // - in0.h <!cond> in1.h => out = in1
4374   // +/-Infinity are ordered by default so are handled by the normal case.
4375   // There are two special cases that Fcmp is insufficient for distinguishing:
4376   // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4377   // - in0 or in1 is NaN => out = NaN
4378   __ Fmov(half0, in0);
4379   __ Fmov(half1, in1);
4380   __ Fcmp(half0, half1);
4381   __ B(eq, &equal);  // half0 = half1 or +0/-0 case.
4382   __ Csel(out, in0, in1, cond);  // if half0 <cond> half1 => out = in0, otherwise out = in1.
4383   __ B(vc, &end);  // None of the inputs were NaN.
4384 
4385   // At least one input was NaN.
4386   __ Mov(out, kFP16NaN);  // out=NaN.
4387   __ B(&end);
4388 
4389   // in0 == in1 or if one of the inputs is +0 and the other is -0.
4390   __ Bind(&equal);
4391   // Fcmp cannot normally distinguish +0 and -0 so compare encoding.
4392   // Encoding is compared as the denormal fraction of a Single.
4393   // Note: encoding of -0 > encoding of +0 despite +0 > -0 so in0 and in1 are swapped.
4394   // Note: The instruction Fmov(Hregister, Wregister) zero extends the Hregister.
4395   __ Fcmp(half1.S(), half0.S());
4396 
4397   __ Csel(out, in0, in1, cond);  // if half0 <cond> half1 => out = in0, otherwise out = in1.
4398 
4399   __ Bind(&end);
4400 }
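// Rough outline of the helper above (illustrative pseudo code, not compiled):
//
//   if (isNaN(a) || isNaN(b)) return kFP16NaN;          // Canonical half-precision NaN.
//   if (a == b) compare the raw encodings (reversed)    // So min(+0.0, -0.0) picks -0.0 and
//               with the same condition instead;        //    max(+0.0, -0.0) picks +0.0.
//   return cond(a, b) ? a : b;                          // cond: `mi` for min, `gt` for max.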
4401 
4402 void IntrinsicLocationsBuilderARM64::VisitFP16Min(HInvoke* invoke) {
4403   FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4404 }
4405 
4406 void IntrinsicCodeGeneratorARM64::VisitFP16Min(HInvoke* invoke) {
4407   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4408   MacroAssembler* masm = GetVIXLAssembler();
4409   GenerateFP16MinMax(invoke, codegen_, masm, mi);
4410 }
4411 
4412 void IntrinsicLocationsBuilderARM64::VisitFP16Max(HInvoke* invoke) {
4413   FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4414 }
4415 
4416 void IntrinsicCodeGeneratorARM64::VisitFP16Max(HInvoke* invoke) {
4417   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4418   MacroAssembler* masm = GetVIXLAssembler();
4419   GenerateFP16MinMax(invoke, codegen_, masm, gt);
4420 }
4421 
4422 static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4423   LocationSummary* locations = invoke->GetLocations();
4424   MacroAssembler* masm = codegen->GetVIXLAssembler();
4425   DataType::Type type = invoke->GetType();
4426   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4427 
4428   Register dividend = RegisterFrom(locations->InAt(0), type);
4429   Register divisor = RegisterFrom(locations->InAt(1), type);
4430   Register out = RegisterFrom(locations->Out(), type);
4431 
4432   // Check if divisor is zero, bail to managed implementation to handle.
4433   SlowPathCodeARM64* slow_path =
4434       new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
4435   codegen->AddSlowPath(slow_path);
4436   __ Cbz(divisor, slow_path->GetEntryLabel());
4437 
4438   __ Udiv(out, dividend, divisor);
4439 
4440   __ Bind(slow_path->GetExitLabel());
4441 }
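// The explicit Cbz above matters because Integer/Long.divideUnsigned must throw
// ArithmeticException for a zero divisor, while the AArch64 Udiv instruction simply returns 0.
// Roughly (illustrative):
//
//   if (divisor == 0) { /* slow path: call the managed method, which throws */ }
//   out = dividend / divisor;   // Unsigned division (Udiv).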
4442 
4443 void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4444   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4445 }
4446 
4447 void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4448   GenerateDivideUnsigned(invoke, codegen_);
4449 }
4450 
4451 void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4452   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4453 }
4454 
4455 void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4456   GenerateDivideUnsigned(invoke, codegen_);
4457 }
4458 
4459 void IntrinsicLocationsBuilderARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4460   CreateIntIntToIntLocations(allocator_, invoke);
4461 }
4462 
4463 void IntrinsicCodeGeneratorARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4464   LocationSummary* locations = invoke->GetLocations();
4465   MacroAssembler* masm = codegen_->GetVIXLAssembler();
4466   DataType::Type type = invoke->GetType();
4467   DCHECK(type == DataType::Type::kInt64);
4468 
4469   Register x = RegisterFrom(locations->InAt(0), type);
4470   Register y = RegisterFrom(locations->InAt(1), type);
4471   Register out = RegisterFrom(locations->Out(), type);
4472 
4473   __ Smulh(out, x, y);
4474 }
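// Illustrative equivalent (not compiled; uses the GCC/Clang __int128 extension): Smulh returns
// the high 64 bits of the signed 128-bit product, which is what Math.multiplyHigh specifies:
//
//   int64_t MultiplyHigh(int64_t x, int64_t y) {
//     return static_cast<int64_t>((static_cast<__int128>(x) * y) >> 64);
//   }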
4475 
4476 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4477   MacroAssembler* masm = codegen->GetVIXLAssembler();
4478 
4479   VRegister n = helpers::InputFPRegisterAt(invoke, 0);
4480   VRegister m = helpers::InputFPRegisterAt(invoke, 1);
4481   VRegister a = helpers::InputFPRegisterAt(invoke, 2);
4482   VRegister out = helpers::OutputFPRegister(invoke);
4483 
4484   __ Fmadd(out, n, m, a);
4485 }
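// Note on semantics (illustrative): Fmadd(out, n, m, a) computes n * m + a with a single
// rounding, matching Math.fma / std::fma rather than a separate multiply followed by an add:
//
//   out = fma(n, m, a);   // One rounding step; `(n * m) + a` would round twice.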
4486 
4487 void IntrinsicLocationsBuilderARM64::VisitMathFmaDouble(HInvoke* invoke) {
4488   CreateFPFPFPToFPLocations(allocator_, invoke);
4489 }
4490 
4491 void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) {
4492   GenerateMathFma(invoke, codegen_);
4493 }
4494 
4495 void IntrinsicLocationsBuilderARM64::VisitMathFmaFloat(HInvoke* invoke) {
4496   CreateFPFPFPToFPLocations(allocator_, invoke);
4497 }
4498 
4499 void IntrinsicCodeGeneratorARM64::VisitMathFmaFloat(HInvoke* invoke) {
4500   GenerateMathFma(invoke, codegen_);
4501 }
4502 
4503 class VarHandleSlowPathARM64 : public IntrinsicSlowPathARM64 {
4504  public:
4505   VarHandleSlowPathARM64(HInvoke* invoke, std::memory_order order)
4506       : IntrinsicSlowPathARM64(invoke),
4507         order_(order),
4508         return_success_(false),
4509         strong_(false),
4510         get_and_update_op_(GetAndUpdateOp::kAdd) {
4511   }
4512 
4513   vixl::aarch64::Label* GetByteArrayViewCheckLabel() {
4514     return &byte_array_view_check_label_;
4515   }
4516 
4517   vixl::aarch64::Label* GetNativeByteOrderLabel() {
4518     return &native_byte_order_label_;
4519   }
4520 
4521   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4522     if (return_success) {
4523       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4524     } else {
4525       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4526     }
4527     return_success_ = return_success;
4528     strong_ = strong;
4529   }
4530 
4531   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4532     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4533     get_and_update_op_ = get_and_update_op;
4534   }
4535 
4536   void EmitNativeCode(CodeGenerator* codegen_in) override {
4537     if (GetByteArrayViewCheckLabel()->IsLinked()) {
4538       EmitByteArrayViewCode(codegen_in);
4539     }
4540     IntrinsicSlowPathARM64::EmitNativeCode(codegen_in);
4541   }
4542 
4543  private:
4544   HInvoke* GetInvoke() const {
4545     return GetInstruction()->AsInvoke();
4546   }
4547 
4548   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4549     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4550   }
4551 
4552   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4553 
4554   vixl::aarch64::Label byte_array_view_check_label_;
4555   vixl::aarch64::Label native_byte_order_label_;
4556   // Shared parameter for all VarHandle intrinsics.
4557   std::memory_order order_;
4558   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4559   bool return_success_;
4560   bool strong_;
4561   // Extra argument for GenerateVarHandleGetAndUpdate().
4562   GetAndUpdateOp get_and_update_op_;
4563 };
4564 
4565 // Generate subtype check without read barriers.
4566 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
4567                                                     SlowPathCodeARM64* slow_path,
4568                                                     Register object,
4569                                                     Register type,
4570                                                     bool object_can_be_null = true) {
4571   MacroAssembler* masm = codegen->GetVIXLAssembler();
4572 
4573   const MemberOffset class_offset = mirror::Object::ClassOffset();
4574   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4575 
4576   vixl::aarch64::Label success;
4577   if (object_can_be_null) {
4578     __ Cbz(object, &success);
4579   }
4580 
4581   UseScratchRegisterScope temps(masm);
4582   Register temp = temps.AcquireW();
4583 
4584   __ Ldr(temp, HeapOperand(object, class_offset.Int32Value()));
4585   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4586   vixl::aarch64::Label loop;
4587   __ Bind(&loop);
4588   __ Cmp(type, temp);
4589   __ B(&success, eq);
4590   __ Ldr(temp, HeapOperand(temp, super_class_offset.Int32Value()));
4591   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4592   __ Cbz(temp, slow_path->GetEntryLabel());
4593   __ B(&loop);
4594   __ Bind(&success);
4595 }
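
// Editor's note: an illustrative sketch of the sequence emitted above (reference
// unpoisoning omitted), assuming hypothetical registers wObj = object, wType = type and
// wTmp = the acquired scratch register:
//
//          CBZ   wObj, success                      // only if object_can_be_null
//          LDR   wTmp, [xObj, #class_offset]        // wTmp = object->klass_
//   loop:  CMP   wType, wTmp
//          B.EQ  success
//          LDR   wTmp, [xTmp, #super_class_offset]  // walk up the superclass chain
//          CBZ   wTmp, slow_path                    // no match up to java.lang.Object
//          B     loop
//   success: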
4596 
4597 // Check access mode and the primitive type from VarHandle.varType.
4598 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4599 // check without read barrier, so it can have false negatives which we handle in the slow path.
4600 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4601                                                         CodeGeneratorARM64* codegen,
4602                                                         SlowPathCodeARM64* slow_path,
4603                                                         DataType::Type type) {
4604   mirror::VarHandle::AccessMode access_mode =
4605       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4606   Primitive::Type primitive_type = DataTypeToPrimitive(type);
4607 
4608   MacroAssembler* masm = codegen->GetVIXLAssembler();
4609   Register varhandle = InputRegisterAt(invoke, 0);
4610 
4611   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4612   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4613   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4614 
4615   UseScratchRegisterScope temps(masm);
4616   Register var_type_no_rb = temps.AcquireW();
4617   Register temp2 = temps.AcquireW();
4618 
4619   // Check that the operation is permitted and check the primitive type of varhandle.varType.
4620   // We do not need a read barrier when loading a reference only to read a constant
4621   // primitive field through that reference. Use LDP to load both fields together.
4622   DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4623   __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
4624   codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
4625   __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
4626   __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4627   if (primitive_type == Primitive::kPrimNot) {
4628     static_assert(Primitive::kPrimNot == 0);
4629     __ Cbnz(temp2, slow_path->GetEntryLabel());
4630   } else {
4631     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4632     __ B(slow_path->GetEntryLabel(), ne);
4633   }
4634 
4635   temps.Release(temp2);
4636 
4637   if (type == DataType::Type::kReference) {
4638     // Check reference arguments against the varType.
4639     // False negatives due to varType being an interface or array type
4640     // or due to the missing read barrier are handled by the slow path.
4641     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4642     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4643     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4644     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4645       HInstruction* arg = invoke->InputAt(arg_index);
4646       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4647       if (!arg->IsNullConstant()) {
4648         Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
4649         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4650       }
4651     }
4652   }
4653 }
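
// Editor's note: an illustrative sketch of the access-mode and type check above, assuming
// hypothetical scratch registers w16/w17 and the VarHandle reference in w0 (unpoisoning
// omitted):
//
//   LDP   w16, w17, [x0, #varType_offset]   // w16 = varType, w17 = accessModesBitMask
//   TBZ   w17, #access_mode, slow_path      // requested access mode not supported
//   LDRH  w17, [x16, #primitiveType_offset]
//   // references: CBNZ w17, slow_path; primitives: CMP w17, #type; B.NE slow_path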
4654 
4655 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4656                                               CodeGeneratorARM64* codegen,
4657                                               SlowPathCodeARM64* slow_path) {
4658   MacroAssembler* masm = codegen->GetVIXLAssembler();
4659   Register varhandle = InputRegisterAt(invoke, 0);
4660 
4661   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4662 
4663   UseScratchRegisterScope temps(masm);
4664   Register temp = temps.AcquireW();
4665 
4666   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4667   // Do not emit read barrier (or unpoison the reference) for comparing to null.
4668   __ Ldr(temp, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4669   __ Cbnz(temp, slow_path->GetEntryLabel());
4670 }
4671 
4672 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4673                                                  CodeGeneratorARM64* codegen,
4674                                                  SlowPathCodeARM64* slow_path) {
4675   VarHandleOptimizations optimizations(invoke);
4676   MacroAssembler* masm = codegen->GetVIXLAssembler();
4677   Register varhandle = InputRegisterAt(invoke, 0);
4678   Register object = InputRegisterAt(invoke, 1);
4679 
4680   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4681   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4682 
4683   // Null-check the object.
4684   if (!optimizations.GetSkipObjectNullCheck()) {
4685     __ Cbz(object, slow_path->GetEntryLabel());
4686   }
4687 
4688   if (!optimizations.GetUseKnownImageVarHandle()) {
4689     UseScratchRegisterScope temps(masm);
4690     Register temp = temps.AcquireW();
4691     Register temp2 = temps.AcquireW();
4692 
4693     // Check that the VarHandle references an instance field by checking that
4694     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4695     // type compatibility check with the source object's type, which will fail for null.
4696     DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4697     __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4698     codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4699     // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4700     __ Cbnz(temp2, slow_path->GetEntryLabel());
4701 
4702     // Check that the object has the correct type.
4703     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4704     temps.Release(temp2);  // Needed by GenerateSubTypeObjectCheckNoReadBarrier().
4705     GenerateSubTypeObjectCheckNoReadBarrier(
4706         codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4707   }
4708 }
4709 
4710 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4711                                          CodeGeneratorARM64* codegen,
4712                                          VarHandleSlowPathARM64* slow_path) {
4713   VarHandleOptimizations optimizations(invoke);
4714   MacroAssembler* masm = codegen->GetVIXLAssembler();
4715   Register varhandle = InputRegisterAt(invoke, 0);
4716   Register object = InputRegisterAt(invoke, 1);
4717   Register index = InputRegisterAt(invoke, 2);
4718   DataType::Type value_type =
4719       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4720   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4721 
4722   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4723   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4724   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4725   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4726   const MemberOffset class_offset = mirror::Object::ClassOffset();
4727   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4728 
4729   // Null-check the object.
4730   if (!optimizations.GetSkipObjectNullCheck()) {
4731     __ Cbz(object, slow_path->GetEntryLabel());
4732   }
4733 
4734   UseScratchRegisterScope temps(masm);
4735   Register temp = temps.AcquireW();
4736   Register temp2 = temps.AcquireW();
4737 
4738   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4739   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4740   // coordinateType0 shall not be null but we do not explicitly verify that.
4741   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4742   __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4743   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4744   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4745   __ Cbz(temp2, slow_path->GetEntryLabel());
4746 
4747   // Check the object's class against coordinateType0.
4748   //
4749   // This is an exact check and we defer other cases to the runtime. This includes
4750   // conversion to array of superclass references, which is valid but subsequently
4751   // requires all update operations to check that the value can indeed be stored.
4752   // We do not want to perform such extra checks in the intrinsified code.
4753   //
4754   // We do this check without read barrier, so there can be false negatives which we
4755   // defer to the slow path. There shall be no false negatives for array classes in the
4756   // boot image (including Object[] and primitive arrays) because they are non-movable.
4757   __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
4758   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4759   __ Cmp(temp, temp2);
4760   __ B(slow_path->GetEntryLabel(), ne);
4761 
4762   // Check that the coordinateType0 is an array type. We do not need a read barrier
4763   // for loading constant reference fields (or chains of them) for comparison with null,
4764   // nor for finally loading a constant primitive field (primitive type) below.
4765   __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
4766   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4767   __ Cbz(temp2, slow_path->GetEntryLabel());
4768 
4769   // Check that the array component type matches the primitive type.
4770   __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
4771   if (primitive_type == Primitive::kPrimNot) {
4772     static_assert(Primitive::kPrimNot == 0);
4773     __ Cbnz(temp2, slow_path->GetEntryLabel());
4774   } else {
4775     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
4776     // we shall check for a byte array view in the slow path.
4777     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4778     // so we cannot emit that if we're JITting without boot image.
4779     bool boot_image_available =
4780         codegen->GetCompilerOptions().IsBootImage() ||
4781         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4782     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
4783     vixl::aarch64::Label* slow_path_label =
4784         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4785     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4786     __ B(slow_path_label, ne);
4787   }
4788 
4789   // Check for array index out of bounds.
4790   __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
4791   __ Cmp(index, temp);
4792   __ B(slow_path->GetEntryLabel(), hs);
4793 }
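
// Editor's note: in summary, the array path above bails out to the slow path when any of
// the following fails, in this order: the object is non-null, coordinateType1 is non-null,
// the object's class equals coordinateType0, coordinateType0 is an array class, the array
// component's primitive type matches the expected type (with non-byte-sized primitives
// possibly diverted to the byte-array-view check instead of the generic slow path), and
// finally the index is within the array bounds.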
4794 
4795 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4796                                               CodeGeneratorARM64* codegen,
4797                                               VarHandleSlowPathARM64* slow_path) {
4798   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4799   if (expected_coordinates_count == 0u) {
4800     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4801   } else if (expected_coordinates_count == 1u) {
4802     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4803   } else {
4804     DCHECK_EQ(expected_coordinates_count, 2u);
4805     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4806   }
4807 }
4808 
4809 static VarHandleSlowPathARM64* GenerateVarHandleChecks(HInvoke* invoke,
4810                                                        CodeGeneratorARM64* codegen,
4811                                                        std::memory_order order,
4812                                                        DataType::Type type) {
4813   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4814   VarHandleOptimizations optimizations(invoke);
4815   if (optimizations.GetUseKnownImageVarHandle()) {
4816     DCHECK_NE(expected_coordinates_count, 2u);
4817     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4818       return nullptr;
4819     }
4820   }
4821 
4822   VarHandleSlowPathARM64* slow_path =
4823       new (codegen->GetScopedAllocator()) VarHandleSlowPathARM64(invoke, order);
4824   codegen->AddSlowPath(slow_path);
4825 
4826   if (!optimizations.GetUseKnownImageVarHandle()) {
4827     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4828   }
4829   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4830 
4831   return slow_path;
4832 }
4833 
4834 struct VarHandleTarget {
4835   Register object;  // The object holding the value to operate on.
4836   Register offset;  // The offset of the value to operate on.
4837 };
4838 
4839 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4840   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4841   LocationSummary* locations = invoke->GetLocations();
4842 
4843   VarHandleTarget target;
4844   // The temporary allocated for loading the offset.
4845   target.offset = WRegisterFrom(locations->GetTemp(0u));
4846   // The reference to the object that holds the value to operate on.
4847   target.object = (expected_coordinates_count == 0u)
4848       ? WRegisterFrom(locations->GetTemp(1u))
4849       : InputRegisterAt(invoke, 1);
4850   return target;
4851 }
4852 
4853 static void GenerateVarHandleTarget(HInvoke* invoke,
4854                                     const VarHandleTarget& target,
4855                                     CodeGeneratorARM64* codegen) {
4856   MacroAssembler* masm = codegen->GetVIXLAssembler();
4857   Register varhandle = InputRegisterAt(invoke, 0);
4858   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4859 
4860   if (expected_coordinates_count <= 1u) {
4861     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4862       ScopedObjectAccess soa(Thread::Current());
4863       ArtField* target_field = GetBootImageVarHandleField(invoke);
4864       if (expected_coordinates_count == 0u) {
4865         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4866         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4867           uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4868           codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4869         } else {
4870           codegen->LoadTypeForBootImageIntrinsic(
4871               target.object,
4872               TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4873         }
4874       }
4875       __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4876     } else {
4877       // For static fields, we need to fill `target.object` with the declaring class,
4878       // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
4879       // we do not need the declaring class and can forget the `ArtField*` once we have
4880       // loaded `target.offset`, so we use `target.offset` to hold the `ArtField*`.
4881       Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4882 
4883       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4884       const MemberOffset offset_offset = ArtField::OffsetOffset();
4885 
4886       // Load the ArtField*, the offset and, if needed, declaring class.
4887       __ Ldr(field.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
4888       __ Ldr(target.offset, MemOperand(field.X(), offset_offset.Int32Value()));
4889       if (expected_coordinates_count == 0u) {
4890         codegen->GenerateGcRootFieldLoad(invoke,
4891                                          LocationFrom(target.object),
4892                                          field.X(),
4893                                          ArtField::DeclaringClassOffset().Int32Value(),
4894                                          /*fixup_label=*/nullptr,
4895                                          codegen->GetCompilerReadBarrierOption());
4896       }
4897     }
4898   } else {
4899     DCHECK_EQ(expected_coordinates_count, 2u);
4900     DataType::Type value_type =
4901         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4902     size_t size_shift = DataType::SizeShift(value_type);
4903     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4904 
4905     Register index = InputRegisterAt(invoke, 2);
4906     Register shifted_index = index;
4907     if (size_shift != 0u) {
4908       shifted_index = target.offset;
4909       __ Lsl(shifted_index, index, size_shift);
4910     }
4911     __ Add(target.offset, shifted_index, data_offset.Int32Value());
4912   }
4913 }
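
// Editor's note: for the array/view case above, the computed field offset is simply
// (index << size_shift) + data_offset. An illustrative sketch for a 4-byte element type
// (size_shift == 2), using hypothetical register names:
//
//   LSL  wOffset, wIndex, #2
//   ADD  wOffset, wOffset, #data_offset   // mirror::Array::DataOffset(element size)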
4914 
4915 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
4916                                                        CodeGeneratorARM64* codegen) {
4917   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4918   DataType::Type return_type = invoke->GetType();
4919 
4920   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4921   LocationSummary* locations =
4922       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4923   locations->SetInAt(0, Location::RequiresRegister());
4924   // Require coordinates in registers. These are the object holding the value
4925   // to operate on (except for static fields) and index (for arrays and views).
4926   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4927     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4928   }
4929   if (return_type != DataType::Type::kVoid) {
4930     if (DataType::IsFloatingPointType(return_type)) {
4931       locations->SetOut(Location::RequiresFpuRegister());
4932     } else {
4933       locations->SetOut(Location::RequiresRegister());
4934     }
4935   }
4936   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4937   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4938   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4939     HInstruction* arg = invoke->InputAt(arg_index);
4940     if (IsZeroBitPattern(arg)) {
4941       locations->SetInAt(arg_index, Location::ConstantLocation(arg));
4942     } else if (DataType::IsFloatingPointType(arg->GetType())) {
4943       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4944     } else {
4945       locations->SetInAt(arg_index, Location::RequiresRegister());
4946     }
4947   }
4948 
4949   // Add a temporary for offset.
4950   if (codegen->EmitNonBakerReadBarrier() &&
4951       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4952     // To preserve the offset value across the non-Baker read barrier slow path
4953     // for loading the declaring class, use a fixed callee-save register.
4954     constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4955     locations->AddTemp(Location::RegisterLocation(first_callee_save));
4956   } else {
4957     locations->AddTemp(Location::RequiresRegister());
4958   }
4959   if (expected_coordinates_count == 0u) {
4960     // Add a temporary to hold the declaring class.
4961     locations->AddTemp(Location::RequiresRegister());
4962   }
4963 
4964   return locations;
4965 }
4966 
4967 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4968   VarHandleOptimizations optimizations(invoke);
4969   if (optimizations.GetDoNotIntrinsify()) {
4970     return;
4971   }
4972 
4973   if (codegen->EmitNonBakerReadBarrier() &&
4974       invoke->GetType() == DataType::Type::kReference &&
4975       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4976       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4977     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4978     // the passed reference and reloads it from the field. This gets the memory visibility
4979     // wrong for Acquire/Volatile operations. b/173104084
4980     return;
4981   }
4982 
4983   CreateVarHandleCommonLocations(invoke, codegen);
4984 }
4985 
4986 static void GenerateVarHandleGet(HInvoke* invoke,
4987                                  CodeGeneratorARM64* codegen,
4988                                  std::memory_order order,
4989                                  bool byte_swap = false) {
4990   DataType::Type type = invoke->GetType();
4991   DCHECK_NE(type, DataType::Type::kVoid);
4992 
4993   LocationSummary* locations = invoke->GetLocations();
4994   MacroAssembler* masm = codegen->GetVIXLAssembler();
4995   CPURegister out = helpers::OutputCPURegister(invoke);
4996 
4997   VarHandleTarget target = GetVarHandleTarget(invoke);
4998   VarHandleSlowPathARM64* slow_path = nullptr;
4999   if (!byte_swap) {
5000     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
5001     GenerateVarHandleTarget(invoke, target, codegen);
5002     if (slow_path != nullptr) {
5003       __ Bind(slow_path->GetNativeByteOrderLabel());
5004     }
5005   }
5006 
5007   // ARM64 load-acquire instructions are implicitly sequentially consistent.
5008   bool use_load_acquire =
5009       (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
5010   DCHECK(use_load_acquire || order == std::memory_order_relaxed);
5011 
5012   // Load the value from the target location.
5013   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
5014     // Piggy-back on the field load path using introspection for the Baker read barrier.
5015     // The `target.offset` is a temporary, use it for field address.
5016     Register tmp_ptr = target.offset.X();
5017     __ Add(tmp_ptr, target.object.X(), target.offset.X());
5018     codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
5019                                                    locations->Out(),
5020                                                    target.object,
5021                                                    MemOperand(tmp_ptr),
5022                                                    /*needs_null_check=*/ false,
5023                                                    use_load_acquire);
5024     DCHECK(!byte_swap);
5025   } else {
5026     MemOperand address(target.object.X(), target.offset.X());
5027     CPURegister load_reg = out;
5028     DataType::Type load_type = type;
5029     UseScratchRegisterScope temps(masm);
5030     if (byte_swap) {
5031       if (type == DataType::Type::kInt16) {
5032         // Avoid unnecessary sign extension before REV16.
5033         load_type = DataType::Type::kUint16;
5034       } else if (type == DataType::Type::kFloat32) {
5035         load_type = DataType::Type::kInt32;
5036         load_reg = target.offset.W();
5037       } else if (type == DataType::Type::kFloat64) {
5038         load_type = DataType::Type::kInt64;
5039         load_reg = target.offset.X();
5040       }
5041     }
5042     if (use_load_acquire) {
5043       codegen->LoadAcquire(invoke, load_type, load_reg, address, /*needs_null_check=*/ false);
5044     } else {
5045       codegen->Load(load_type, load_reg, address);
5046     }
5047     if (type == DataType::Type::kReference) {
5048       DCHECK(!byte_swap);
5049       DCHECK(out.IsW());
5050       Location out_loc = locations->Out();
5051       Location object_loc = LocationFrom(target.object);
5052       Location offset_loc = LocationFrom(target.offset);
5053       codegen->MaybeGenerateReadBarrierSlow(invoke, out_loc, out_loc, object_loc, 0u, offset_loc);
5054     } else if (byte_swap) {
5055       GenerateReverseBytes(masm, type, load_reg, out);
5056     }
5057   }
5058 
5059   if (slow_path != nullptr) {
5060     DCHECK(!byte_swap);
5061     __ Bind(slow_path->GetExitLabel());
5062   }
5063 }
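
// Editor's note: the net effect of the `order` parameter above is, roughly, that get and
// getOpaque use a plain load via codegen->Load(), while getAcquire and getVolatile go
// through codegen->LoadAcquire() (a load-acquire, LDAR-family instruction); as the comment
// above notes, the acquire form on ARM64 also provides the ordering needed for the volatile
// case. Reference loads with a Baker read barrier take the introspection-based field-load
// path instead.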
5064 
5065 void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
5066   CreateVarHandleGetLocations(invoke, codegen_);
5067 }
5068 
5069 void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
5070   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
5071 }
5072 
5073 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
5074   CreateVarHandleGetLocations(invoke, codegen_);
5075 }
5076 
5077 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
5078   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
5079 }
5080 
5081 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
5082   CreateVarHandleGetLocations(invoke, codegen_);
5083 }
5084 
5085 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
5086   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
5087 }
5088 
5089 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
5090   CreateVarHandleGetLocations(invoke, codegen_);
5091 }
5092 
5093 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
5094   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
5095 }
5096 
5097 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) {
5098   VarHandleOptimizations optimizations(invoke);
5099   if (optimizations.GetDoNotIntrinsify()) {
5100     return;
5101   }
5102 
5103   CreateVarHandleCommonLocations(invoke, codegen);
5104 }
5105 
5106 static void GenerateVarHandleSet(HInvoke* invoke,
5107                                  CodeGeneratorARM64* codegen,
5108                                  std::memory_order order,
5109                                  bool byte_swap = false) {
5110   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
5111   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
5112 
5113   MacroAssembler* masm = codegen->GetVIXLAssembler();
5114   CPURegister value = InputCPURegisterOrZeroRegAt(invoke, value_index);
5115 
5116   VarHandleTarget target = GetVarHandleTarget(invoke);
5117   VarHandleSlowPathARM64* slow_path = nullptr;
5118   if (!byte_swap) {
5119     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5120     GenerateVarHandleTarget(invoke, target, codegen);
5121     if (slow_path != nullptr) {
5122       __ Bind(slow_path->GetNativeByteOrderLabel());
5123     }
5124   }
5125 
5126   // ARM64 store-release instructions are implicitly sequentially consistent.
5127   bool use_store_release =
5128       (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
5129   DCHECK(use_store_release || order == std::memory_order_relaxed);
5130 
5131   // Store the value to the target location.
5132   {
5133     CPURegister source = value;
5134     UseScratchRegisterScope temps(masm);
5135     if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
5136       DCHECK(value.IsW());
5137       Register temp = temps.AcquireW();
5138       __ Mov(temp, value.W());
5139       codegen->GetAssembler()->PoisonHeapReference(temp);
5140       source = temp;
5141     }
5142     if (byte_swap) {
5143       DCHECK(!source.IsZero());  // We use the main path for zero as it does not need a byte swap.
5144       Register temp = source.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
5145       if (value_type == DataType::Type::kInt16) {
5146         // Avoid unnecessary sign extension before storing.
5147         value_type = DataType::Type::kUint16;
5148       } else if (DataType::IsFloatingPointType(value_type)) {
5149         __ Fmov(temp, source.Is64Bits() ? source.D() : source.S());
5150         value_type = source.Is64Bits() ? DataType::Type::kInt64 : DataType::Type::kInt32;
5151         source = temp;  // Source for the `GenerateReverseBytes()` below.
5152       }
5153       GenerateReverseBytes(masm, value_type, source, temp);
5154       source = temp;
5155     }
5156     MemOperand address(target.object.X(), target.offset.X());
5157     if (use_store_release) {
5158       codegen->StoreRelease(invoke, value_type, source, address, /*needs_null_check=*/ false);
5159     } else {
5160       codegen->Store(value_type, source, address);
5161     }
5162   }
5163 
5164   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
5165     codegen->MaybeMarkGCCard(target.object, Register(value), /* emit_null_check= */ true);
5166   }
5167 
5168   if (slow_path != nullptr) {
5169     DCHECK(!byte_swap);
5170     __ Bind(slow_path->GetExitLabel());
5171   }
5172 }
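
// Editor's note: symmetrically to the load path, set and setOpaque above store with
// codegen->Store() (a plain store), while setRelease and setVolatile go through
// codegen->StoreRelease() (a store-release, STLR-family instruction); after a reference
// store, MaybeMarkGCCard() updates the GC card table for the holding object.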
5173 
5174 void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
5175   CreateVarHandleSetLocations(invoke, codegen_);
5176 }
5177 
5178 void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
5179   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
5180 }
5181 
5182 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
5183   CreateVarHandleSetLocations(invoke, codegen_);
5184 }
5185 
5186 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
5187   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
5188 }
5189 
5190 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
5191   CreateVarHandleSetLocations(invoke, codegen_);
5192 }
5193 
5194 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
5195   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
5196 }
5197 
5198 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
5199   CreateVarHandleSetLocations(invoke, codegen_);
5200 }
5201 
5202 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
5203   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
5204 }
5205 
5206 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
5207                                                             CodeGeneratorARM64* codegen,
5208                                                             bool return_success) {
5209   VarHandleOptimizations optimizations(invoke);
5210   if (optimizations.GetDoNotIntrinsify()) {
5211     return;
5212   }
5213 
5214   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5215   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
5216   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5217     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5218     // the passed reference and reloads it from the field. This breaks the read barriers
5219     // in slow path in different ways. The marked old value may not actually be a to-space
5220     // reference to the same object as `old_value`, breaking slow path assumptions. And
5221     // for CompareAndExchange, marking the old value after comparison failure may actually
5222     // return the reference to `expected`, erroneously indicating success even though we
5223     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
5224     return;
5225   }
5226 
5227   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5228 
5229   if (codegen->EmitNonBakerReadBarrier()) {
5230     // We need callee-save registers for both the class object and offset instead of
5231     // the temporaries reserved in CreateVarHandleCommonLocations().
5232     static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
5233     uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
5234     uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
5235     if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
5236       DCHECK_EQ(locations->GetTempCount(), 2u);
5237       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
5238       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
5239       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
5240     } else {
5241       DCHECK_EQ(locations->GetTempCount(), 1u);
5242       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
5243       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
5244     }
5245   }
5246   size_t old_temp_count = locations->GetTempCount();
5247   DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5248   if (!return_success) {
5249     if (DataType::IsFloatingPointType(value_type)) {
5250       // Add a temporary for old value and exclusive store result if floating point
5251       // `expected` and/or `new_value` take scratch registers.
5252       size_t available_scratch_registers =
5253           (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
5254           (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
5255       size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
5256       // We can reuse the declaring class (if present) and offset temporary.
5257       if (temps_needed > old_temp_count) {
5258         locations->AddRegisterTemps(temps_needed - old_temp_count);
5259       }
5260     } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
5261                !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
5262                !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
5263                GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5264       // Allocate a normal temporary for store result in the non-native byte order path
5265       // because scratch registers are used by the byte-swapped `expected` and `new_value`.
5266       DCHECK_EQ(old_temp_count, 1u);
5267       locations->AddTemp(Location::RequiresRegister());
5268     }
5269   }
5270   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5271     // Add a temporary for the `old_value_temp` in slow path.
5272     locations->AddTemp(Location::RequiresRegister());
5273   }
5274 }
5275 
5276 static Register MoveToTempIfFpRegister(const CPURegister& cpu_reg,
5277                                        DataType::Type type,
5278                                        MacroAssembler* masm,
5279                                        UseScratchRegisterScope* temps) {
5280   if (cpu_reg.IsS()) {
5281     DCHECK_EQ(type, DataType::Type::kFloat32);
5282     Register reg = temps->AcquireW();
5283     __ Fmov(reg, cpu_reg.S());
5284     return reg;
5285   } else if (cpu_reg.IsD()) {
5286     DCHECK_EQ(type, DataType::Type::kFloat64);
5287     Register reg = temps->AcquireX();
5288     __ Fmov(reg, cpu_reg.D());
5289     return reg;
5290   } else {
5291     return DataType::Is64BitType(type) ? cpu_reg.X() : cpu_reg.W();
5292   }
5293 }
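
// Editor's note: the Fmov above copies the raw bit pattern of the float/double into a core
// register, so the CAS loop below compares bit patterns rather than using floating-point
// equality; for example, +0.0f and -0.0f have different bit patterns and would not compare
// equal here, consistent with the bitwise-comparison note in
// GenerateVarHandleCompareAndSetOrExchange() below.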
5294 
5295 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
5296                                                      CodeGeneratorARM64* codegen,
5297                                                      std::memory_order order,
5298                                                      bool return_success,
5299                                                      bool strong,
5300                                                      bool byte_swap = false) {
5301   DCHECK(return_success || strong);
5302 
5303   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
5304   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
5305   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5306   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
5307 
5308   MacroAssembler* masm = codegen->GetVIXLAssembler();
5309   LocationSummary* locations = invoke->GetLocations();
5310   CPURegister expected = InputCPURegisterOrZeroRegAt(invoke, expected_index);
5311   CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
5312   CPURegister out = helpers::OutputCPURegister(invoke);
5313 
5314   VarHandleTarget target = GetVarHandleTarget(invoke);
5315   VarHandleSlowPathARM64* slow_path = nullptr;
5316   if (!byte_swap) {
5317     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5318     GenerateVarHandleTarget(invoke, target, codegen);
5319     if (slow_path != nullptr) {
5320       slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
5321       __ Bind(slow_path->GetNativeByteOrderLabel());
5322     }
5323   }
5324 
5325   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
5326   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5327     // Mark card for object assuming new value is stored.
5328     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5329     codegen->MaybeMarkGCCard(target.object, new_value.W(), new_value_can_be_null);
5330   }
5331 
5332   // Reuse the `offset` temporary for the pointer to the target location,
5333   // except for references that need the offset for the read barrier.
5334   UseScratchRegisterScope temps(masm);
5335   Register tmp_ptr = target.offset.X();
5336   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5337     tmp_ptr = temps.AcquireX();
5338   }
5339   __ Add(tmp_ptr, target.object.X(), target.offset.X());
5340 
5341   // Move floating point values to scratch registers.
5342   // Note that float/double CAS uses bitwise comparison, rather than the operator==.
5343   Register expected_reg = MoveToTempIfFpRegister(expected, value_type, masm, &temps);
5344   Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
5345   bool is_fp = DataType::IsFloatingPointType(value_type);
5346   DataType::Type cas_type = is_fp
5347       ? ((value_type == DataType::Type::kFloat64) ? DataType::Type::kInt64 : DataType::Type::kInt32)
5348       : value_type;
5349   // Avoid sign extension in the CAS loop by zero-extending `expected` before the loop. This adds
5350   // one instruction for CompareAndExchange as we shall need to sign-extend the returned value.
5351   if (value_type == DataType::Type::kInt16 && !expected.IsZero()) {
5352     Register temp = temps.AcquireW();
5353     __ Uxth(temp, expected_reg);
5354     expected_reg = temp;
5355     cas_type = DataType::Type::kUint16;
5356   } else if (value_type == DataType::Type::kInt8 && !expected.IsZero()) {
5357     Register temp = temps.AcquireW();
5358     __ Uxtb(temp, expected_reg);
5359     expected_reg = temp;
5360     cas_type = DataType::Type::kUint8;
5361   }
5362 
5363   if (byte_swap) {
5364     // Do the byte swap and move values to scratch registers if needed.
5365     // Non-zero FP values and non-zero `expected` for `kInt16` are already in scratch registers.
5366     DCHECK_NE(value_type, DataType::Type::kInt8);
5367     if (!expected.IsZero()) {
5368       bool is_scratch = is_fp || (value_type == DataType::Type::kInt16);
5369       Register temp = is_scratch ? expected_reg : temps.AcquireSameSizeAs(expected_reg);
5370       GenerateReverseBytes(masm, cas_type, expected_reg, temp);
5371       expected_reg = temp;
5372     }
5373     if (!new_value.IsZero()) {
5374       Register temp = is_fp ? new_value_reg : temps.AcquireSameSizeAs(new_value_reg);
5375       GenerateReverseBytes(masm, cas_type, new_value_reg, temp);
5376       new_value_reg = temp;
5377     }
5378   }
5379 
5380   // Prepare registers for old value and the result of the exclusive store.
5381   Register old_value;
5382   Register store_result;
5383   if (return_success) {
5384     // Use the output register for both old value and exclusive store result.
5385     old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
5386     store_result = out.W();
5387   } else if (DataType::IsFloatingPointType(value_type)) {
5388     // We need two temporary registers but we have already used scratch registers for
5389     // holding the expected and new value unless they are zero bit pattern (+0.0f or
5390     // +0.0). We have allocated sufficient normal temporaries to handle that.
5391     size_t next_temp = 1u;
5392     if (expected.IsZero()) {
5393       old_value = (cas_type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
5394     } else {
5395       Location temp = locations->GetTemp(next_temp);
5396       ++next_temp;
5397       old_value = (cas_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
5398     }
5399     store_result =
5400         new_value.IsZero() ? temps.AcquireW() : WRegisterFrom(locations->GetTemp(next_temp));
5401     DCHECK(!old_value.Is(tmp_ptr));
5402     DCHECK(!store_result.Is(tmp_ptr));
5403   } else {
5404     // Use the output register for the old value.
5405     old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
5406     // Use scratch register for the store result, except when we have used up
5407     // scratch registers for byte-swapped `expected` and `new_value`.
5408     // In that case, we have allocated a normal temporary.
5409     store_result = (byte_swap && !expected.IsZero() && !new_value.IsZero())
5410         ? WRegisterFrom(locations->GetTemp(1))
5411         : temps.AcquireW();
5412     DCHECK(!store_result.Is(tmp_ptr));
5413   }
5414 
5415   vixl::aarch64::Label exit_loop_label;
5416   vixl::aarch64::Label* exit_loop = &exit_loop_label;
5417   vixl::aarch64::Label* cmp_failure = &exit_loop_label;
5418 
5419   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5420     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5421     // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
5422     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
5423     Register old_value_temp =
5424         WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u));
5425     // For strong CAS, use a scratch register for the store result in slow path.
5426     // For weak CAS, we need to check the store result, so store it in `store_result`.
5427     Register slow_path_store_result = strong ? Register() : store_result;
5428     ReadBarrierCasSlowPathARM64* rb_slow_path =
5429         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
5430             invoke,
5431             order,
5432             strong,
5433             target.object,
5434             target.offset.X(),
5435             expected_reg,
5436             new_value_reg,
5437             old_value,
5438             old_value_temp,
5439             slow_path_store_result,
5440             /*update_old_value=*/ !return_success,
5441             codegen);
5442     codegen->AddSlowPath(rb_slow_path);
5443     exit_loop = rb_slow_path->GetExitLabel();
5444     cmp_failure = rb_slow_path->GetEntryLabel();
5445   }
5446 
5447   GenerateCompareAndSet(codegen,
5448                         cas_type,
5449                         order,
5450                         strong,
5451                         cmp_failure,
5452                         tmp_ptr,
5453                         new_value_reg,
5454                         old_value,
5455                         store_result,
5456                         expected_reg);
5457   __ Bind(exit_loop);
5458 
5459   if (return_success) {
5460     if (strong) {
5461       __ Cset(out.W(), eq);
5462     } else {
5463       // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
5464       // On failure, either the Z flag is clear or the store result is 0.
5465       // Determine the final success value with a CSEL.
5466       __ Csel(out.W(), store_result, wzr, eq);
5467     }
5468   } else if (byte_swap) {
5469     // Also handles moving to FP registers.
5470     GenerateReverseBytes(masm, value_type, old_value, out);
5471   } else if (DataType::IsFloatingPointType(value_type)) {
5472     __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
5473   } else if (value_type == DataType::Type::kInt8) {
5474     __ Sxtb(out.W(), old_value);
5475   } else if (value_type == DataType::Type::kInt16) {
5476     __ Sxth(out.W(), old_value);
5477   }
5478 
5479   if (slow_path != nullptr) {
5480     DCHECK(!byte_swap);
5481     __ Bind(slow_path->GetExitLabel());
5482   }
5483 }
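
// Editor's note: in outline, GenerateCompareAndSet() above emits the familiar ARM64
// exclusive load/store loop. A rough, simplified sketch with hypothetical registers
// (acquire/release forms depend on `order`, and the helper additionally normalizes the
// exclusive-store result for the weak case as noted in the comments above the Csel):
//
//   retry: LDAXR  wOld, [xPtr]        // load-exclusive the current value
//          CMP    wOld, wExpected
//          B.NE   cmp_failure
//          STLXR  wRes, wNew, [xPtr]  // try to store the new value
//          CBNZ   wRes, retry         // strong CAS: retry if the exclusive store failed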
5484 
5485 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5486   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5487 }
5488 
5489 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5490   GenerateVarHandleCompareAndSetOrExchange(
5491       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5492 }
5493 
5494 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5495   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5496 }
5497 
5498 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5499   GenerateVarHandleCompareAndSetOrExchange(
5500       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5501 }
5502 
5503 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5504   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5505 }
5506 
5507 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5508   GenerateVarHandleCompareAndSetOrExchange(
5509       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5510 }
5511 
5512 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5513   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5514 }
5515 
5516 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5517   GenerateVarHandleCompareAndSetOrExchange(
5518       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5519 }
5520 
5521 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5522   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5523 }
5524 
5525 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5526   GenerateVarHandleCompareAndSetOrExchange(
5527       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5528 }
5529 
5530 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5531   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5532 }
5533 
5534 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5535   GenerateVarHandleCompareAndSetOrExchange(
5536       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5537 }
5538 
5539 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5540   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5541 }
5542 
5543 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5544   GenerateVarHandleCompareAndSetOrExchange(
5545       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5546 }
5547 
5548 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5549   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5550 }
5551 
5552 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5553   GenerateVarHandleCompareAndSetOrExchange(
5554       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5555 }
5556 
5557 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5558                                                  CodeGeneratorARM64* codegen,
5559                                                  GetAndUpdateOp get_and_update_op) {
5560   VarHandleOptimizations optimizations(invoke);
5561   if (optimizations.GetDoNotIntrinsify()) {
5562     return;
5563   }
5564 
5565   // Get the type from the shorty as the invokes may not return a value.
5566   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5567   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5568   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5569     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5570     // the passed reference and reloads it from the field, thus seeing the new value
5571     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
5572     return;
5573   }
5574 
5575   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5576   size_t old_temp_count = locations->GetTempCount();
5577 
5578   DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5579   if (DataType::IsFloatingPointType(value_type)) {
5580     if (get_and_update_op == GetAndUpdateOp::kAdd) {
5581       // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
5582       locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
5583     } else {
5584       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5585       // We can reuse the declaring class temporary if present.
5586       if (old_temp_count == 1u &&
5587           !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5588         // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
5589         locations->AddTemp(Location::RequiresRegister());
5590       }
5591     }
5592   }
5593   // We need a temporary for the byte-swap path of bitwise operations, unless the argument is
5594   // zero, which does not need a byte swap. We can reuse the declaring class temporary if present.
5595   if (old_temp_count == 1u &&
5596       (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
5597       GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
5598       !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5599     if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
5600       locations->AddTemp(Location::RequiresRegister());
5601     }
5602   }
5603 
5604   // Request another temporary register for methods that don't return a value.
5605   // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
5606   DataType::Type return_type = invoke->GetType();
5607   const bool is_void = return_type == DataType::Type::kVoid;
5608   DCHECK_IMPLIES(!is_void, return_type == value_type);
5609   if (is_void) {
5610     if (DataType::IsFloatingPointType(value_type)) {
5611       locations->AddTemp(Location::RequiresFpuRegister());
5612     } else {
5613       locations->AddTemp(Location::RequiresRegister());
5614     }
5615   }
5616 }
5617 
5618 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5619                                           CodeGeneratorARM64* codegen,
5620                                           GetAndUpdateOp get_and_update_op,
5621                                           std::memory_order order,
5622                                           bool byte_swap = false) {
5623   // Get the value type from the shorty, as these invokes may not return a value.
5624   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5625   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5626   bool is_fp = DataType::IsFloatingPointType(value_type);
5627 
5628   MacroAssembler* masm = codegen->GetVIXLAssembler();
5629   LocationSummary* locations = invoke->GetLocations();
5630   CPURegister arg = (is_fp && get_and_update_op == GetAndUpdateOp::kAdd)
5631       ? InputCPURegisterAt(invoke, arg_index)
5632       : InputCPURegisterOrZeroRegAt(invoke, arg_index);
5633   DataType::Type return_type = invoke->GetType();
5634   const bool is_void = return_type == DataType::Type::kVoid;
5635   DCHECK_IMPLIES(!is_void, return_type == value_type);
5636   // We use a temporary for void methods, as we don't return the value.
5637   CPURegister out_or_temp =
5638       is_void ? CPURegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u), value_type) :
5639                 helpers::OutputCPURegister(invoke);
5640 
5641   VarHandleTarget target = GetVarHandleTarget(invoke);
5642   VarHandleSlowPathARM64* slow_path = nullptr;
5643   if (!byte_swap) {
5644     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5645     GenerateVarHandleTarget(invoke, target, codegen);
5646     if (slow_path != nullptr) {
5647       slow_path->SetGetAndUpdateOp(get_and_update_op);
5648       __ Bind(slow_path->GetNativeByteOrderLabel());
5649     }
5650   }
5651 
5652   // This must come before acquiring scratch registers, as MaybeMarkGCCard also uses VIXL temps.
5653   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5654     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5655     // Mark the card for the object; the new reference value is about to be stored.
5656     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5657     codegen->MaybeMarkGCCard(target.object, arg.W(), new_value_can_be_null);
5658   }
5659 
5660   // Reuse the `target.offset` temporary for the pointer to the target location,
5661   // except for references that need the offset for the non-Baker read barrier.
5662   UseScratchRegisterScope temps(masm);
5663   Register tmp_ptr = target.offset.X();
5664   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5665     tmp_ptr = temps.AcquireX();
5666   }
5667   __ Add(tmp_ptr, target.object.X(), target.offset.X());
5668 
5669   // The load/store type is never floating point.
5670   DataType::Type load_store_type = is_fp
5671       ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5672       : value_type;
5673   // Avoid sign extension in the CAS loop. Sign-extend after the loop.
5674   // Note: Using unsigned values yields the same value to store (we do not store higher bits).
5675   if (value_type == DataType::Type::kInt8) {
5676     load_store_type = DataType::Type::kUint8;
5677   } else if (value_type == DataType::Type::kInt16) {
5678     load_store_type = DataType::Type::kUint16;
5679   }
5680 
5681   // Prepare register for old value.
5682   CPURegister old_value = out_or_temp;
5683   if (get_and_update_op == GetAndUpdateOp::kSet) {
5684     // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5685     // rather than moving between core and FP registers in the loop.
5686     arg = MoveToTempIfFpRegister(arg, value_type, masm, &temps);
5687     if (is_fp && !arg.IsZero()) {
5688       // We need a register for the old value, but the new value already occupies a scratch
5689       // register (unless it is the zero bit pattern +0.0f or +0.0) and GenerateGetAndUpdate()
5690       // needs the other one, so use the normal temporary allocated for this case.
5691       old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5692     } else if (value_type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
5693       // Load the old value initially to a scratch register.
5694       // We shall move it to `out` later with a read barrier.
5695       old_value = temps.AcquireW();
5696     }
5697   }
5698 
5699   if (byte_swap) {
5700     DCHECK_NE(value_type, DataType::Type::kReference);
5701     DCHECK_NE(DataType::Size(value_type), 1u);
5702     if (get_and_update_op == GetAndUpdateOp::kAdd) {
5703       // We need to do the byte swapping in the CAS loop for GetAndAdd.
5704       get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5705     } else if (!arg.IsZero()) {
5706       // For other operations, avoid byte swap inside the CAS loop by providing an adjusted `arg`.
5707       // For GetAndSet use a scratch register; FP argument is already in a scratch register.
5708       // For bitwise operations GenerateGetAndUpdate() needs both scratch registers;
5709       // we have allocated a normal temporary to handle that.
5710       CPURegister temp = (get_and_update_op == GetAndUpdateOp::kSet)
5711           ? (is_fp ? arg : (arg.Is64Bits() ? temps.AcquireX() : temps.AcquireW()))
5712           : CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5713       GenerateReverseBytes(masm, load_store_type, arg, temp);
5714       arg = temp;
5715     }
5716   }
5717 
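  // Perform the atomic read-modify-write at [tmp_ptr]; the previous value is left in `old_value`.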
5718   GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);
5719 
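  // Convert the raw old value into the expected result: byte swap if needed, sign-extend narrow
  // integer types, move to an FP register, or apply a read barrier for references.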
5720   if (!is_void) {
5721     if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
5722       // The only adjustment needed is sign-extension for `kInt16`.
5723       // Everything else has already been done by `GenerateGetAndUpdate()`.
5724       DCHECK(byte_swap);
5725       if (value_type == DataType::Type::kInt16) {
5726         DCHECK_EQ(load_store_type, DataType::Type::kUint16);
5727         __ Sxth(out_or_temp.W(), old_value.W());
5728       }
5729     } else if (byte_swap) {
5730       // Also handles moving to FP registers.
5731       GenerateReverseBytes(masm, value_type, old_value, out_or_temp);
5732     } else if (get_and_update_op == GetAndUpdateOp::kSet &&
5733                value_type == DataType::Type::kFloat64) {
5734       __ Fmov(out_or_temp.D(), old_value.X());
5735     } else if (get_and_update_op == GetAndUpdateOp::kSet &&
5736                value_type == DataType::Type::kFloat32) {
5737       __ Fmov(out_or_temp.S(), old_value.W());
5738     } else if (value_type == DataType::Type::kInt8) {
5739       __ Sxtb(out_or_temp.W(), old_value.W());
5740     } else if (value_type == DataType::Type::kInt16) {
5741       __ Sxth(out_or_temp.W(), old_value.W());
5742     } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5743       if (kUseBakerReadBarrier) {
5744         codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out_or_temp.W(), old_value.W());
5745       } else {
5746         codegen->GenerateReadBarrierSlow(
5747             invoke,
5748             Location::RegisterLocation(out_or_temp.GetCode()),
5749             Location::RegisterLocation(old_value.GetCode()),
5750             Location::RegisterLocation(target.object.GetCode()),
5751             /*offset=*/0u,
5752             /*index=*/Location::RegisterLocation(target.offset.GetCode()));
5753       }
5754     }
5755   }
5756 
5757   if (slow_path != nullptr) {
5758     DCHECK(!byte_swap);
5759     __ Bind(slow_path->GetExitLabel());
5760   }
5761 }
5762 
5763 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5764   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5765 }
5766 
5767 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5768   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5769 }
5770 
5771 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5772   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5773 }
5774 
5775 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5776   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5777 }
5778 
5779 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5780   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5781 }
5782 
5783 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5784   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5785 }
5786 
5787 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5788   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5789 }
5790 
5791 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5792   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5793 }
5794 
5795 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5796   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5797 }
5798 
5799 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5800   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5801 }
5802 
5803 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5804   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5805 }
5806 
5807 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5808   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5809 }
5810 
5811 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5812   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5813 }
5814 
5815 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5816   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5817 }
5818 
5819 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5820   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5821 }
5822 
5823 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5824   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5825 }
5826 
5827 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5828   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5829 }
5830 
5831 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5832   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5833 }
5834 
5835 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5836   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5837 }
5838 
5839 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5840   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5841 }
5842 
5843 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5844   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5845 }
5846 
5847 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5848   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5849 }
5850 
5851 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5852   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5853 }
5854 
5855 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5856   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5857 }
5858 
5859 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5860   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5861 }
5862 
5863 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5864   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5865 }
5866 
5867 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5868   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5869 }
5870 
5871 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5872   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5873 }
5874 
5875 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5876   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5877 }
5878 
5879 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5880   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5881 }
5882 
5883 void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5884   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5885   CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
5886   MacroAssembler* masm = codegen->GetVIXLAssembler();
5887   HInvoke* invoke = GetInvoke();
5888   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5889   DataType::Type value_type =
5890       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5891   DCHECK_NE(value_type, DataType::Type::kReference);
5892   size_t size = DataType::Size(value_type);
5893   DCHECK_GT(size, 1u);
5894   Register varhandle = InputRegisterAt(invoke, 0);
5895   Register object = InputRegisterAt(invoke, 1);
5896   Register index = InputRegisterAt(invoke, 2);
5897 
5898   MemberOffset class_offset = mirror::Object::ClassOffset();
5899   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5900   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5901   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5902 
5903   __ Bind(GetByteArrayViewCheckLabel());
5904 
5905   VarHandleTarget target = GetVarHandleTarget(invoke);
5906   {
5907     UseScratchRegisterScope temps(masm);
5908     Register temp = temps.AcquireW();
5909     Register temp2 = temps.AcquireW();
5910 
5911     // The main path checked that coordinateType0 is an array class matching the class of
5912     // the actual coordinate argument, but its component type does not match the value type.
5913     // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5914     __ Ldr(temp, HeapOperand(varhandle, class_offset.Int32Value()));
5915     codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
5916     codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5917     __ Cmp(temp, temp2);
5918     __ B(GetEntryLabel(), ne);
5919 
5920     // Check for array index out of bounds.
5921     __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
5922     __ Subs(temp, temp, index);
5923     __ Ccmp(temp, size, NoFlag, hs);  // If SUBS yields LO (C=false), keep the C flag clear.
5924     __ B(GetEntryLabel(), lo);
5925 
5926     // Construct the target.
5927     __ Add(target.offset, index, data_offset.Int32Value());
5928 
5929     // Alignment check. For unaligned access, go to the runtime.
5930     DCHECK(IsPowerOfTwo(size));
5931     if (size == 2u) {
5932       __ Tbnz(target.offset, 0, GetEntryLabel());
5933     } else {
5934       __ Tst(target.offset, size - 1u);
5935       __ B(GetEntryLabel(), ne);
5936     }
5937 
5938     // Byte order check. For native byte order return to the main path.
5939     if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5940         IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5941       // There is no reason to differentiate between native byte order and byte-swap
5942       // for setting a zero bit pattern. Just return to the main path.
5943       __ B(GetNativeByteOrderLabel());
5944       return;
5945     }
5946     __ Ldr(temp, HeapOperand(varhandle, native_byte_order_offset.Int32Value()));
5947     __ Cbnz(temp, GetNativeByteOrderLabel());
5948   }
5949 
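  // Reaching this point means the view uses the non-native byte order: re-emit the access with
  // byte_swap enabled.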
5950   switch (access_mode_template) {
5951     case mirror::VarHandle::AccessModeTemplate::kGet:
5952       GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5953       break;
5954     case mirror::VarHandle::AccessModeTemplate::kSet:
5955       GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5956       break;
5957     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5958     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5959       GenerateVarHandleCompareAndSetOrExchange(
5960           invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5961       break;
5962     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5963       GenerateVarHandleGetAndUpdate(
5964           invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5965       break;
5966   }
5967   __ B(GetExitLabel());
5968 }
5969 
5970 void IntrinsicLocationsBuilderARM64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
5971   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
5972   LocationSummary* locations = new (allocator)
5973       LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
5974 
5975   InvokeDexCallingConventionVisitorARM64 calling_convention;
5976   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
5977 
5978   locations->SetInAt(0, Location::RequiresRegister());
5979 
5980   // Accommodate the LocationSummary for the underlying invoke-* call.
5981   uint32_t number_of_args = invoke->GetNumberOfArguments();
5982   for (uint32_t i = 1; i < number_of_args; ++i) {
5983     locations->SetInAt(i, calling_convention.GetNextLocation(invoke->InputAt(i)->GetType()));
5984   }
5985 
5986   // The last input is the MethodType object corresponding to the call site.
5987   locations->SetInAt(number_of_args, Location::RequiresRegister());
5988 
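  // Temp 0 is used for the MethodType and handle-kind checks; temp 1 holds the ArtMethod* to call.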
5989   locations->AddTemp(Location::RequiresRegister());
5990   locations->AddTemp(calling_convention.GetMethodLocation());
5991 }
5992 
5993 void IntrinsicCodeGeneratorARM64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
5994   LocationSummary* locations = invoke->GetLocations();
5995 
5996   Register method_handle = InputRegisterAt(invoke, 0);
5997 
5998   SlowPathCodeARM64* slow_path =
5999       new (codegen_->GetScopedAllocator()) InvokePolymorphicSlowPathARM64(invoke, method_handle);
6000   codegen_->AddSlowPath(slow_path);
6001   MacroAssembler* masm = codegen_->GetVIXLAssembler();
6002 
6003   Register call_site_type = InputRegisterAt(invoke, invoke->GetNumberOfArguments());
6004 
6005   // The call site type must match the MethodHandle's type; otherwise take the slow path.
6006   Register temp = WRegisterFrom(locations->GetTemp(0));
6007   __ Ldr(temp, HeapOperand(method_handle.W(), mirror::MethodHandle::MethodTypeOffset()));
6008   codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
6009   __ Cmp(call_site_type, temp);
6010   __ B(ne, slow_path->GetEntryLabel());
6011 
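  // Only method handles of kind kInvokeStatic are handled inline; other kinds take the slow path.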
6012   __ Ldr(temp, HeapOperand(method_handle.W(), mirror::MethodHandle::HandleKindOffset()));
6013   __ Cmp(temp, Operand(mirror::MethodHandle::Kind::kInvokeStatic));
6014   __ B(ne, slow_path->GetEntryLabel());
6015 
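  // Load the target ArtMethod* from the handle and call its quick entry point directly.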
6016   Register method = XRegisterFrom(locations->GetTemp(1));
6017   __ Ldr(method, HeapOperand(method_handle.W(), mirror::MethodHandle::ArtFieldOrMethodOffset()));
6018   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
6019   __ Ldr(lr, MemOperand(method, entry_point.SizeValue()));
6020   __ Blr(lr);
6021   codegen_->RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
6022   __ Bind(slow_path->GetExitLabel());
6023 }
6024 
6025 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name)
6026 UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED);
6027 #undef MARK_UNIMPLEMENTED
6028 
6029 UNREACHABLE_INTRINSICS(ARM64)
6030 
6031 #undef __
6032 
6033 }  // namespace arm64
6034 }  // namespace art
6035