/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "aarch32/constants-aarch32.h"
#include "aarch32/operands-aarch32.h"
#include "arch/arm/callee_save_frame_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "well_known_classes.h"

namespace art HIDDEN {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
                                                   SlowPathCodeARMVIXL,
                                                   ArmVIXLAssembler>;

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitBakerReadBarrier());
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // The source range and destination pointer were initialized before entering the slow-path.
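    // Copy loop: each iteration loads one reference with post-indexed addressing, marks it
    // through the read barrier entrypoint selected by `tmp`'s register code, and stores the
    // result to the destination, also with post-indexed addressing.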
    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

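// The Double/Float raw-bits intrinsics below are pure register moves: VMOV transfers the
// bit pattern between the core and FP register files without any numeric conversion.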
static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

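  // For a 64-bit input the result is CLZ(hi) when the high word is non-zero,
  // otherwise 32 + CLZ(lo).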
  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

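  // ARM has no CTZ instruction, so trailing zeros are counted as CLZ(RBIT(x)).
  // For a 64-bit input the result is CTZ(lo) when the low word is non-zero,
  // otherwise 32 + CTZ(hi).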
310     vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
311     vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
312     vixl32::Label end;
313     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
314     __ Rbit(out, in_reg_lo);
315     __ Clz(out, out);
316     __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
317     __ Rbit(out, in_reg_hi);
318     __ Clz(out, out);
319     __ Add(out, out, 32);
320     if (end.IsReferenced()) {
321       __ Bind(&end);
322     }
323   } else {
324     vixl32::Register in = RegisterFrom(locations->InAt(0));
325     __ Rbit(out, in);
326     __ Clz(out, out);
327   }
328 }
329 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)330 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
331   CreateIntToIntLocations(allocator_, invoke);
332 }
333 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)334 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
335   GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
336 }
337 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)338 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
339   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
340 }
341 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)342 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
343   GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
344 }
345 
VisitMathSqrt(HInvoke * invoke)346 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
347   CreateFPToFPLocations(allocator_, invoke);
348 }
349 
VisitMathSqrt(HInvoke * invoke)350 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
351   ArmVIXLAssembler* assembler = GetAssembler();
352   __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
353 }
354 
VisitMathRint(HInvoke * invoke)355 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
356   if (features_.HasARMv8AInstructions()) {
357     CreateFPToFPLocations(allocator_, invoke);
358   }
359 }
360 
VisitMathRint(HInvoke * invoke)361 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
362   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
363   ArmVIXLAssembler* assembler = GetAssembler();
364   __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
365 }
366 
VisitMathRoundFloat(HInvoke * invoke)367 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
368   if (features_.HasARMv8AInstructions()) {
369     LocationSummary* locations =
370         new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
371     locations->SetInAt(0, Location::RequiresFpuRegister());
372     locations->SetOut(Location::RequiresRegister());
373     locations->AddTemp(Location::RequiresFpuRegister());
374   }
375 }
376 
VisitMathRoundFloat(HInvoke * invoke)377 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
378   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
379 
380   ArmVIXLAssembler* assembler = GetAssembler();
381   vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
382   vixl32::Register out_reg = OutputRegister(invoke);
383   vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
384   vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
385   vixl32::Label done;
386   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
387 
388   // Round to nearest integer, ties away from zero.
389   __ Vcvta(S32, F32, temp1, in_reg);
390   __ Vmov(out_reg, temp1);
391 
392   // For positive, zero or NaN inputs, rounding is done.
393   __ Cmp(out_reg, 0);
394   __ B(ge, final_label, /* is_far_target= */ false);
395 
396   // Handle input < 0 cases.
397   // If input is negative but not a tie, previous result (round to nearest) is valid.
398   // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
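  // Example: for in = -2.5f, the Vcvta above produced -3 (ties away from zero), but
  // Math.round(-2.5f) == -2, so the tie check below adds 1 to the result.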
  __ Vrinta(F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
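  // If the output low register aliases the address register, load the high word first so
  // the address is not clobbered before the second load.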
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddRegisterTemps(3);
  // Need an extra temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

// Forward declaration.
//
// The ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
// by the compiler for every C++ function, and if this function gets inlined in
// IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
// build failure. That is why the NO_INLINE attribute is used.
static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
                                                  HInvoke* invoke,
                                                  vixl32::Label* end,
                                                  vixl32::Label* different_compression);

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  const vixl32::Register str = InputRegisterAt(invoke, 0);
  const vixl32::Register arg = InputRegisterAt(invoke, 1);
  const vixl32::Register out = OutputRegister(invoke);

  const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }

  // Shorter string is empty?
  // Note that mirror::kUseStringCompression==true introduces lots of instructions,
  // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);

  if (mirror::kUseStringCompression) {
    // Check that both strings use the same compression style before using this comparison loop.
    __ Eors(temp2, temp2, temp3);
    __ Lsrs(temp2, temp2, 1u);
    __ B(cs, &different_compression);
    // For string compression, calculate the number of bytes to compare (not chars).
    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.

    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(ne);
    __ add(ne, temp0, temp0, temp0);
  }


  GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);

  __ Bind(&end);

  if (can_slow_path) {
    __ Bind(slow_path->GetExitLabel());
  }
}

static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
                                        HInvoke* invoke,
                                        vixl32::Label* end,
                                        vixl32::Label* different_compression) {
  LocationSummary* locations = invoke->GetLocations();

  const vixl32::Register str = InputRegisterAt(invoke, 0);
  const vixl32::Register arg = InputRegisterAt(invoke, 1);
  const vixl32::Register out = OutputRegister(invoke);

  const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;

  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Store offset of string value in preparation for comparison loop.
  __ Mov(temp1, value_offset);

  // Assertions that must hold in order to compare multiple characters at a time.
  CHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment),
                "String data must be 8-byte aligned for unrolled CompareTo loop.");

  const unsigned char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());

  vixl32::Label find_char_diff_2nd_cmp;
  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
  __ Bind(&loop);
  vixl32::Register temp_reg = temps.Acquire();
  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff, /* is_far_target= */ false);
  __ Add(temp1, temp1, char_size * 2);

  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
  __ Add(temp1, temp1, char_size * 2);
  // With string compression, we have compared 8 bytes, otherwise 4 chars.
  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
  __ B(hi, &loop, /* is_far_target= */ false);
  __ B(end);

  __ Bind(&find_char_diff_2nd_cmp);
  if (mirror::kUseStringCompression) {
    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
    __ B(ls, end, /* is_far_target= */ false);  // Was the second comparison fully beyond the end?
  } else {
    // Without string compression, we can start treating temp0 as signed
    // and rely on the signed comparison below.
    __ Sub(temp0, temp0, 2);
  }

  // Find the single character difference.
  __ Bind(&find_char_diff);
  // Get the bit position of the first character that differs.
  __ Eor(temp1, temp2, temp_reg);
  __ Rbit(temp1, temp1);
  __ Clz(temp1, temp1);

  // temp0 = number of characters remaining to compare.
  // (Without string compression, it could be < 1 if a difference is found by the second CMP
  // in the comparison loop, and after the end of the shorter string data).

  // Without string compression (temp1 >> 4) = character where difference occurs between the last
  // two words compared, in the interval [0,1].
  // (0 for low half-word different, 1 for high half-word different).
  // With string compression, (temp1 << 3) = byte where the difference occurs,
  // in the interval [0,3].

  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
  // the remaining string data, so just return length diff (out).
  // The comparison is unsigned for string compression, otherwise signed.
  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
  __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);

  // Extract the characters and calculate the difference.
  if (mirror::kUseStringCompression) {
    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
    __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
    __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
    __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
    __ Lsr(temp2, temp2, temp1);                        // Extract second character.
    __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
    __ Lsr(out, temp_reg, temp1);                       // Extract first character.
    __ And(temp2, temp2, temp3);
    __ And(out, out, temp3);
  } else {
    __ Bic(temp1, temp1, 0xf);
    __ Lsr(temp2, temp2, temp1);
    __ Lsr(out, temp_reg, temp1);
    __ Movt(temp2, 0);
    __ Movt(out, 0);
  }

  __ Sub(out, out, temp2);
  temps.Release(temp_reg);

  if (mirror::kUseStringCompression) {
    __ B(end);
    __ Bind(different_compression);

    // Comparison for different compression style.
    const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
    DCHECK_EQ(c_char_size, 1u);

    // We want to free up the temp3, currently holding `str.count`, for comparison.
    // So, we move it to the bottom bit of the iteration count `temp0` which we then
    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
    __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
    __ Mov(temp1, str);
    __ Mov(temp2, arg);
    __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
    {
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                             3 * kMaxInstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
      __ itt(cs);                             // Interleave with selection of temp1 and temp2.
      __ mov(cs, temp1, arg);                 // Preserves flags.
      __ mov(cs, temp2, str);                 // Preserves flags.
    }
    __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.

    // Adjust temp1 and temp2 from string pointers to data pointers.
    __ Add(temp1, temp1, value_offset);
    __ Add(temp2, temp2, value_offset);

    vixl32::Label different_compression_loop;
    vixl32::Label different_compression_diff;

    // Main loop for different compression.
    temp_reg = temps.Acquire();
    __ Bind(&different_compression_loop);
    __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
    __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
    __ Cmp(temp_reg, temp3);
    __ B(ne, &different_compression_diff, /* is_far_target= */ false);
    __ Subs(temp0, temp0, 2);
    __ B(hi, &different_compression_loop, /* is_far_target= */ false);
    __ B(end);

    // Calculate the difference.
    __ Bind(&different_compression_diff);
    __ Sub(out, temp_reg, temp3);
    temps.Release(temp_reg);
    // Flip the difference if the `arg` is compressed.
    // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
    __ Lsrs(temp0, temp0, 1u);
    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                  "Expecting 0=compressed, 1=uncompressed");

    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);
    __ it(cc);
    __ rsb(cc, out, out, 0);
  }
}

// The cut off for unrolling the loop in String.equals() intrinsic for const strings.
// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
// Allow up to 12 instructions (32 bytes) for the unrolled loop.
constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;

static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
  if (candidate->IsLoadString()) {
    HLoadString* load_string = candidate->AsLoadString();
    const DexFile& dex_file = load_string->GetDexFile();
    return dex_file.GetStringDataAndUtf16Length(load_string->GetStringIndex(), utf16_length);
  }
  return nullptr;
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Temporary registers to store lengths of strings and for calculations.
  // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
  locations->AddTemp(LocationFrom(r0));

  // For the generic implementation and for long const strings we need an extra temporary.
  // We do not need it for short const strings, up to 4 bytes, see code generation below.
  uint32_t const_string_length = 0u;
  const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
  if (const_string == nullptr) {
    const_string = GetConstString(invoke->InputAt(1), &const_string_length);
  }
  bool is_compressed =
      mirror::kUseStringCompression &&
      const_string != nullptr &&
      mirror::String::DexFileStringAllASCII(const_string, const_string_length);
  if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
    locations->AddTemp(Location::RequiresRegister());
  }

  // TODO: If the String.equals() is used only for an immediately following HIf, we can
  // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
  // Then we shall need an extra temporary register instead of the output register.
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));

  vixl32::Label loop;
  vixl32::Label end;
  vixl32::Label return_true;
  vixl32::Label return_false;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
  }

  // Reference equality check, return true if same reference.
  __ Cmp(str, arg);
  __ B(eq, &return_true, /* is_far_target= */ false);

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // /* HeapReference<Class> */ temp = str->klass_
    __ Ldr(temp, MemOperand(str, class_offset));
    // /* HeapReference<Class> */ out = arg->klass_
    __ Ldr(out, MemOperand(arg, class_offset));
    // Also, because we use the previously loaded class references only in the
    // following comparison, we don't need to unpoison them.
    __ Cmp(temp, out);
    __ B(ne, &return_false, /* is_far_target= */ false);
  }

  // Check if one of the inputs is a const string. Do not special-case both strings
  // being const, such cases should be handled by constant folding if needed.
  uint32_t const_string_length = 0u;
  const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
  if (const_string == nullptr) {
    const_string = GetConstString(invoke->InputAt(1), &const_string_length);
    if (const_string != nullptr) {
      std::swap(str, arg);  // Make sure the const string is in `str`.
    }
  }
  bool is_compressed =
      mirror::kUseStringCompression &&
      const_string != nullptr &&
      mirror::String::DexFileStringAllASCII(const_string, const_string_length);

  if (const_string != nullptr) {
    // Load `count` field of the argument string and check if it matches the const string.
    // Also compares the compression style, if differs return false.
    __ Ldr(temp, MemOperand(arg, count_offset));
    __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
    __ B(ne, &return_false, /* is_far_target= */ false);
  } else {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp, MemOperand(str, count_offset));
    __ Ldr(out, MemOperand(arg, count_offset));
    // Check if `count` fields are equal, return false if they're not.
    // Also compares the compression style, if differs return false.
    __ Cmp(temp, out);
    __ B(ne, &return_false, /* is_far_target= */ false);
  }

  // Assertions that must hold in order to compare strings 4 bytes at a time.
  // Ok to do this because strings are zero-padded to kObjectAlignment.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");

  if (const_string != nullptr &&
      const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
                                            : kShortConstStringEqualsCutoffInBytes / 2u)) {
    // Load and compare the contents. Though we know the contents of the short const string
    // at compile time, materializing constants may be more code than loading from memory.
    int32_t offset = value_offset;
    size_t remaining_bytes =
        RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
    while (remaining_bytes > sizeof(uint32_t)) {
      vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
      vixl32::Register temp2 = scratch_scope.Acquire();
      __ Ldrd(temp, temp1, MemOperand(str, offset));
      __ Ldrd(temp2, out, MemOperand(arg, offset));
      __ Cmp(temp, temp2);
      __ B(ne, &return_false, /* is_far_target= */ false);
      __ Cmp(temp1, out);
      __ B(ne, &return_false, /* is_far_target= */ false);
      offset += 2u * sizeof(uint32_t);
      remaining_bytes -= 2u * sizeof(uint32_t);
    }
    if (remaining_bytes != 0u) {
      __ Ldr(temp, MemOperand(str, offset));
      __ Ldr(out, MemOperand(arg, offset));
      __ Cmp(temp, out);
      __ B(ne, &return_false, /* is_far_target= */ false);
    }
  } else {
    // Return true if both strings are empty. Even with string compression `count == 0` means empty.
    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                  "Expecting 0=compressed, 1=uncompressed");
    __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);

    if (mirror::kUseStringCompression) {
      // For string compression, calculate the number of bytes to compare (not chars).
      // This could in theory exceed INT32_MAX, so treat temp as unsigned.
      __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                             2 * kMaxInstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
      __ it(cs);                                      // If uncompressed,
      __ add(cs, temp, temp, temp);                   //   double the byte count.
    }

    vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
    UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
    vixl32::Register temp2 = scratch_scope.Acquire();

    // Store offset of string value in preparation for comparison loop.
    __ Mov(temp1, value_offset);

    // Loop to compare strings 4 bytes at a time starting at the front of the string.
    __ Bind(&loop);
    __ Ldr(out, MemOperand(str, temp1));
    __ Ldr(temp2, MemOperand(arg, temp1));
    __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
    __ Cmp(out, temp2);
    __ B(ne, &return_false, /* is_far_target= */ false);
    // With string compression, we have compared 4 bytes, otherwise 2 chars.
    __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
    __ B(hi, &loop, /* is_far_target= */ false);
  }

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ Mov(out, 1);
  __ B(final_label);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ Mov(out, 0);

  if (end.IsReferenced()) {
    __ Bind(&end);
  }
}

static void GenerateVisitStringIndexOf(HInvoke* invoke,
                                       ArmVIXLAssembler* assembler,
                                       CodeGeneratorARMVIXL* codegen,
                                       bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
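  // (The fast path only handles 16-bit char values; code points above U+FFFF correspond to
  // surrogate pairs in the UTF-16 data and are left to the slow path.)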
  SlowPathCodeARMVIXL* slow_path = nullptr;
  HInstruction* code_point = invoke->InputAt(1);
  if (code_point->IsIntConstant()) {
    if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
      codegen->AddSlowPath(slow_path);
      __ B(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else if (code_point->GetType() != DataType::Type::kUint16) {
    vixl32::Register char_reg = InputRegisterAt(invoke, 1);
    // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
    __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen->AddSlowPath(slow_path);
    __ B(hs, slow_path->GetEntryLabel());
  }

  if (start_at_zero) {
    vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
    DCHECK(tmp_reg.Is(r2));
    // Start-index = 0.
    __ Mov(tmp_reg, 0);
  }

  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();

  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
  // best to align the inputs accordingly.
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
  locations->SetOut(LocationFrom(r0));

  // Need to send start-index=0.
  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
  // best to align the inputs accordingly.
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
  locations->SetOut(LocationFrom(r0));
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
  locations->SetOut(LocationFrom(r0));
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::Register byte_array = InputRegisterAt(invoke, 0);
  __ Cmp(byte_array, 0);
  SlowPathCodeARMVIXL* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
  codegen_->AddSlowPath(slow_path);
  __ B(eq, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
  locations->SetOut(LocationFrom(r0));
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
  // No need to emit code checking whether `locations->InAt(2)` is a null
  // pointer, as callers of the native method
  //
  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
  //
  // all include a null check on `data` before calling that method.
  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
  locations->SetOut(LocationFrom(r0));
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
  __ Cmp(string_to_copy, 0);
  SlowPathCodeARMVIXL* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
  codegen_->AddSlowPath(slow_path);
  __ B(eq, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();

  __ Bind(slow_path->GetExitLabel());
}

static void GenArrayAddress(ArmVIXLAssembler* assembler,
                            vixl32::Register dest,
                            vixl32::Register base,
                            Location pos,
                            DataType::Type type,
1200                             int32_t data_offset) {
1201   if (pos.IsConstant()) {
1202     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1203     __ Add(dest, base, static_cast<int32_t>(DataType::Size(type)) * constant + data_offset);
1204   } else {
1205     if (data_offset != 0) {
1206       __ Add(dest, base, data_offset);
1207       base = dest;
1208     }
1209     __ Add(dest, base, Operand(RegisterFrom(pos), LSL, DataType::SizeShift(type)));
1210   }
1211 }
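// Worked example for GenArrayAddress() above, assuming a reference array (element size 4, so
// DataType::SizeShift() == 2); the values are illustrative only:
//   pos as the constant 5:  dest = base + (4 * 5 + data_offset)            (one Add)
//   pos in a register:      dest = base + data_offset   (skipped if data_offset == 0)
//                           dest = dest + (pos_reg << 2)
// Either way dest ends up as base + data_offset + pos * element_size.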
1212 
LocationForSystemArrayCopyInput(ArmVIXLAssembler * assembler,HInstruction * input)1213 static Location LocationForSystemArrayCopyInput(ArmVIXLAssembler* assembler, HInstruction* input) {
1214   HIntConstant* const_input = input->AsIntConstantOrNull();
1215   if (const_input != nullptr && assembler->ShifterOperandCanAlwaysHold(const_input->GetValue())) {
1216     return Location::ConstantLocation(const_input);
1217   } else {
1218     return Location::RequiresRegister();
1219   }
1220 }
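// Sketch of the intent of LocationForSystemArrayCopyInput() above: positions/lengths that encode
// as an ARM/Thumb-2 modified immediate (e.g. any value in [0, 255]) can stay as constants and be
// folded directly into the compare/add operands emitted below, while a constant such as
// 0x12345678, which has no modified-immediate encoding, is materialized into a register instead.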
1221 
1222 // We choose to use the native implementation for longer copy lengths.
1223 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1224 
VisitSystemArrayCopy(HInvoke * invoke)1225 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1226   // The only read barrier implementation supporting the
1227   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1228   if (codegen_->EmitNonBakerReadBarrier()) {
1229     return;
1230   }
1231 
1232   constexpr size_t kInitialNumTemps = 3u;  // We need at least three temps.
1233   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1234       invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
1235   if (locations != nullptr) {
1236     locations->SetInAt(1, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(1)));
1237     locations->SetInAt(3, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(3)));
1238     locations->SetInAt(4, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(4)));
1239     if (codegen_->EmitBakerReadBarrier()) {
1240       // Temporary register IP cannot be used in
1241       // ReadBarrierSystemArrayCopySlowPathARMVIXL (because that register
1242       // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1243       // temporary register from the register allocator.
1244       locations->AddTemp(Location::RequiresRegister());
1245     }
1246   }
1247 }
1248 
CheckSystemArrayCopyPosition(ArmVIXLAssembler * assembler,vixl32::Register array,Location pos,Location length,SlowPathCodeARMVIXL * slow_path,vixl32::Register temp,bool length_is_array_length,bool position_sign_checked)1249 static void CheckSystemArrayCopyPosition(ArmVIXLAssembler* assembler,
1250                                          vixl32::Register array,
1251                                          Location pos,
1252                                          Location length,
1253                                          SlowPathCodeARMVIXL* slow_path,
1254                                          vixl32::Register temp,
1255                                          bool length_is_array_length,
1256                                          bool position_sign_checked) {
1257   // Where is the length in the Array?
1258   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1259 
1260   if (pos.IsConstant()) {
1261     int32_t pos_const = Int32ConstantFrom(pos);
1262     if (pos_const == 0) {
1263       if (!length_is_array_length) {
1264         // Check that length(array) >= length.
1265         __ Ldr(temp, MemOperand(array, length_offset));
1266         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1267         __ B(lt, slow_path->GetEntryLabel());
1268       }
1269     } else {
1270       // Calculate length(array) - pos.
1271       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1272       // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1273       __ Ldr(temp, MemOperand(array, length_offset));
1274       __ Sub(temp, temp, pos_const);
1275 
1276       // Check that (length(array) - pos) >= length.
1277       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1278       __ B(lt, slow_path->GetEntryLabel());
1279     }
1280   } else if (length_is_array_length) {
1281     // The only way the copy can succeed is if pos is zero.
1282     vixl32::Register pos_reg = RegisterFrom(pos);
1283     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1284   } else {
1285     // Check that pos >= 0.
1286     vixl32::Register pos_reg = RegisterFrom(pos);
1287     if (!position_sign_checked) {
1288       __ Cmp(pos_reg, 0);
1289       __ B(lt, slow_path->GetEntryLabel());
1290     }
1291 
1292     // Calculate length(array) - pos.
1293     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1294     // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1295     __ Ldr(temp, MemOperand(array, length_offset));
1296     __ Sub(temp, temp, pos_reg);
1297 
1298     // Check that (length(array) - pos) >= length.
1299     __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1300     __ B(lt, slow_path->GetEntryLabel());
1301   }
1302 }
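// In Java terms, CheckSystemArrayCopyPosition() above branches to the slow path unless
//   pos >= 0 && array.length - pos >= length
// holds (the sign check is elided when the caller has already established it). For example, with
// array.length == 10, pos == 7 and length == 4, the subtraction yields 3, 3 < 4, and the copy is
// deferred to the runtime.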
1303 
VisitSystemArrayCopy(HInvoke * invoke)1304 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1305   // The only read barrier implementation supporting the
1306   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1307   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1308 
1309   ArmVIXLAssembler* assembler = GetAssembler();
1310   LocationSummary* locations = invoke->GetLocations();
1311 
1312   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1313   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1314   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1315   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1316   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1317 
1318   vixl32::Register src = InputRegisterAt(invoke, 0);
1319   Location src_pos = locations->InAt(1);
1320   vixl32::Register dest = InputRegisterAt(invoke, 2);
1321   Location dest_pos = locations->InAt(3);
1322   Location length = locations->InAt(4);
1323   Location temp1_loc = locations->GetTemp(0);
1324   vixl32::Register temp1 = RegisterFrom(temp1_loc);
1325   Location temp2_loc = locations->GetTemp(1);
1326   vixl32::Register temp2 = RegisterFrom(temp2_loc);
1327   Location temp3_loc = locations->GetTemp(2);
1328   vixl32::Register temp3 = RegisterFrom(temp3_loc);
1329 
1330   SlowPathCodeARMVIXL* intrinsic_slow_path =
1331       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1332   codegen_->AddSlowPath(intrinsic_slow_path);
1333 
1334   vixl32::Label conditions_on_positions_validated;
1335   SystemArrayCopyOptimizations optimizations(invoke);
1336 
1337   // If source and destination are the same, we go to the slow path when dest_pos > src_pos, as
1338   // the forward copy below would be incorrect. No check is needed when the positions are equal.
1339   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1340     if (src_pos.IsConstant()) {
1341       int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1342       if (dest_pos.IsConstant()) {
1343         int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1344         if (optimizations.GetDestinationIsSource()) {
1345           // Checked when building locations.
1346           DCHECK_GE(src_pos_constant, dest_pos_constant);
1347         } else if (src_pos_constant < dest_pos_constant) {
1348           __ Cmp(src, dest);
1349           __ B(eq, intrinsic_slow_path->GetEntryLabel());
1350         }
1351       } else {
1352         if (!optimizations.GetDestinationIsSource()) {
1353           __ Cmp(src, dest);
1354           __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1355         }
1356         __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1357         __ B(gt, intrinsic_slow_path->GetEntryLabel());
1358       }
1359     } else {
1360       if (!optimizations.GetDestinationIsSource()) {
1361         __ Cmp(src, dest);
1362         __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1363       }
1364       __ Cmp(RegisterFrom(src_pos), OperandFrom(dest_pos, DataType::Type::kInt32));
1365       __ B(lt, intrinsic_slow_path->GetEntryLabel());
1366     }
1367   }
1368 
1369   __ Bind(&conditions_on_positions_validated);
1370 
1371   if (!optimizations.GetSourceIsNotNull()) {
1372     // Bail out if the source is null.
1373     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1374   }
1375 
1376   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1377     // Bail out if the destination is null.
1378     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1379   }
1380 
1381   // We have already checked in the LocationsBuilder for the constant case.
1382   if (!length.IsConstant()) {
1383     // Merge the following two comparisons into one:
1384     //   If the length is negative, bail out (delegate to libcore's native implementation).
1385     //   If the length is >= 128, then (currently) prefer the native implementation.
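    //   Treating the signed length as unsigned makes this a single test: any negative int32 becomes
    //   a value >= 0x80000000u and thus also compares "higher or same" against 128 (e.g. length == -1
    //   is seen as 0xFFFFFFFFu), so the B.HS below covers both cases.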
1386     __ Cmp(RegisterFrom(length), kSystemArrayCopyThreshold);
1387     __ B(hs, intrinsic_slow_path->GetEntryLabel());
1388   }
1389 
1390   // Validity checks: source.
1391   CheckSystemArrayCopyPosition(assembler,
1392                                src,
1393                                src_pos,
1394                                length,
1395                                intrinsic_slow_path,
1396                                temp1,
1397                                optimizations.GetCountIsSourceLength(),
1398                                /*position_sign_checked=*/ false);
1399 
1400   // Validity checks: dest.
1401   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1402   CheckSystemArrayCopyPosition(assembler,
1403                                dest,
1404                                dest_pos,
1405                                length,
1406                                intrinsic_slow_path,
1407                                temp1,
1408                                optimizations.GetCountIsDestinationLength(),
1409                                dest_position_sign_checked);
1410 
1411   auto check_non_primitive_array_class = [&](vixl32::Register klass, vixl32::Register temp) {
1412     // No read barrier is needed for reading a chain of constant references for comparing
1413     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1414     // /* HeapReference<Class> */ temp = klass->component_type_
1415     __ Ldr(temp, MemOperand(klass, component_offset));
1416     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
1417     // Check that the component type is not null.
1418     __ CompareAndBranchIfZero(temp, intrinsic_slow_path->GetEntryLabel());
1419     // Check that the component type is not a primitive.
1420     // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
1421     __ Ldrh(temp, MemOperand(temp, primitive_offset));
1422     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1423     __ CompareAndBranchIfNonZero(temp, intrinsic_slow_path->GetEntryLabel());
1424   };
1425 
1426   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1427     // Check whether all elements of the source array are assignable to the component
1428     // type of the destination array. We do two checks: the classes are the same,
1429     // or the destination is Object[]. If none of these checks succeed, we go to the
1430     // slow path.
1431 
1432     if (codegen_->EmitBakerReadBarrier()) {
1433       // /* HeapReference<Class> */ temp1 = dest->klass_
1434       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1435           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1436       // Register `temp1` is not trashed by the read barrier emitted
1437       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1438       // method produces a call to a ReadBarrierMarkRegX entry point,
1439       // which saves all potentially live registers, including
1440       // temporaries such as `temp1`.
1441       // /* HeapReference<Class> */ temp2 = src->klass_
1442       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1443           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1444     } else {
1445       // /* HeapReference<Class> */ temp1 = dest->klass_
1446       __ Ldr(temp1, MemOperand(dest, class_offset));
1447       assembler->MaybeUnpoisonHeapReference(temp1);
1448       // /* HeapReference<Class> */ temp2 = src->klass_
1449       __ Ldr(temp2, MemOperand(src, class_offset));
1450       assembler->MaybeUnpoisonHeapReference(temp2);
1451     }
1452 
1453     __ Cmp(temp1, temp2);
1454     if (optimizations.GetDestinationIsTypedObjectArray()) {
1455       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1456       vixl32::Label do_copy;
1457       // For class match, we can skip the source type check regardless of the optimization flag.
1458       __ B(eq, &do_copy, /* is_far_target= */ false);
1459       // No read barrier is needed for reading a chain of constant references
1460       // for comparing with null, see `ReadBarrierOption`.
1461       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1462       __ Ldr(temp1, MemOperand(temp1, component_offset));
1463       assembler->MaybeUnpoisonHeapReference(temp1);
1464       // /* HeapReference<Class> */ temp1 = temp1->super_class_
1465       __ Ldr(temp1, MemOperand(temp1, super_offset));
1466       // No need to unpoison the result, we're comparing against null.
1467       __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1468       // Bail out if the source is not a non-primitive array.
1469       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1470         check_non_primitive_array_class(temp2, temp2);
1471       }
1472       __ Bind(&do_copy);
1473     } else {
1474       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1475       // For class match, we can skip the array type check completely if at least one of source
1476       // and destination is known to be a non-primitive array; otherwise one check is enough.
1477       __ B(ne, intrinsic_slow_path->GetEntryLabel());
1478       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1479           !optimizations.GetSourceIsNonPrimitiveArray()) {
1480         check_non_primitive_array_class(temp2, temp2);
1481       }
1482     }
1483   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1484     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1485     // Bail out if the source is not a non-primitive array.
1486     // No read barrier is needed for reading a chain of constant references for comparing
1487     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1488     // /* HeapReference<Class> */ temp2 = src->klass_
1489     __ Ldr(temp2, MemOperand(src, class_offset));
1490     assembler->MaybeUnpoisonHeapReference(temp2);
1491     check_non_primitive_array_class(temp2, temp2);
1492   }
1493 
1494   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1495     // Zero constant length: no need to emit the loop code at all.
1496   } else {
1497     vixl32::Label skip_copy_and_write_barrier;
1498     if (length.IsRegister()) {
1499       // Don't enter the copy loop if the length is zero.
1500       __ CompareAndBranchIfZero(
1501           RegisterFrom(length), &skip_copy_and_write_barrier, /* is_far_target= */ false);
1502     }
1503 
1504     const DataType::Type type = DataType::Type::kReference;
1505     const int32_t element_size = DataType::Size(type);
1506     const int32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1507 
1508     SlowPathCodeARMVIXL* read_barrier_slow_path = nullptr;
1509     vixl32::Register rb_tmp;
1510     bool emit_rb = codegen_->EmitBakerReadBarrier();
1511     if (emit_rb) {
1512       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1513 
1514       // SystemArrayCopy implementation for Baker read barriers (see
1515       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1516       //
1517       //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1518       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1519       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
1520       //   if (is_gray) {
1521       //     // Slow-path copy.
1522       //     do {
1523       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1524       //     } while (src_ptr != end_ptr)
1525       //   } else {
1526       //     // Fast-path copy.
1527       //     do {
1528       //       *dest_ptr++ = *src_ptr++;
1529       //     } while (src_ptr != end_ptr)
1530       //   }
1531 
1532       // /* int32_t */ monitor = src->monitor_
1533       rb_tmp = RegisterFrom(locations->GetTemp(3));
1534       __ Ldr(rb_tmp, MemOperand(src, monitor_offset));
1535       // /* LockWord */ lock_word = LockWord(monitor)
1536       static_assert(sizeof(LockWord) == sizeof(int32_t),
1537                     "art::LockWord and int32_t have different sizes.");
1538 
1539       // Introduce a dependency on the lock_word including the rb_state,
1540       // which shall prevent load-load reordering without using
1541       // a memory barrier (which would be more expensive).
1542       // `src` is unchanged by this operation, but its value now depends
1543       // on `rb_tmp`.
1544       __ Add(src, src, Operand(rb_tmp, vixl32::LSR, 32));
1545 
1546       // Slow path used to copy array when `src` is gray.
1547       // Note that the base destination address is computed in `temp2`
1548       // by the slow path code.
1549       read_barrier_slow_path =
1550           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1551       codegen_->AddSlowPath(read_barrier_slow_path);
1552     }
1553 
1554     // Compute the base source address in `temp1`.
1555     // Note that for read barrier, `temp1` (the base source address) is computed from `src`
1556     // (and `src_pos`) here, and thus honors the artificial dependency of `src` on `rb_tmp`.
1557     GenArrayAddress(GetAssembler(), temp1, src, src_pos, type, data_offset);
1558     // Compute the base destination address in `temp2`.
1559     GenArrayAddress(GetAssembler(), temp2, dest, dest_pos, type, data_offset);
1560     // Compute the end source address in `temp3`.
1561     GenArrayAddress(GetAssembler(), temp3, temp1, length, type, /*data_offset=*/ 0);
1562 
1563     if (emit_rb) {
1564       // Given the numeric representation, it's enough to check the low bit of the
1565       // rb_state. We do that by shifting the bit out of the lock word with LSRS
1566       // which can be a 16-bit instruction unlike the TST immediate.
1567       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1568       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1569       DCHECK(rb_tmp.IsValid());
1570       __ Lsrs(rb_tmp, rb_tmp, LockWord::kReadBarrierStateShift + 1);
1571       // Carry flag is the last bit shifted out by LSRS.
1572       __ B(cs, read_barrier_slow_path->GetEntryLabel());
1573     }
1574 
1575     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1576     // poison/unpoison.
1577     vixl32::Label loop;
1578     __ Bind(&loop);
1579     {
1580       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1581       const vixl32::Register temp_reg = temps.Acquire();
1582       __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1583       __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1584     }
1585     __ Cmp(temp1, temp3);
1586     __ B(ne, &loop, /* is_far_target= */ false);
1587 
1588     if (emit_rb) {
1589       DCHECK(read_barrier_slow_path != nullptr);
1590       __ Bind(read_barrier_slow_path->GetExitLabel());
1591     }
1592 
1593     // We only need one card marking on the destination array.
1594     codegen_->MarkGCCard(temp1, temp2, dest);
1595 
1596     __ Bind(&skip_copy_and_write_barrier);
1597   }
1598 
1599   __ Bind(intrinsic_slow_path->GetExitLabel());
1600 }
1601 
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1602 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1603   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1604   // the code generator. Furthermore, the register allocator creates fixed live intervals
1605   // for all caller-saved registers because we are doing a function call. As a result, if
1606   // the input and output locations are unallocated, the register allocator runs out of
1607   // registers and fails; however, a debuggable graph is not the common case.
1608   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1609     return;
1610   }
1611 
1612   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1613   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1614   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1615 
1616   LocationSummary* const locations =
1617       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1618   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1619 
1620   locations->SetInAt(0, Location::RequiresFpuRegister());
1621   locations->SetOut(Location::RequiresFpuRegister());
1622   // Native code uses the soft float ABI.
1623   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1624   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1625 }
1626 
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1627 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1628   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1629   // the code generator. Furthermore, the register allocator creates fixed live intervals
1630   // for all caller-saved registers because we are doing a function call. As a result, if
1631   // the input and output locations are unallocated, the register allocator runs out of
1632   // registers and fails; however, a debuggable graph is not the common case.
1633   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1634     return;
1635   }
1636 
1637   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1638   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1639   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1640   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1641 
1642   LocationSummary* const locations =
1643       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1644   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1645 
1646   locations->SetInAt(0, Location::RequiresFpuRegister());
1647   locations->SetInAt(1, Location::RequiresFpuRegister());
1648   locations->SetOut(Location::RequiresFpuRegister());
1649   // Native code uses the soft float ABI.
1650   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1651   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1652   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1653   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1654 }
1655 
GenFPToFPCall(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,QuickEntrypointEnum entry)1656 static void GenFPToFPCall(HInvoke* invoke,
1657                           ArmVIXLAssembler* assembler,
1658                           CodeGeneratorARMVIXL* codegen,
1659                           QuickEntrypointEnum entry) {
1660   LocationSummary* const locations = invoke->GetLocations();
1661 
1662   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1663   DCHECK(locations->WillCall() && locations->Intrinsified());
1664 
1665   // Native code uses the soft float ABI.
1666   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1667           RegisterFrom(locations->GetTemp(1)),
1668           InputDRegisterAt(invoke, 0));
1669   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1670   __ Vmov(OutputDRegister(invoke),
1671           RegisterFrom(locations->GetTemp(0)),
1672           RegisterFrom(locations->GetTemp(1)));
1673 }
1674 
GenFPFPToFPCall(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,QuickEntrypointEnum entry)1675 static void GenFPFPToFPCall(HInvoke* invoke,
1676                             ArmVIXLAssembler* assembler,
1677                             CodeGeneratorARMVIXL* codegen,
1678                             QuickEntrypointEnum entry) {
1679   LocationSummary* const locations = invoke->GetLocations();
1680 
1681   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1682   DCHECK(locations->WillCall() && locations->Intrinsified());
1683 
1684   // Native code uses the soft float ABI.
1685   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1686           RegisterFrom(locations->GetTemp(1)),
1687           InputDRegisterAt(invoke, 0));
1688   __ Vmov(RegisterFrom(locations->GetTemp(2)),
1689           RegisterFrom(locations->GetTemp(3)),
1690           InputDRegisterAt(invoke, 1));
1691   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1692   __ Vmov(OutputDRegister(invoke),
1693           RegisterFrom(locations->GetTemp(0)),
1694           RegisterFrom(locations->GetTemp(1)));
1695 }
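// Sketch of the calling convention dance performed by GenFPToFPCall() / GenFPFPToFPCall() above,
// using the soft-float runtime convention mentioned in the comments (register numbers illustrative;
// the actual registers come from the location summary):
//   VMOV r0, r1, d0      // first double argument into a core register pair
//   VMOV r2, r3, d1      // second argument, two-argument case only
//   <call entrypoint>    // e.g. kQuickCos or kQuickAtan2 via InvokeRuntime()
//   VMOV d0, r0, r1      // double result back into the output D register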
1696 
VisitMathCos(HInvoke * invoke)1697 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1698   CreateFPToFPCallLocations(allocator_, invoke);
1699 }
1700 
VisitMathCos(HInvoke * invoke)1701 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1702   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1703 }
1704 
VisitMathSin(HInvoke * invoke)1705 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1706   CreateFPToFPCallLocations(allocator_, invoke);
1707 }
1708 
VisitMathSin(HInvoke * invoke)1709 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1710   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1711 }
1712 
VisitMathAcos(HInvoke * invoke)1713 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1714   CreateFPToFPCallLocations(allocator_, invoke);
1715 }
1716 
VisitMathAcos(HInvoke * invoke)1717 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1718   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1719 }
1720 
VisitMathAsin(HInvoke * invoke)1721 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1722   CreateFPToFPCallLocations(allocator_, invoke);
1723 }
1724 
VisitMathAsin(HInvoke * invoke)1725 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1726   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1727 }
1728 
VisitMathAtan(HInvoke * invoke)1729 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1730   CreateFPToFPCallLocations(allocator_, invoke);
1731 }
1732 
VisitMathAtan(HInvoke * invoke)1733 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1734   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1735 }
1736 
VisitMathCbrt(HInvoke * invoke)1737 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1738   CreateFPToFPCallLocations(allocator_, invoke);
1739 }
1740 
VisitMathCbrt(HInvoke * invoke)1741 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1742   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1743 }
1744 
VisitMathCosh(HInvoke * invoke)1745 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1746   CreateFPToFPCallLocations(allocator_, invoke);
1747 }
1748 
VisitMathCosh(HInvoke * invoke)1749 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1750   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1751 }
1752 
VisitMathExp(HInvoke * invoke)1753 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1754   CreateFPToFPCallLocations(allocator_, invoke);
1755 }
1756 
VisitMathExp(HInvoke * invoke)1757 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1758   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1759 }
1760 
VisitMathExpm1(HInvoke * invoke)1761 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1762   CreateFPToFPCallLocations(allocator_, invoke);
1763 }
1764 
VisitMathExpm1(HInvoke * invoke)1765 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1766   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1767 }
1768 
VisitMathLog(HInvoke * invoke)1769 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1770   CreateFPToFPCallLocations(allocator_, invoke);
1771 }
1772 
VisitMathLog(HInvoke * invoke)1773 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1774   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1775 }
1776 
VisitMathLog10(HInvoke * invoke)1777 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1778   CreateFPToFPCallLocations(allocator_, invoke);
1779 }
1780 
VisitMathLog10(HInvoke * invoke)1781 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1782   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1783 }
1784 
VisitMathSinh(HInvoke * invoke)1785 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1786   CreateFPToFPCallLocations(allocator_, invoke);
1787 }
1788 
VisitMathSinh(HInvoke * invoke)1789 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1790   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1791 }
1792 
VisitMathTan(HInvoke * invoke)1793 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1794   CreateFPToFPCallLocations(allocator_, invoke);
1795 }
1796 
VisitMathTan(HInvoke * invoke)1797 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1798   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1799 }
1800 
VisitMathTanh(HInvoke * invoke)1801 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1802   CreateFPToFPCallLocations(allocator_, invoke);
1803 }
1804 
VisitMathTanh(HInvoke * invoke)1805 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1806   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1807 }
1808 
VisitMathAtan2(HInvoke * invoke)1809 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1810   CreateFPFPToFPCallLocations(allocator_, invoke);
1811 }
1812 
VisitMathAtan2(HInvoke * invoke)1813 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1814   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1815 }
1816 
VisitMathPow(HInvoke * invoke)1817 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1818   CreateFPFPToFPCallLocations(allocator_, invoke);
1819 }
1820 
VisitMathPow(HInvoke * invoke)1821 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1822   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1823 }
1824 
VisitMathHypot(HInvoke * invoke)1825 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1826   CreateFPFPToFPCallLocations(allocator_, invoke);
1827 }
1828 
VisitMathHypot(HInvoke * invoke)1829 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1830   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1831 }
1832 
VisitMathNextAfter(HInvoke * invoke)1833 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1834   CreateFPFPToFPCallLocations(allocator_, invoke);
1835 }
1836 
VisitMathNextAfter(HInvoke * invoke)1837 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1838   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1839 }
1840 
VisitIntegerReverse(HInvoke * invoke)1841 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1842   CreateIntToIntLocations(allocator_, invoke);
1843 }
1844 
VisitIntegerReverse(HInvoke * invoke)1845 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1846   ArmVIXLAssembler* assembler = GetAssembler();
1847   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1848 }
1849 
VisitLongReverse(HInvoke * invoke)1850 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1851   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1852 }
1853 
VisitLongReverse(HInvoke * invoke)1854 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1855   ArmVIXLAssembler* assembler = GetAssembler();
1856   LocationSummary* locations = invoke->GetLocations();
1857 
1858   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
1859   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
1860   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1861   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1862 
1863   __ Rbit(out_reg_lo, in_reg_hi);
1864   __ Rbit(out_reg_hi, in_reg_lo);
1865 }
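// Worked example for the 64-bit reversal above: Long.reverse() maps bit i to bit 63 - i, which is
// the same as bit-reversing each 32-bit half and swapping the halves. E.g. for the illustrative
// input 0x0000000100000000L (only bit 32 set), RBIT of the high word 0x00000001 gives 0x80000000,
// which becomes the low word of the result: 0x0000000080000000L (only bit 31 set).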
1866 
GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler * assembler,Location pair)1867 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1868   DCHECK(pair.IsRegisterPair());
1869   __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1870   __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
1871 }
1872 
GenerateReverseBytes(ArmVIXLAssembler * assembler,DataType::Type type,Location in,Location out)1873 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
1874                                  DataType::Type type,
1875                                  Location in,
1876                                  Location out) {
1877   switch (type) {
1878     case DataType::Type::kUint16:
1879       __ Rev16(RegisterFrom(out), RegisterFrom(in));
1880       break;
1881     case DataType::Type::kInt16:
1882       __ Revsh(RegisterFrom(out), RegisterFrom(in));
1883       break;
1884     case DataType::Type::kInt32:
1885       __ Rev(RegisterFrom(out), RegisterFrom(in));
1886       break;
1887     case DataType::Type::kInt64:
1888       DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
1889       __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
1890       __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
1891       break;
1892     case DataType::Type::kFloat32:
1893       __ Rev(RegisterFrom(in), RegisterFrom(in));  // Note: Clobbers `in`.
1894       __ Vmov(SRegisterFrom(out), RegisterFrom(in));
1895       break;
1896     case DataType::Type::kFloat64:
1897       GenerateReverseBytesInPlaceForEachWord(assembler, in);  // Note: Clobbers `in`.
1898       __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in));  // Swap high/low.
1899       break;
1900     default:
1901       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
1902       UNREACHABLE();
1903   }
1904 }
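// Worked examples for GenerateReverseBytes() above (values are illustrative only):
//   kInt32:  REV   0x12345678 -> 0x78563412
//   kInt16:  REVSH 0x00000080 -> 0xFFFF8000  (byte-swapped halfword, then sign-extended)
//   kInt64:  each word is byte-reversed and the two words are swapped, so
//            0x0102030405060708 -> 0x0807060504030201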
1905 
VisitIntegerReverseBytes(HInvoke * invoke)1906 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1907   CreateIntToIntLocations(allocator_, invoke);
1908 }
1909 
VisitIntegerReverseBytes(HInvoke * invoke)1910 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1911   ArmVIXLAssembler* assembler = GetAssembler();
1912   LocationSummary* locations = invoke->GetLocations();
1913   GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
1914 }
1915 
VisitLongReverseBytes(HInvoke * invoke)1916 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1917   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1918 }
1919 
VisitLongReverseBytes(HInvoke * invoke)1920 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1921   ArmVIXLAssembler* assembler = GetAssembler();
1922   LocationSummary* locations = invoke->GetLocations();
1923   GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
1924 }
1925 
VisitShortReverseBytes(HInvoke * invoke)1926 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1927   CreateIntToIntLocations(allocator_, invoke);
1928 }
1929 
VisitShortReverseBytes(HInvoke * invoke)1930 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1931   ArmVIXLAssembler* assembler = GetAssembler();
1932   LocationSummary* locations = invoke->GetLocations();
1933   GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
1934 }
1935 
GenBitCount(HInvoke * instr,DataType::Type type,ArmVIXLAssembler * assembler)1936 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
1937   DCHECK(DataType::IsIntOrLongType(type)) << type;
1938   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
1939   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
1940 
1941   bool is_long = type == DataType::Type::kInt64;
1942   LocationSummary* locations = instr->GetLocations();
1943   Location in = locations->InAt(0);
1944   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
1945   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
1946   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
1947   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
1948   vixl32::Register  out_r = OutputRegister(instr);
1949 
1950   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
1951   // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
1952   // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
1953   // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
1954   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
1955   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
1956   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
1957   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
1958   if (is_long) {
1959     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
1960   }
1961   __ Vmov(out_r, tmp_s);
1962 }
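// Worked example for GenBitCount() above with the illustrative int input 0x000000FF (for the int
// case both halves of the D register hold the same value, see the comment above):
//   VCNT.8 turns each byte into its population count (one byte per word counts 8, the rest 0),
//   VPADDL.U8 then VPADDL.U16 sum those counts into one 32-bit value per word (8 and 8 here),
//   and the low S register is moved to the core result: Integer.bitCount(0xFF) == 8.
// The long case adds one more VPADDL.U32 so both word sums collapse into a single count.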
1963 
VisitIntegerBitCount(HInvoke * invoke)1964 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1965   CreateIntToIntLocations(allocator_, invoke);
1966   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1967 }
1968 
VisitIntegerBitCount(HInvoke * invoke)1969 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1970   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
1971 }
1972 
VisitLongBitCount(HInvoke * invoke)1973 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1974   VisitIntegerBitCount(invoke);
1975 }
1976 
VisitLongBitCount(HInvoke * invoke)1977 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1978   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
1979 }
1980 
GenHighestOneBit(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)1981 static void GenHighestOneBit(HInvoke* invoke,
1982                              DataType::Type type,
1983                              CodeGeneratorARMVIXL* codegen) {
1984   DCHECK(DataType::IsIntOrLongType(type));
1985 
1986   ArmVIXLAssembler* assembler = codegen->GetAssembler();
1987   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1988   const vixl32::Register temp = temps.Acquire();
1989 
1990   if (type == DataType::Type::kInt64) {
1991     LocationSummary* locations = invoke->GetLocations();
1992     Location in = locations->InAt(0);
1993     Location out = locations->Out();
1994 
1995     vixl32::Register in_reg_lo = LowRegisterFrom(in);
1996     vixl32::Register in_reg_hi = HighRegisterFrom(in);
1997     vixl32::Register out_reg_lo = LowRegisterFrom(out);
1998     vixl32::Register out_reg_hi = HighRegisterFrom(out);
1999 
2000     __ Mov(temp, 0x80000000);  // Modified immediate.
2001     __ Clz(out_reg_lo, in_reg_lo);
2002     __ Clz(out_reg_hi, in_reg_hi);
2003     __ Lsr(out_reg_lo, temp, out_reg_lo);
2004     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2005 
2006     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2007     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2008     // we check that the output is in a low register, so that a 16-bit MOV
2009     // encoding can be used. If output is in a high register, then we generate
2010     // 4 more bytes of code to avoid a branch.
2011     Operand mov_src(0);
2012     if (!out_reg_lo.IsLow()) {
2013       __ Mov(LeaveFlags, temp, 0);
2014       mov_src = Operand(temp);
2015     }
2016     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2017                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2018                                   CodeBufferCheckScope::kExactSize);
2019     __ it(ne);
2020     __ mov(ne, out_reg_lo, mov_src);
2021   } else {
2022     vixl32::Register out = OutputRegister(invoke);
2023     vixl32::Register in = InputRegisterAt(invoke, 0);
2024 
2025     __ Mov(temp, 0x80000000);  // Modified immediate.
2026     __ Clz(out, in);
2027     __ Lsr(out, temp, out);
2028   }
2029 }
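// Worked example for GenHighestOneBit() above (32-bit case, illustrative value): for x != 0,
// Integer.highestOneBit(x) == 0x80000000u >> CLZ(x); e.g. x == 0x00012345 has CLZ == 15 and
// 0x80000000u >> 15 == 0x00010000. For x == 0, CLZ returns 32 and the register-specified LSR by
// 32 yields 0, as required. In the 64-bit case the LSRS on the high half sets the flags, and the
// IT/MOV pair clears the low result whenever the high half already produced a nonzero bit.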
2030 
VisitIntegerHighestOneBit(HInvoke * invoke)2031 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2032   CreateIntToIntLocations(allocator_, invoke);
2033 }
2034 
VisitIntegerHighestOneBit(HInvoke * invoke)2035 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2036   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2037 }
2038 
VisitLongHighestOneBit(HInvoke * invoke)2039 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2040   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2041 }
2042 
VisitLongHighestOneBit(HInvoke * invoke)2043 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2044   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2045 }
2046 
GenLowestOneBit(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)2047 static void GenLowestOneBit(HInvoke* invoke,
2048                             DataType::Type type,
2049                             CodeGeneratorARMVIXL* codegen) {
2050   DCHECK(DataType::IsIntOrLongType(type));
2051 
2052   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2053   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2054   const vixl32::Register temp = temps.Acquire();
2055 
2056   if (type == DataType::Type::kInt64) {
2057     LocationSummary* locations = invoke->GetLocations();
2058     Location in = locations->InAt(0);
2059     Location out = locations->Out();
2060 
2061     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2062     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2063     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2064     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2065 
2066     __ Rsb(out_reg_hi, in_reg_hi, 0);
2067     __ Rsb(out_reg_lo, in_reg_lo, 0);
2068     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2069     // The result of this operation is 0 iff in_reg_lo is 0.
2070     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2071 
2072     // Discard result for highest 32 bits if lowest 32 bits are not zero.
2073     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2074     // we check that the output is in a low register, so that a 16-bit MOV
2075     // encoding can be used. If output is in a high register, then we generate
2076     // 4 more bytes of code to avoid a branch.
2077     Operand mov_src(0);
2078     if (!out_reg_lo.IsLow()) {
2079       __ Mov(LeaveFlags, temp, 0);
2080       mov_src = Operand(temp);
2081     }
2082     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2083                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2084                                   CodeBufferCheckScope::kExactSize);
2085     __ it(ne);
2086     __ mov(ne, out_reg_hi, mov_src);
2087   } else {
2088     vixl32::Register out = OutputRegister(invoke);
2089     vixl32::Register in = InputRegisterAt(invoke, 0);
2090 
2091     __ Rsb(temp, in, 0);
2092     __ And(out, temp, in);
2093   }
2094 }
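// Worked example for GenLowestOneBit() above: x & -x isolates the lowest set bit, e.g. for the
// illustrative value x == 0xB4 (0b10110100), -x ends in ...01001100 and x & -x == 0b100 == 4,
// which is Integer.lowestOneBit(0xB4). In the 64-bit case the ANDS on the low half sets the flags,
// and the IT/MOV pair clears the high result whenever the lowest set bit was found in the low word.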
2095 
VisitIntegerLowestOneBit(HInvoke * invoke)2096 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2097   CreateIntToIntLocations(allocator_, invoke);
2098 }
2099 
VisitIntegerLowestOneBit(HInvoke * invoke)2100 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2101   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2102 }
2103 
VisitLongLowestOneBit(HInvoke * invoke)2104 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2105   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2106 }
2107 
VisitLongLowestOneBit(HInvoke * invoke)2108 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2109   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2110 }
2111 
VisitStringGetCharsNoCheck(HInvoke * invoke)2112 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2113   LocationSummary* locations =
2114       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2115   locations->SetInAt(0, Location::RequiresRegister());
2116   locations->SetInAt(1, Location::RequiresRegister());
2117   locations->SetInAt(2, Location::RequiresRegister());
2118   locations->SetInAt(3, Location::RequiresRegister());
2119   locations->SetInAt(4, Location::RequiresRegister());
2120 
2121   // Temporary registers to store lengths of strings and for calculations.
2122   locations->AddRegisterTemps(3);
2123 }
2124 
VisitStringGetCharsNoCheck(HInvoke * invoke)2125 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2126   ArmVIXLAssembler* assembler = GetAssembler();
2127   LocationSummary* locations = invoke->GetLocations();
2128 
2129   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2130   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2131   DCHECK_EQ(char_size, 2u);
2132 
2133   // Location of data in char array buffer.
2134   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2135 
2136   // Location of char array data in string.
2137   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2138 
2139   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2140   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2141   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2142   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2143   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2144   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2145   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2146 
2147   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2148   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2149   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2150 
2151   vixl32::Label done, compressed_string_loop;
2152   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2153   // dst to be copied.
2154   __ Add(dst_ptr, dstObj, data_offset);
2155   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2156 
2157   __ Subs(num_chr, srcEnd, srcBegin);
2158   // Early out for valid zero-length retrievals.
2159   __ B(eq, final_label, /* is_far_target= */ false);
2160 
2161   // src range to copy.
2162   __ Add(src_ptr, srcObj, value_offset);
2163 
2164   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2165   vixl32::Register temp;
2166   vixl32::Label compressed_string_preloop;
2167   if (mirror::kUseStringCompression) {
2168     // Location of count in string.
2169     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2170     temp = temps.Acquire();
2171     // String's length.
2172     __ Ldr(temp, MemOperand(srcObj, count_offset));
2173     __ Tst(temp, 1);
2174     temps.Release(temp);
2175     __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2176   }
2177   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2178 
2179   // Do the copy.
2180   vixl32::Label loop, remainder;
2181 
2182   temp = temps.Acquire();
2183   // Avoid having to repair the value of num_chr on the < 4 character path.
2184   __ Subs(temp, num_chr, 4);
2185   __ B(lt, &remainder, /* is_far_target= */ false);
2186 
2187   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2188   __ Mov(num_chr, temp);
2189 
2190   // The main loop, used for longer fetches, loads and stores 4 x 16-bit characters at a time.
2191   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2192   // to rectify these everywhere this intrinsic applies.)
2193   __ Bind(&loop);
2194   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2195   __ Subs(num_chr, num_chr, 4);
2196   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2197   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2198   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2199   temps.Release(temp);
2200   __ B(ge, &loop, /* is_far_target= */ false);
2201 
2202   __ Adds(num_chr, num_chr, 4);
2203   __ B(eq, final_label, /* is_far_target= */ false);
2204 
2205   // Main loop for < 4 character case and remainder handling. Loads and stores one
2206   // 16-bit Java character at a time.
2207   __ Bind(&remainder);
2208   temp = temps.Acquire();
2209   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2210   __ Subs(num_chr, num_chr, 1);
2211   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2212   temps.Release(temp);
2213   __ B(gt, &remainder, /* is_far_target= */ false);
2214 
2215   if (mirror::kUseStringCompression) {
2216     __ B(final_label);
2217 
2218     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2219     DCHECK_EQ(c_char_size, 1u);
2220     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2221     __ Bind(&compressed_string_preloop);
2222     __ Add(src_ptr, src_ptr, srcBegin);
2223     __ Bind(&compressed_string_loop);
2224     temp = temps.Acquire();
2225     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2226     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2227     temps.Release(temp);
2228     __ Subs(num_chr, num_chr, 1);
2229     __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2230   }
2231 
2232   if (done.IsReferenced()) {
2233     __ Bind(&done);
2234   }
2235 }
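// Summary of the copy strategy above: for uncompressed strings the main loop moves four 16-bit
// chars (8 bytes) per iteration with two word-sized LDR/STR pairs, and a halfword loop handles the
// remaining 0-3 chars; when kUseStringCompression is enabled and the TST on the count field takes
// the compressed path, each Latin-1 byte is loaded with LDRB and widened to a 16-bit char with
// STRH. E.g. copying 7 chars of an uncompressed string runs the main loop once and the remainder
// loop three times.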
2236 
VisitFloatIsInfinite(HInvoke * invoke)2237 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2238   CreateFPToIntLocations(allocator_, invoke);
2239 }
2240 
VisitFloatIsInfinite(HInvoke * invoke)2241 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2242   ArmVIXLAssembler* const assembler = GetAssembler();
2243   const vixl32::Register out = OutputRegister(invoke);
2244   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2245   // we don't care about the sign bit anyway.
2246   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2247 
2248   __ Vmov(out, InputSRegisterAt(invoke, 0));
2249   // We don't care about the sign bit, so shift left.
2250   __ Lsl(out, out, 1);
2251   __ Eor(out, out, infinity);
2252   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2253 }
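// Worked example for the check above: a float is +/- infinity iff its raw bits, shifted left by
// one to drop the sign, equal 0x7F800000 << 1 == 0xFF000000. E.g. Float.NEGATIVE_INFINITY has raw
// bits 0xFF800000, which become 0xFF000000 after the LSL and 0 after the EOR, so the result is 1;
// a NaN such as 0x7FC00000 yields a nonzero EOR result and therefore 0.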
2254 
VisitDoubleIsInfinite(HInvoke * invoke)2255 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2256   CreateFPToIntLocations(allocator_, invoke);
2257 }
2258 
VisitDoubleIsInfinite(HInvoke * invoke)2259 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2260   ArmVIXLAssembler* const assembler = GetAssembler();
2261   const vixl32::Register out = OutputRegister(invoke);
2262   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2263   const vixl32::Register temp = temps.Acquire();
2264   // The highest 32 bits of double precision positive infinity separated into
2265   // two constants encodable as immediate operands.
2266   constexpr uint32_t infinity_high  = 0x7f000000U;
2267   constexpr uint32_t infinity_high2 = 0x00f00000U;
2268 
2269   static_assert((infinity_high | infinity_high2) ==
2270                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2271                 "The constants do not add up to the high 32 bits of double "
2272                 "precision positive infinity.");
2273   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2274   __ Eor(out, out, infinity_high);
2275   __ Eor(out, out, infinity_high2);
2276   // We don't care about the sign bit, so shift left.
2277   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
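  // `temp` holds the low word and `out` the high word XORed with 0x7ff00000; OR-ing the low word
  // with (out << 1) discards the sign bit and yields zero only for +/-infinity.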
2278   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2279 }
2280 
VisitMathCeil(HInvoke * invoke)2281 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2282   if (features_.HasARMv8AInstructions()) {
2283     CreateFPToFPLocations(allocator_, invoke);
2284   }
2285 }
2286 
VisitMathCeil(HInvoke * invoke)2287 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2288   ArmVIXLAssembler* assembler = GetAssembler();
2289   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2290   __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
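  // VRINTP rounds toward positive infinity, matching Math.ceil; it requires the ARMv8-A
  // instructions checked in the locations builder above.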
2291 }
2292 
VisitMathFloor(HInvoke * invoke)2293 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2294   if (features_.HasARMv8AInstructions()) {
2295     CreateFPToFPLocations(allocator_, invoke);
2296   }
2297 }
2298 
VisitMathFloor(HInvoke * invoke)2299 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2300   ArmVIXLAssembler* assembler = GetAssembler();
2301   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2302   __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
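  // VRINTM rounds toward negative infinity, matching Math.floor.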
2303 }
2304 
2305 #define VISIT_INTRINSIC(name, low, high, type, start_index)                                       \
2306   void IntrinsicLocationsBuilderARMVIXL::Visit##name##ValueOf(HInvoke* invoke) {                  \
2307     InvokeRuntimeCallingConventionARMVIXL calling_convention;                                     \
2308     IntrinsicVisitor::ComputeValueOfLocations(invoke,                                             \
2309                                               codegen_,                                           \
2310                                               low,                                                \
2311                                               (high) - (low) + 1,                                 \
2312                                               LocationFrom(r0),                                   \
2313                                               LocationFrom(calling_convention.GetRegisterAt(0))); \
2314   }                                                                                               \
2315   void IntrinsicCodeGeneratorARMVIXL::Visit##name##ValueOf(HInvoke* invoke) {                     \
2316     IntrinsicVisitor::ValueOfInfo info =                                                          \
2317         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                              \
2318                                              codegen_->GetCompilerOptions(),                      \
2319                                              WellKnownClasses::java_lang_##name##_value,          \
2320                                              low,                                                 \
2321                                              (high) - (low) + 1,                                  \
2322                                              start_index);                                        \
2323     HandleValueOf(invoke, info, type);                                                            \
2324   }
BOXED_TYPES(VISIT_INTRINSIC)2325   BOXED_TYPES(VISIT_INTRINSIC)
2326 #undef VISIT_INTRINSIC
2327 
2328 
2329 void IntrinsicCodeGeneratorARMVIXL::HandleValueOf(HInvoke* invoke,
2330                                                   const IntrinsicVisitor::ValueOfInfo& info,
2331                                                   DataType::Type type) {
2332   LocationSummary* locations = invoke->GetLocations();
2333   ArmVIXLAssembler* const assembler = GetAssembler();
2334 
2335   vixl32::Register out = RegisterFrom(locations->Out());
2336   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2337   vixl32::Register temp = temps.Acquire();
2338   auto allocate_instance = [&]() {
2339     DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2340     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2341     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2342     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2343   };
2344   if (invoke->InputAt(0)->IsIntConstant()) {
2345     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2346     if (static_cast<uint32_t>(value - info.low) < info.length) {
2347       // Just embed the object in the code.
2348       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
2349       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2350     } else {
2351       DCHECK(locations->CanCall());
2352       // Allocate and initialize a new object.
2353       // TODO: If we JIT, we could allocate the object now, and store it in the
2354       // JIT object table.
2355       allocate_instance();
2356       __ Mov(temp, value);
2357       assembler->StoreToOffset(GetStoreOperandType(type), temp, out, info.value_offset);
2358       // Class pointer and `value` final field stores require a barrier before publication.
2359       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2360     }
2361   } else {
2362     DCHECK(locations->CanCall());
2363     vixl32::Register in = RegisterFrom(locations->InAt(0));
2364     // Check bounds of our cache.
2365     __ Add(out, in, -info.low);
2366     __ Cmp(out, info.length);
2367     vixl32::Label allocate, done;
2368     __ B(hs, &allocate, /* is_far_target= */ false);
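    // A single unsigned comparison covers both bounds: (in - low) is below `length` only when
    // low <= in <= high, so HS (unsigned >=) means the value is outside the cache.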
2369     // If the value is within the bounds, load the object directly from the array.
2370     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2371     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2372     assembler->MaybeUnpoisonHeapReference(out);
2373     __ B(&done);
2374     __ Bind(&allocate);
2375     // Otherwise allocate and initialize a new object.
2376     allocate_instance();
2377     assembler->StoreToOffset(GetStoreOperandType(type), in, out, info.value_offset);
2378     // Class pointer and `value` final field stores require a barrier before publication.
2379     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2380     __ Bind(&done);
2381   }
2382 }
2383 
VisitReferenceGetReferent(HInvoke * invoke)2384 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2385   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2386 }
2387 
VisitReferenceGetReferent(HInvoke * invoke)2388 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2389   ArmVIXLAssembler* assembler = GetAssembler();
2390   LocationSummary* locations = invoke->GetLocations();
2391 
2392   Location obj = locations->InAt(0);
2393   Location out = locations->Out();
2394 
2395   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2396   codegen_->AddSlowPath(slow_path);
2397 
2398   if (codegen_->EmitReadBarrier()) {
2399     // Check self->GetWeakRefAccessEnabled().
2400     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2401     vixl32::Register temp = temps.Acquire();
2402     __ Ldr(temp,
2403            MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2404     __ Cmp(temp, enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled));
2405     __ B(ne, slow_path->GetEntryLabel());
2406   }
2407 
2408   {
2409     // Load the java.lang.ref.Reference class.
2410     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2411     vixl32::Register temp = temps.Acquire();
2412     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2413 
2414     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2415     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2416     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2417     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2418               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
2419     __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
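    // The two byte-sized flags are adjacent (see the DCHECKs above), so a single LDRH reads both;
    // any non-zero halfword means at least one flag is set and we take the slow path.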
2420     __ Cmp(temp, 0);
2421     __ B(ne, slow_path->GetEntryLabel());
2422   }
2423 
2424   // Load the value from the field.
2425   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2426   if (codegen_->EmitBakerReadBarrier()) {
2427     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2428                                                     out,
2429                                                     RegisterFrom(obj),
2430                                                     referent_offset,
2431                                                     /*maybe_temp=*/ Location::NoLocation(),
2432                                                     /*needs_null_check=*/ true);
2433     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2434   } else {
2435     {
2436       vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2437       __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2438       codegen_->MaybeRecordImplicitNullCheck(invoke);
2439     }
2440     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2441     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2442   }
2443   __ Bind(slow_path->GetExitLabel());
2444 }
2445 
VisitReferenceRefersTo(HInvoke * invoke)2446 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2447   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
2448 }
2449 
VisitReferenceRefersTo(HInvoke * invoke)2450 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2451   LocationSummary* locations = invoke->GetLocations();
2452   ArmVIXLAssembler* assembler = GetAssembler();
2453   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2454 
2455   vixl32::Register obj = RegisterFrom(locations->InAt(0));
2456   vixl32::Register other = RegisterFrom(locations->InAt(1));
2457   vixl32::Register out = RegisterFrom(locations->Out());
2458   vixl32::Register tmp = temps.Acquire();
2459 
2460   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2461   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2462 
2463   {
2464     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2465     // Loading scratch register always uses 32-bit encoding.
2466     vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2467                                  vixl32::k32BitT32InstructionSizeInBytes);
2468     __ ldr(tmp, MemOperand(obj, referent_offset));
2469     codegen_->MaybeRecordImplicitNullCheck(invoke);
2470   }
2471   assembler->MaybeUnpoisonHeapReference(tmp);
2472   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2473 
2474   if (codegen_->EmitReadBarrier()) {
2475     DCHECK(kUseBakerReadBarrier);
2476 
2477     vixl32::Label calculate_result;
2478     __ Subs(out, tmp, other);
2479     __ B(eq, &calculate_result);  // `out` is 0 if taken.
2480 
2481     // Check if the loaded reference is null.
2482     __ Cmp(tmp, 0);
2483     __ B(eq, &calculate_result);  // `out` is not 0 if taken.
2484 
2485     // For correct memory visibility, we need a barrier before loading the lock word,
2486     // but the barrier already emitted for the volatile load above is sufficient.
2487 
2488     // Load the lockword and check if it is a forwarding address.
2489     static_assert(LockWord::kStateShift == 30u);
2490     static_assert(LockWord::kStateForwardingAddress == 3u);
2491     __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2492     __ Cmp(tmp, Operand(0xc0000000));
2493     __ B(lo, &calculate_result);   // `out` is not 0 if taken.
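    // A forwarding-address lock word has both state bits set (state 3 at shift 30, per the
    // static_asserts above), i.e. an unsigned value >= 0xc0000000, so LO means no forwarding.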
2494 
2495     // Extract the forwarding address and subtract from `other`.
2496     __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2497 
2498     __ Bind(&calculate_result);
2499   } else {
2500     DCHECK(!codegen_->EmitReadBarrier());
2501     __ Sub(out, tmp, other);
2502   }
2503 
2504   // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
2505   __ Clz(out, out);
2506   __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
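  // CLZ returns 32 only for a zero input; shifting right by log2(32) == 5 maps 0 to 1 and any
  // non-zero difference to 0.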
2507 }
2508 
VisitThreadInterrupted(HInvoke * invoke)2509 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2510   LocationSummary* locations =
2511       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2512   locations->SetOut(Location::RequiresRegister());
2513 }
2514 
VisitThreadInterrupted(HInvoke * invoke)2515 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2516   ArmVIXLAssembler* assembler = GetAssembler();
2517   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2518   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2519   __ Ldr(out, MemOperand(tr, offset));
2520   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2521   vixl32::Register temp = temps.Acquire();
2522   vixl32::Label done;
2523   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2524   __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
2525   __ Dmb(vixl32::ISH);
2526   __ Mov(temp, 0);
2527   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2528   __ Dmb(vixl32::ISH);
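  // The flag is cleared (and the barriers paid for) only when it was observed set; the DMBs
  // before and after the clearing store keep it ordered with the flag load above and with any
  // later accesses.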
2529   if (done.IsReferenced()) {
2530     __ Bind(&done);
2531   }
2532 }
2533 
VisitReachabilityFence(HInvoke * invoke)2534 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2535   LocationSummary* locations =
2536       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2537   locations->SetInAt(0, Location::Any());
2538 }
2539 
VisitReachabilityFence(HInvoke * invoke)2540 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
2541 
VisitIntegerDivideUnsigned(HInvoke * invoke)2542 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2543   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2544 }
2545 
VisitIntegerDivideUnsigned(HInvoke * invoke)2546 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2547   ArmVIXLAssembler* assembler = GetAssembler();
2548   LocationSummary* locations = invoke->GetLocations();
2549   vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2550   vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2551   vixl32::Register out = RegisterFrom(locations->Out());
2552 
2553   // Check if divisor is zero, bail to managed implementation to handle.
2554   SlowPathCodeARMVIXL* slow_path =
2555       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2556   codegen_->AddSlowPath(slow_path);
2557   __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2558 
2559   __ Udiv(out, dividend, divisor);
2560 
2561   __ Bind(slow_path->GetExitLabel());
2562 }
2563 
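// LDRD/STRD are only single-copy atomic when the ISA feature HasAtomicLdrdAndStrd reports so
// (LPAE cores); otherwise atomic 64-bit accesses fall back to an LDREXD/STREXD sequence.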
Use64BitExclusiveLoadStore(bool atomic,CodeGeneratorARMVIXL * codegen)2564 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2565   return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2566 }
2567 
GenerateIntrinsicGet(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register base,vixl32::Register offset,Location out,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)2568 static void GenerateIntrinsicGet(HInvoke* invoke,
2569                                  CodeGeneratorARMVIXL* codegen,
2570                                  DataType::Type type,
2571                                  std::memory_order order,
2572                                  bool atomic,
2573                                  vixl32::Register base,
2574                                  vixl32::Register offset,
2575                                  Location out,
2576                                  Location maybe_temp,
2577                                  Location maybe_temp2,
2578                                  Location maybe_temp3) {
2579   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2580   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2581   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2582   DCHECK(atomic || order == std::memory_order_relaxed);
2583 
2584   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2585   MemOperand address(base);
2586   if (offset.IsValid()) {
2587     // If offset is valid then this is a get from a relative address.
2588     address = MemOperand(base, offset);
2589   }
2590 
2591   switch (type) {
2592     case DataType::Type::kBool:
2593       __ Ldrb(RegisterFrom(out), address);
2594       break;
2595     case DataType::Type::kInt8:
2596       __ Ldrsb(RegisterFrom(out), address);
2597       break;
2598     case DataType::Type::kUint16:
2599       __ Ldrh(RegisterFrom(out), address);
2600       break;
2601     case DataType::Type::kInt16:
2602       __ Ldrsh(RegisterFrom(out), address);
2603       break;
2604     case DataType::Type::kInt32:
2605       __ Ldr(RegisterFrom(out), address);
2606       break;
2607     case DataType::Type::kInt64: {
2608       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2609         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2610         if (offset.IsValid()) {
2611           vixl32::Register temp_reg = temps.Acquire();
2612           __ Add(temp_reg, base, offset);
2613           address = MemOperand(temp_reg);
2614         }
2615 
2616         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
2617         vixl32::Label loop;
2618         __ Bind(&loop);
2619         __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2620         __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), address);
2621         __ Cmp(strexd_tmp, 0);
2622         __ B(ne, &loop);
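        // When LDRD is not single-copy atomic, an LDREXD/STREXD pair that stores back the value
        // just loaded provides an atomic 64-bit load; retry until the STREXD succeeds (result 0).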
2623       } else {
2624         __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2625       }
2626       break;
2627     }
2628     case DataType::Type::kReference: {
2629       if (codegen->EmitBakerReadBarrier()) {
2630         // Piggy-back on the field load path using introspection for the Baker read barrier.
2631         vixl32::Register temp = RegisterFrom(maybe_temp);
2632         __ Add(temp, base, offset);
2633         codegen->GenerateFieldLoadWithBakerReadBarrier(
2634             invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2635       } else {
2636         __ Ldr(RegisterFrom(out), address);
2637       }
2638       break;
2639     }
2640     case DataType::Type::kFloat32: {
2641       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2642       if (offset.IsValid()) {
2643         const vixl32::Register temp_reg = temps.Acquire();
2644         __ Add(temp_reg, base, offset);
2645         address = MemOperand(temp_reg);
2646       }
2647       __ Vldr(SRegisterFrom(out), address);
2648       break;
2649     }
2650     case DataType::Type::kFloat64: {
2651       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2652       if (offset.IsValid()) {
2653         const vixl32::Register temp_reg = temps.Acquire();
2654         __ Add(temp_reg, base, offset);
2655         address = MemOperand(temp_reg);
2656       }
2657       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2658         vixl32::Register lo = RegisterFrom(maybe_temp);
2659         vixl32::Register hi = RegisterFrom(maybe_temp2);
2660         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2661         vixl32::Label loop;
2662         __ Bind(&loop);
2663         __ Ldrexd(lo, hi, address);
2664         __ Strexd(strexd_tmp, lo, hi, address);
2665         __ Cmp(strexd_tmp, 0);
2666         __ B(ne, &loop);
2667         __ Vmov(DRegisterFrom(out), lo, hi);
2668       } else {
2669         __ Vldr(DRegisterFrom(out), address);
2670       }
2671       break;
2672     }
2673     default:
2674       LOG(FATAL) << "Unexpected type " << type;
2675       UNREACHABLE();
2676   }
2677   if (acquire_barrier) {
2678     codegen->GenerateMemoryBarrier(
2679         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2680   }
2681   if (type == DataType::Type::kReference && !codegen->EmitBakerReadBarrier()) {
2682     Location base_loc = LocationFrom(base);
2683     Location index_loc = LocationFrom(offset);
2684     codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
2685   }
2686 }
2687 
CreateUnsafeGetLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)2688 static void CreateUnsafeGetLocations(HInvoke* invoke,
2689                                      CodeGeneratorARMVIXL* codegen,
2690                                      DataType::Type type,
2691                                      bool atomic) {
2692   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2693   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2694   LocationSummary* locations =
2695       new (allocator) LocationSummary(invoke,
2696                                       can_call
2697                                           ? LocationSummary::kCallOnSlowPath
2698                                           : LocationSummary::kNoCall,
2699                                       kIntrinsified);
2700   if (can_call && kUseBakerReadBarrier) {
2701     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2702   }
2703   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2704   locations->SetInAt(1, Location::RequiresRegister());
2705   locations->SetInAt(2, Location::RequiresRegister());
2706   locations->SetOut(Location::RequiresRegister(),
2707                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2708   if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2709       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2710     // We need a temporary register for the read barrier marking slow
2711     // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2712     // or the STREXD result for the LDREXD/STREXD sequence when LDRD is non-atomic.
2713     locations->AddTemp(Location::RequiresRegister());
2714   }
2715 }
2716 
CreateUnsafeGetAbsoluteLocations(HInvoke * invoke)2717 static void CreateUnsafeGetAbsoluteLocations(HInvoke* invoke) {
2718   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2719   LocationSummary* locations =
2720       new (allocator) LocationSummary(invoke,
2721                                       LocationSummary::kNoCall,
2722                                       kIntrinsified);
2723   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2724   locations->SetInAt(1, Location::RequiresRegister());
2725   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2726 }
2727 
GenUnsafeGet(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic)2728 static void GenUnsafeGet(HInvoke* invoke,
2729                          CodeGeneratorARMVIXL* codegen,
2730                          DataType::Type type,
2731                          std::memory_order order,
2732                          bool atomic) {
2733   LocationSummary* locations = invoke->GetLocations();
2734   vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
2735   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
2736   Location out = locations->Out();
2737   Location maybe_temp = Location::NoLocation();
2738   if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2739       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2740     maybe_temp = locations->GetTemp(0);
2741   }
2742   GenerateIntrinsicGet(invoke,
2743                        codegen,
2744                        type,
2745                        order,
2746                        atomic,
2747                        base,
2748                        offset,
2749                        out,
2750                        maybe_temp,
2751                        /*maybe_temp2=*/ Location::NoLocation(),
2752                        /*maybe_temp3=*/ Location::NoLocation());
2753 }
2754 
GenUnsafeGetAbsolute(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic)2755 static void GenUnsafeGetAbsolute(HInvoke* invoke,
2756                                  CodeGeneratorARMVIXL* codegen,
2757                                  DataType::Type type,
2758                                  std::memory_order order,
2759                                  bool atomic) {
2760   LocationSummary* locations = invoke->GetLocations();
2761   vixl32::Register address = LowRegisterFrom(locations->InAt(1));  // Long address, lo part only.
2762   Location out = locations->Out();
2763   Location maybe_temp = Location::NoLocation();
2764   if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
2765     maybe_temp = locations->GetTemp(0);
2766   }
2767   GenerateIntrinsicGet(invoke,
2768                        codegen,
2769                        type,
2770                        order,
2771                        atomic,
2772                        address,
2773                        NoReg,  // No offset.
2774                        out,
2775                        maybe_temp,
2776                        /*maybe_temp2=*/ Location::NoLocation(),
2777                        /*maybe_temp3=*/ Location::NoLocation());
2778 }
2779 
VisitUnsafeGet(HInvoke * invoke)2780 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2781   VisitJdkUnsafeGet(invoke);
2782 }
2783 
VisitUnsafeGetAbsolute(HInvoke * invoke)2784 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2785   VisitJdkUnsafeGetAbsolute(invoke);
2786 }
2787 
VisitUnsafeGet(HInvoke * invoke)2788 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2789   VisitJdkUnsafeGet(invoke);
2790 }
2791 
VisitUnsafeGetAbsolute(HInvoke * invoke)2792 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2793   VisitJdkUnsafeGetAbsolute(invoke);
2794 }
2795 
VisitUnsafeGetVolatile(HInvoke * invoke)2796 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2797   VisitJdkUnsafeGetVolatile(invoke);
2798 }
2799 
VisitUnsafeGetVolatile(HInvoke * invoke)2800 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2801   VisitJdkUnsafeGetVolatile(invoke);
2802 }
2803 
VisitUnsafeGetLong(HInvoke * invoke)2804 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2805   VisitJdkUnsafeGetLong(invoke);
2806 }
2807 
VisitUnsafeGetLong(HInvoke * invoke)2808 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2809   VisitJdkUnsafeGetLong(invoke);
2810 }
2811 
VisitUnsafeGetLongVolatile(HInvoke * invoke)2812 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2813   VisitJdkUnsafeGetLongVolatile(invoke);
2814 }
2815 
VisitUnsafeGetLongVolatile(HInvoke * invoke)2816 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2817   VisitJdkUnsafeGetLongVolatile(invoke);
2818 }
2819 
VisitUnsafeGetObject(HInvoke * invoke)2820 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2821   VisitJdkUnsafeGetReference(invoke);
2822 }
2823 
VisitUnsafeGetObject(HInvoke * invoke)2824 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2825   VisitJdkUnsafeGetReference(invoke);
2826 }
2827 
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2828 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2829   VisitJdkUnsafeGetReferenceVolatile(invoke);
2830 }
2831 
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2832 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2833   VisitJdkUnsafeGetReferenceVolatile(invoke);
2834 }
2835 
VisitUnsafeGetByte(HInvoke * invoke)2836 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2837   VisitJdkUnsafeGetByte(invoke);
2838 }
2839 
VisitUnsafeGetByte(HInvoke * invoke)2840 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2841   VisitJdkUnsafeGetByte(invoke);
2842 }
2843 
VisitJdkUnsafeGet(HInvoke * invoke)2844 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2845   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2846 }
2847 
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2848 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2849   CreateUnsafeGetAbsoluteLocations(invoke);
2850 }
2851 
VisitJdkUnsafeGet(HInvoke * invoke)2852 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2853   GenUnsafeGet(
2854       invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2855 }
2856 
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2857 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2858   GenUnsafeGetAbsolute(
2859       invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2860 }
2861 
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2862 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2863   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2864 }
2865 
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2866 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2867   GenUnsafeGet(
2868       invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2869 }
2870 
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2871 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2872   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2873 }
2874 
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2875 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2876   GenUnsafeGet(
2877       invoke, codegen_, DataType::Type::kInt32, std::memory_order_acquire, /*atomic=*/ true);
2878 }
2879 
VisitJdkUnsafeGetLong(HInvoke * invoke)2880 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2881   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2882 }
2883 
VisitJdkUnsafeGetLong(HInvoke * invoke)2884 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2885   GenUnsafeGet(
2886       invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2887 }
2888 
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2889 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2890   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2891 }
2892 
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2893 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2894   GenUnsafeGet(
2895       invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2896 }
2897 
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2898 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2899   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2900 }
2901 
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2902 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2903   GenUnsafeGet(
2904       invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true);
2905 }
2906 
VisitJdkUnsafeGetReference(HInvoke * invoke)2907 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2908   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2909 }
2910 
VisitJdkUnsafeGetReference(HInvoke * invoke)2911 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2912   GenUnsafeGet(
2913       invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2914 }
2915 
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2916 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2917   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2918 }
2919 
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2920 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2921   GenUnsafeGet(
2922       invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2923 }
2924 
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2925 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2926   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2927 }
2928 
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2929 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2930   GenUnsafeGet(
2931       invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true);
2932 }
2933 
VisitJdkUnsafeGetByte(HInvoke * invoke)2934 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2935   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
2936 }
2937 
VisitJdkUnsafeGetByte(HInvoke * invoke)2938 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2939   GenUnsafeGet(
2940       invoke, codegen_, DataType::Type::kInt8, std::memory_order_relaxed, /*atomic=*/ false);
2941 }
2942 
GenerateIntrinsicSetStore(CodeGeneratorARMVIXL * codegen,ArmVIXLAssembler * assembler,DataType::Type type,bool atomic,vixl32::MemOperand address,Location value,bool seq_cst_barrier,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)2943 static void GenerateIntrinsicSetStore(CodeGeneratorARMVIXL* codegen,
2944                                       ArmVIXLAssembler* assembler,
2945                                       DataType::Type type,
2946                                       bool atomic,
2947                                       vixl32::MemOperand address,
2948                                       Location value,
2949                                       bool seq_cst_barrier,
2950                                       Location maybe_temp,
2951                                       Location maybe_temp2,
2952                                       Location maybe_temp3) {
2953   switch (type) {
2954     case DataType::Type::kBool:
2955     case DataType::Type::kInt8:
2956       __ Strb(RegisterFrom(value), address);
2957       break;
2958     case DataType::Type::kUint16:
2959     case DataType::Type::kInt16:
2960       __ Strh(RegisterFrom(value), address);
2961       break;
2962     case DataType::Type::kReference:
2963     case DataType::Type::kInt32:
2964       __ Str(RegisterFrom(value), address);
2965       break;
2966     case DataType::Type::kInt64:
2967       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2968         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2969         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2970         vixl32::Label loop;
2971         __ Bind(&loop);
2972         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
2973         __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
2974         __ Cmp(lo_tmp, 0);
2975         __ B(ne, &loop);
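        // The LDREXD only establishes exclusive access (its result is ignored); retrying until
        // STREXD reports success (0) makes the 64-bit store single-copy atomic.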
2976       } else {
2977         __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
2978       }
2979       break;
2980     case DataType::Type::kFloat32:
2981       __ Vstr(SRegisterFrom(value), address);
2982       break;
2983     case DataType::Type::kFloat64:
2984       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2985         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2986         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2987         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2988         vixl32::Label loop;
2989         __ Bind(&loop);
2990         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
2991         __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
2992         __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
2993         __ Cmp(strexd_tmp, 0);
2994         __ B(ne, &loop);
2995       } else {
2996         __ Vstr(DRegisterFrom(value), address);
2997       }
2998       break;
2999     default:
3000       LOG(FATAL) << "Unexpected type " << type;
3001       UNREACHABLE();
3002   }
3003 
3004   if (seq_cst_barrier) {
3005     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3006   }
3007 }
3008 
GenerateIntrinsicSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register address,Location value,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)3009 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
3010                                  DataType::Type type,
3011                                  std::memory_order order,
3012                                  bool atomic,
3013                                  vixl32::Register address,
3014                                  Location value,
3015                                  Location maybe_temp,
3016                                  Location maybe_temp2,
3017                                  Location maybe_temp3) {
3018   bool seq_cst_barrier = order == std::memory_order_seq_cst;
3019   bool release_barrier = seq_cst_barrier || order == std::memory_order_release;
3020   DCHECK(release_barrier || order == std::memory_order_relaxed);
3021   DCHECK(atomic || order == std::memory_order_relaxed);
3022 
3023   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3024   if (release_barrier) {
3025     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3026   }
3027   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3028   GenerateIntrinsicSetStore(codegen,
3029                             assembler,
3030                             type,
3031                             atomic,
3032                             MemOperand(address),
3033                             value,
3034                             seq_cst_barrier,
3035                             maybe_temp,
3036                             maybe_temp2,
3037                             maybe_temp3);
3038 }
3039 
GenerateIntrinsicSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register base,vixl32::Register offset,Location value,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)3040 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
3041                                  DataType::Type type,
3042                                  std::memory_order order,
3043                                  bool atomic,
3044                                  vixl32::Register base,
3045                                  vixl32::Register offset,
3046                                  Location value,
3047                                  Location maybe_temp,
3048                                  Location maybe_temp2,
3049                                  Location maybe_temp3) {
3050   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
3051   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
3052   DCHECK(release_barrier || order == std::memory_order_relaxed);
3053   DCHECK(atomic || order == std::memory_order_relaxed);
3054 
3055   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3056   if (release_barrier) {
3057     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3058   }
3059   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3060   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
3061     vixl32::Register temp = temps.Acquire();
3062     __ Mov(temp, RegisterFrom(value));
3063     assembler->PoisonHeapReference(temp);
3064     value = LocationFrom(temp);
3065   }
3066   MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
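  // VSTR, STREXD and (in T32) STRD do not take a register offset, so for 64-bit and float
  // accesses fold base + offset into a scratch register first.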
3067   if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
3068     const vixl32::Register temp_reg = temps.Acquire();
3069     __ Add(temp_reg, base, offset);
3070     address = MemOperand(temp_reg);
3071   }
3072   GenerateIntrinsicSetStore(codegen,
3073                             assembler,
3074                             type,
3075                             atomic,
3076                             address,
3077                             value,
3078                             seq_cst_barrier,
3079                             maybe_temp,
3080                             maybe_temp2,
3081                             maybe_temp3);
3082 }
3083 
CreateUnsafePutTempLocations(CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic,LocationSummary * locations)3084 static void CreateUnsafePutTempLocations(CodeGeneratorARMVIXL* codegen,
3085                                          DataType::Type type,
3086                                          bool atomic,
3087                                          LocationSummary* locations) {
3088   if (type == DataType::Type::kInt64) {
3089     // Potentially need temps for ldrexd-strexd loop.
3090     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3091       locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
3092       locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
3093     }
3094   } else if (type == DataType::Type::kReference) {
3095     // Temp for card-marking.
3096     locations->AddTemp(Location::RequiresRegister());  // Temp.
3097   }
3098 }
3099 
CreateUnsafePutLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)3100 static void CreateUnsafePutLocations(HInvoke* invoke,
3101                                      CodeGeneratorARMVIXL* codegen,
3102                                      DataType::Type type,
3103                                      bool atomic) {
3104   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3105   LocationSummary* locations =
3106       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3107   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3108   locations->SetInAt(1, Location::RequiresRegister());
3109   locations->SetInAt(2, Location::RequiresRegister());
3110   locations->SetInAt(3, Location::RequiresRegister());
3111   CreateUnsafePutTempLocations(codegen, type, atomic, locations);
3112 }
3113 
CreateUnsafePutAbsoluteLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)3114 static void CreateUnsafePutAbsoluteLocations(HInvoke* invoke,
3115                                              CodeGeneratorARMVIXL* codegen,
3116                                              DataType::Type type,
3117                                              bool atomic) {
3118   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3119   LocationSummary* locations =
3120       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3121   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3122   locations->SetInAt(1, Location::RequiresRegister());
3123   locations->SetInAt(2, Location::RequiresRegister());
3124   CreateUnsafePutTempLocations(codegen, type, atomic, locations);
3125 }
3126 
GenUnsafePut(HInvoke * invoke,DataType::Type type,std::memory_order order,bool atomic,CodeGeneratorARMVIXL * codegen)3127 static void GenUnsafePut(HInvoke* invoke,
3128                          DataType::Type type,
3129                          std::memory_order order,
3130                          bool atomic,
3131                          CodeGeneratorARMVIXL* codegen) {
3132   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3133 
3134   LocationSummary* locations = invoke->GetLocations();
3135   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
3136   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
3137   Location value = locations->InAt(3);
3138   Location maybe_temp = Location::NoLocation();
3139   Location maybe_temp2 = Location::NoLocation();
3140   if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3141     maybe_temp = locations->GetTemp(0);
3142     maybe_temp2 = locations->GetTemp(1);
3143   }
3144 
3145   GenerateIntrinsicSet(codegen,
3146                        type,
3147                        order,
3148                        atomic,
3149                        base,
3150                        offset,
3151                        value,
3152                        maybe_temp,
3153                        maybe_temp2,
3154                        /*maybe_temp3=*/ Location::NoLocation());
3155 
3156   if (type == DataType::Type::kReference) {
3157     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3158     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3159     vixl32::Register card = temps.Acquire();
3160     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3161     codegen->MaybeMarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
3162   }
3163 }
3164 
GenUnsafePutAbsolute(HInvoke * invoke,DataType::Type type,std::memory_order order,bool atomic,CodeGeneratorARMVIXL * codegen)3165 static void GenUnsafePutAbsolute(HInvoke* invoke,
3166                                  DataType::Type type,
3167                                  std::memory_order order,
3168                                  bool atomic,
3169                                  CodeGeneratorARMVIXL* codegen) {
3170   LocationSummary* locations = invoke->GetLocations();
3171   vixl32::Register address = LowRegisterFrom(locations->InAt(1));  // Long address, lo part only.
3172   Location value = locations->InAt(2);
3173   Location maybe_temp = Location::NoLocation();
3174   Location maybe_temp2 = Location::NoLocation();
3175   if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3176     maybe_temp = locations->GetTemp(0);
3177     maybe_temp2 = locations->GetTemp(1);
3178   }
3179 
3180   GenerateIntrinsicSet(codegen,
3181                        type,
3182                        order,
3183                        atomic,
3184                        address,
3185                        value,
3186                        maybe_temp,
3187                        maybe_temp2,
3188                        /*maybe_temp3=*/ Location::NoLocation());
3189 }
3190 
VisitUnsafePut(HInvoke * invoke)3191 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3192   VisitJdkUnsafePut(invoke);
3193 }
3194 
VisitUnsafePutAbsolute(HInvoke * invoke)3195 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutAbsolute(HInvoke* invoke) {
3196   VisitJdkUnsafePutAbsolute(invoke);
3197 }
3198 
VisitUnsafePut(HInvoke * invoke)3199 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3200   VisitJdkUnsafePut(invoke);
3201 }
3202 
VisitUnsafePutAbsolute(HInvoke * invoke)3203 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutAbsolute(HInvoke* invoke) {
3204   VisitJdkUnsafePutAbsolute(invoke);
3205 }
3206 
VisitUnsafePutOrdered(HInvoke * invoke)3207 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3208   VisitJdkUnsafePutOrdered(invoke);
3209 }
3210 
VisitUnsafePutOrdered(HInvoke * invoke)3211 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3212   VisitJdkUnsafePutOrdered(invoke);
3213 }
3214 
VisitUnsafePutVolatile(HInvoke * invoke)3215 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3216   VisitJdkUnsafePutVolatile(invoke);
3217 }
3218 
VisitUnsafePutVolatile(HInvoke * invoke)3219 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3220   VisitJdkUnsafePutVolatile(invoke);
3221 }
VisitUnsafePutObject(HInvoke * invoke)3222 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3223   VisitJdkUnsafePutReference(invoke);
3224 }
3225 
VisitUnsafePutObject(HInvoke * invoke)3226 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3227   VisitJdkUnsafePutReference(invoke);
3228 }
3229 
VisitUnsafePutObjectOrdered(HInvoke * invoke)3230 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3231   VisitJdkUnsafePutObjectOrdered(invoke);
3232 }
3233 
VisitUnsafePutObjectOrdered(HInvoke * invoke)3234 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3235   VisitJdkUnsafePutObjectOrdered(invoke);
3236 }
3237 
VisitUnsafePutObjectVolatile(HInvoke * invoke)3238 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3239   VisitJdkUnsafePutReferenceVolatile(invoke);
3240 }
3241 
VisitUnsafePutObjectVolatile(HInvoke * invoke)3242 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3243   VisitJdkUnsafePutReferenceVolatile(invoke);
3244 }
3245 
VisitUnsafePutLong(HInvoke * invoke)3246 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3247   VisitJdkUnsafePutLong(invoke);
3248 }
3249 
VisitUnsafePutLong(HInvoke * invoke)3250 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3251   VisitJdkUnsafePutLong(invoke);
3252 }
3253 
VisitUnsafePutLongOrdered(HInvoke * invoke)3254 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3255   VisitJdkUnsafePutLongOrdered(invoke);
3256 }
3257 
VisitUnsafePutLongOrdered(HInvoke * invoke)3258 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3259   VisitJdkUnsafePutLongOrdered(invoke);
3260 }
3261 
VisitUnsafePutLongVolatile(HInvoke * invoke)3262 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3263   VisitJdkUnsafePutLongVolatile(invoke);
3264 }
3265 
VisitUnsafePutLongVolatile(HInvoke * invoke)3266 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3267   VisitJdkUnsafePutLongVolatile(invoke);
3268 }
3269 
VisitUnsafePutByte(HInvoke * invoke)3270 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3271   VisitJdkUnsafePutByte(invoke);
3272 }
3273 
VisitUnsafePutByte(HInvoke * invoke)3274 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3275   VisitJdkUnsafePutByte(invoke);
3276 }
3277 
VisitJdkUnsafePut(HInvoke * invoke)3278 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3279   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3280 }
3281 
VisitJdkUnsafePutAbsolute(HInvoke * invoke)3282 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
3283   CreateUnsafePutAbsoluteLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3284 }
3285 
VisitJdkUnsafePut(HInvoke * invoke)3286 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3287   GenUnsafePut(invoke,
3288                DataType::Type::kInt32,
3289                std::memory_order_relaxed,
3290                /*atomic=*/ false,
3291                codegen_);
3292 }
3293 
VisitJdkUnsafePutAbsolute(HInvoke * invoke)3294 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
3295   GenUnsafePutAbsolute(invoke,
3296                        DataType::Type::kInt32,
3297                        std::memory_order_relaxed,
3298                        /*atomic=*/false,
3299                        codegen_);
3300 }
3301 
VisitJdkUnsafePutByte(HInvoke * invoke)3302 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3303   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
3304 }
3305 
VisitJdkUnsafePutByte(HInvoke * invoke)3306 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3307   GenUnsafePut(invoke,
3308                DataType::Type::kInt8,
3309                std::memory_order_relaxed,
3310                /*atomic=*/ false,
3311                codegen_);
3312 }
3313 
VisitJdkUnsafePutOrdered(HInvoke * invoke)3314 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3315   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3316 }
3317 
VisitJdkUnsafePutOrdered(HInvoke * invoke)3318 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3319   GenUnsafePut(invoke,
3320                DataType::Type::kInt32,
3321                std::memory_order_release,
3322                /*atomic=*/ true,
3323                codegen_);
3324 }
3325 
VisitJdkUnsafePutVolatile(HInvoke * invoke)3326 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3327   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3328 }
3329 
VisitJdkUnsafePutVolatile(HInvoke * invoke)3330 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3331   GenUnsafePut(invoke,
3332                DataType::Type::kInt32,
3333                std::memory_order_seq_cst,
3334                /*atomic=*/ true,
3335                codegen_);
3336 }
3337 
VisitJdkUnsafePutRelease(HInvoke * invoke)3338 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3339   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3340 }
3341 
VisitJdkUnsafePutRelease(HInvoke * invoke)3342 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3343   GenUnsafePut(invoke,
3344                DataType::Type::kInt32,
3345                std::memory_order_release,
3346                /*atomic=*/ true,
3347                codegen_);
3348 }
3349 
VisitJdkUnsafePutReference(HInvoke * invoke)3350 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3351   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3352 }
3353 
VisitJdkUnsafePutReference(HInvoke * invoke)3354 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3355   GenUnsafePut(invoke,
3356                DataType::Type::kReference,
3357                std::memory_order_relaxed,
3358                /*atomic=*/ false,
3359                codegen_);
3360 }
3361 
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)3362 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3363   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3364 }
3365 
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)3366 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3367   GenUnsafePut(invoke,
3368                DataType::Type::kReference,
3369                std::memory_order_release,
3370                /*atomic=*/ true,
3371                codegen_);
3372 }
3373 
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)3374 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3375   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3376 }
3377 
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)3378 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3379   GenUnsafePut(invoke,
3380                DataType::Type::kReference,
3381                std::memory_order_seq_cst,
3382                /*atomic=*/ true,
3383                codegen_);
3384 }
3385 
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)3386 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3387   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3388 }
3389 
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)3390 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3391   GenUnsafePut(invoke,
3392                DataType::Type::kReference,
3393                std::memory_order_release,
3394                /*atomic=*/ true,
3395                codegen_);
3396 }
3397 
VisitJdkUnsafePutLong(HInvoke * invoke)3398 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3399   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3400 }
3401 
VisitJdkUnsafePutLong(HInvoke * invoke)3402 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3403   GenUnsafePut(invoke,
3404                DataType::Type::kInt64,
3405                std::memory_order_relaxed,
3406                /*atomic=*/ false,
3407                codegen_);
3408 }
3409 
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)3410 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3411   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3412 }
3413 
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)3414 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3415   GenUnsafePut(invoke,
3416                DataType::Type::kInt64,
3417                std::memory_order_release,
3418                /*atomic=*/ true,
3419                codegen_);
3420 }
3421 
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)3422 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3423   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3424 }
3425 
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)3426 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3427   GenUnsafePut(invoke,
3428                DataType::Type::kInt64,
3429                std::memory_order_seq_cst,
3430                /*atomic=*/ true,
3431                codegen_);
3432 }
3433 
VisitJdkUnsafePutLongRelease(HInvoke * invoke)3434 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3435   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3436 }
3437 
VisitJdkUnsafePutLongRelease(HInvoke * invoke)3438 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3439   GenUnsafePut(invoke,
3440                DataType::Type::kInt64,
3441                std::memory_order_release,
3442                /*atomic=*/ true,
3443                codegen_);
3444 }
3445 
EmitLoadExclusive(CodeGeneratorARMVIXL * codegen,DataType::Type type,vixl32::Register ptr,Location old_value)3446 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3447                               DataType::Type type,
3448                               vixl32::Register ptr,
3449                               Location old_value) {
3450   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3451   switch (type) {
3452     case DataType::Type::kBool:
3453     case DataType::Type::kInt8:
3454       __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3455       break;
3456     case DataType::Type::kUint16:
3457     case DataType::Type::kInt16:
3458       __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3459       break;
3460     case DataType::Type::kInt32:
3461     case DataType::Type::kReference:
3462       __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3463       break;
3464     case DataType::Type::kInt64:
3465       __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3466       break;
3467     default:
3468       LOG(FATAL) << "Unexpected type: " << type;
3469       UNREACHABLE();
3470   }
3471   switch (type) {
3472     case DataType::Type::kInt8:
3473       __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3474       break;
3475     case DataType::Type::kInt16:
3476       __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3477       break;
3478     case DataType::Type::kReference:
3479       assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3480       break;
3481     default:
3482       break;
3483   }
3484 }
3485 
EmitStoreExclusive(CodeGeneratorARMVIXL * codegen,DataType::Type type,vixl32::Register ptr,vixl32::Register store_result,Location new_value)3486 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3487                                DataType::Type type,
3488                                vixl32::Register ptr,
3489                                vixl32::Register store_result,
3490                                Location new_value) {
3491   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3492   if (type == DataType::Type::kReference) {
3493     assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3494   }
3495   switch (type) {
3496     case DataType::Type::kBool:
3497     case DataType::Type::kInt8:
3498       __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3499       break;
3500     case DataType::Type::kUint16:
3501     case DataType::Type::kInt16:
3502       __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3503       break;
3504     case DataType::Type::kInt32:
3505     case DataType::Type::kReference:
3506       __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3507       break;
3508     case DataType::Type::kInt64:
3509       __ Strexd(
3510           store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3511       break;
3512     default:
3513       LOG(FATAL) << "Unexpected type: " << type;
3514       UNREACHABLE();
3515   }
3516   if (type == DataType::Type::kReference) {
3517     assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3518   }
3519 }
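// Note (sketch): these two helpers form the load-linked/store-conditional halves used by the
// CAS and get-and-update loops below, pairing LDREXB/STREXB, LDREXH/STREXH, LDREX/STREX and
// LDREXD/STREXD by access size, with sign extension and heap reference (un)poisoning applied
// around the exclusive access where needed.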
3520 
GenerateCompareAndSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,bool strong,vixl32::Label * cmp_failure,bool cmp_failure_is_far_target,vixl32::Register ptr,Location expected,Location new_value,Location old_value,vixl32::Register store_result,vixl32::Register success)3521 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3522                                   DataType::Type type,
3523                                   bool strong,
3524                                   vixl32::Label* cmp_failure,
3525                                   bool cmp_failure_is_far_target,
3526                                   vixl32::Register ptr,
3527                                   Location expected,
3528                                   Location new_value,
3529                                   Location old_value,
3530                                   vixl32::Register store_result,
3531                                   vixl32::Register success) {
3532   // For kReference, `expected` shall be a register pair when called from the read barrier
3533   // slow path: it holds both the original `expected` and the unmarked old value from the
3534   // main path's CAS attempt, which matched `expected` only after marking.
3535   // Otherwise, the type of `expected` shall match the type of `new_value` and `old_value`.
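  // (For example, the read barrier slow path below passes /*expected=*/ LocationFrom(expected_,
  // old_value_), so the comparison accepts either the to-space or the unmarked from-space
  // reference for the same object.)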
3536   if (type == DataType::Type::kInt64) {
3537     DCHECK(expected.IsRegisterPair());
3538     DCHECK(new_value.IsRegisterPair());
3539     DCHECK(old_value.IsRegisterPair());
3540   } else {
3541     DCHECK(expected.IsRegister() ||
3542            (type == DataType::Type::kReference && expected.IsRegisterPair()));
3543     DCHECK(new_value.IsRegister());
3544     DCHECK(old_value.IsRegister());
3545     // Make sure the unmarked old value for reference CAS slow path is not clobbered by STREX.
3546     DCHECK(!expected.Contains(LocationFrom(store_result)));
3547   }
3548 
3549   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3550 
3551   // do {
3552   //   old_value = [ptr];  // Load exclusive.
3553   //   if (old_value != expected) goto cmp_failure;
3554   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
3555   // } while (strong && store_result);
3556   //
3557   // If `success` is a valid register, there are additional instructions in the above code
3558   // to report success with value 1 and failure with value 0 in that register.
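  //
  // As a rough sketch only (the exact sequence depends on the flags computed below), a strong
  // 32-bit CAS with a valid `success` register expands to something like:
  //
  //   loop_head:
  //     ldrex   old_value, [ptr]
  //     mov     success, #0            @ may be scheduled before or after the comparison
  //     cmp     old_value, expected
  //     bne     cmp_failure
  //     strex   store_result, new_value, [ptr]
  //     mov     success, #1            @ likewise scheduled around the cmp below
  //     cmp     store_result, #0
  //     bne     loop_head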
3559 
3560   vixl32::Label loop_head;
3561   if (strong) {
3562     __ Bind(&loop_head);
3563   }
3564   EmitLoadExclusive(codegen, type, ptr, old_value);
3565   // We do not need to initialize the failure code for comparison failure if the
3566   // branch goes to the read barrier slow path that clobbers `success` anyway.
3567   bool init_failure_for_cmp =
3568       success.IsValid() &&
3569       !(type == DataType::Type::kReference && codegen->EmitReadBarrier() && expected.IsRegister());
3570   // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3571   // is essentially free, so prepare the failure value here if we can.
3572   bool init_failure_for_cmp_early =
3573       init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3574   if (init_failure_for_cmp_early) {
3575     __ Mov(success, 0);  // Indicate failure if the comparison fails.
3576   }
3577   if (type == DataType::Type::kInt64) {
3578     __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3579     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3580     __ it(eq);
3581     __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3582   } else if (expected.IsRegisterPair()) {
3583     DCHECK_EQ(type, DataType::Type::kReference);
3584     DCHECK(!expected.Contains(old_value));
3585     // Check if the loaded value matches any of the two registers in `expected`.
3586     __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3587     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3588     __ it(ne);
3589     __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
3590   } else {
3591     __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3592   }
3593   if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3594     __ Mov(LeaveFlags, success, 0);  // Indicate failure if the comparison fails.
3595   }
3596   __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3597   EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3598   if (strong) {
3599     // Instruction scheduling: Loading a constant between STREX* and using its result
3600     // is essentially free, so prepare the success value here if needed and possible.
3601     if (success.IsValid() && !success.Is(store_result)) {
3602       __ Mov(success, 1);  // Indicate success if the store succeeds.
3603     }
3604     __ Cmp(store_result, 0);
3605     if (success.IsValid() && success.Is(store_result)) {
3606       __ Mov(LeaveFlags, success, 1);  // Indicate success if the store succeeds.
3607     }
3608     __ B(ne, &loop_head, /*is_far_target=*/ false);
3609   } else {
3610     // Weak CAS (VarHandle.weakCompareAndSet* variants) must always report success or failure.
3611     DCHECK(success.IsValid());
3612     // Flip the `store_result` to indicate success by 1 and failure by 0.
3613     __ Eor(success, store_result, 1);
3614   }
3615 }
3616 
3617 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3618  public:
ReadBarrierCasSlowPathARMVIXL(HInvoke * invoke,bool strong,vixl32::Register base,vixl32::Register offset,vixl32::Register expected,vixl32::Register new_value,vixl32::Register old_value,vixl32::Register old_value_temp,vixl32::Register store_result,vixl32::Register success,CodeGeneratorARMVIXL * arm_codegen)3619   explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3620                                          bool strong,
3621                                          vixl32::Register base,
3622                                          vixl32::Register offset,
3623                                          vixl32::Register expected,
3624                                          vixl32::Register new_value,
3625                                          vixl32::Register old_value,
3626                                          vixl32::Register old_value_temp,
3627                                          vixl32::Register store_result,
3628                                          vixl32::Register success,
3629                                          CodeGeneratorARMVIXL* arm_codegen)
3630       : SlowPathCodeARMVIXL(invoke),
3631         strong_(strong),
3632         base_(base),
3633         offset_(offset),
3634         expected_(expected),
3635         new_value_(new_value),
3636         old_value_(old_value),
3637         old_value_temp_(old_value_temp),
3638         store_result_(store_result),
3639         success_(success),
3640         mark_old_value_slow_path_(nullptr),
3641         update_old_value_slow_path_(nullptr) {
3642     if (!kUseBakerReadBarrier) {
3643       // We need to add the slow path now, it is too late when emitting slow path code.
3644       mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3645           invoke,
3646           Location::RegisterLocation(old_value_temp.GetCode()),
3647           Location::RegisterLocation(old_value.GetCode()),
3648           Location::RegisterLocation(base.GetCode()),
3649           /*offset=*/ 0u,
3650           /*index=*/ Location::RegisterLocation(offset.GetCode()));
3651       if (!success.IsValid()) {
3652         update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3653             invoke,
3654             Location::RegisterLocation(old_value.GetCode()),
3655             Location::RegisterLocation(old_value_temp.GetCode()),
3656             Location::RegisterLocation(base.GetCode()),
3657             /*offset=*/ 0u,
3658             /*index=*/ Location::RegisterLocation(offset.GetCode()));
3659       }
3660     }
3661   }
3662 
GetDescription() const3663   const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3664 
EmitNativeCode(CodeGenerator * codegen)3665   void EmitNativeCode(CodeGenerator* codegen) override {
3666     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3667     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3668     __ Bind(GetEntryLabel());
3669 
3670     // Mark the `old_value_` from the main path and compare with `expected_`.
3671     if (kUseBakerReadBarrier) {
3672       DCHECK(mark_old_value_slow_path_ == nullptr);
3673       arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3674     } else {
3675       DCHECK(mark_old_value_slow_path_ != nullptr);
3676       __ B(mark_old_value_slow_path_->GetEntryLabel());
3677       __ Bind(mark_old_value_slow_path_->GetExitLabel());
3678     }
3679     __ Cmp(old_value_temp_, expected_);
3680     if (success_.IsValid()) {
3681       __ Mov(LeaveFlags, success_, 0);  // Indicate failure if we take the branch out.
3682     } else {
3683       // In case of failure, update the `old_value_` with the marked reference.
3684       ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3685       __ it(ne);
3686       __ mov(ne, old_value_, old_value_temp_);
3687     }
3688     __ B(ne, GetExitLabel());
3689 
3690     // The old value we have read did not match `expected` (which is always a to-space
3691     // reference) but after the read barrier the marked to-space value matched, so the
3692     // old value must be a from-space reference to the same object. Do the same CAS loop
3693     // as the main path but check for both `expected` and the unmarked old value
3694     // representing the to-space and from-space references for the same object.
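    // In other words (sketch): if `expected_` is the to-space copy and the field still holds the
    // from-space copy of the same object, the loop below succeeds on either value; any other
    // value is a genuine mismatch and goes to `mark_old_value` or straight to the exit.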
3695 
3696     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3697     vixl32::Register tmp_ptr = temps.Acquire();
3698 
3699     // Recalculate the `tmp_ptr` clobbered above.
3700     __ Add(tmp_ptr, base_, offset_);
3701 
3702     vixl32::Label mark_old_value;
3703     GenerateCompareAndSet(arm_codegen,
3704                           DataType::Type::kReference,
3705                           strong_,
3706                           /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3707                           /*cmp_failure_is_far_target=*/ success_.IsValid(),
3708                           tmp_ptr,
3709                           /*expected=*/ LocationFrom(expected_, old_value_),
3710                           /*new_value=*/ LocationFrom(new_value_),
3711                           /*old_value=*/ LocationFrom(old_value_temp_),
3712                           store_result_,
3713                           success_);
3714     if (!success_.IsValid()) {
3715       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3716       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3717       __ Mov(old_value_, expected_);
3718     }
3719 
3720     __ B(GetExitLabel());
3721 
3722     if (!success_.IsValid()) {
3723       __ Bind(&mark_old_value);
3724       if (kUseBakerReadBarrier) {
3725         DCHECK(update_old_value_slow_path_ == nullptr);
3726         arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3727       } else {
3728         // Note: We could redirect the `failure` above directly to the entry label and bind
3729         // the exit label in the main path, but the main path would need to access the
3730         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3731         DCHECK(update_old_value_slow_path_ != nullptr);
3732         __ B(update_old_value_slow_path_->GetEntryLabel());
3733         __ Bind(update_old_value_slow_path_->GetExitLabel());
3734       }
3735       __ B(GetExitLabel());
3736     }
3737   }
3738 
3739  private:
3740   bool strong_;
3741   vixl32::Register base_;
3742   vixl32::Register offset_;
3743   vixl32::Register expected_;
3744   vixl32::Register new_value_;
3745   vixl32::Register old_value_;
3746   vixl32::Register old_value_temp_;
3747   vixl32::Register store_result_;
3748   vixl32::Register success_;
3749   SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3750   SlowPathCodeARMVIXL* update_old_value_slow_path_;
3751 };
3752 
CreateUnsafeCASLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3753 static void CreateUnsafeCASLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3754   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
3755   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3756   LocationSummary* locations =
3757       new (allocator) LocationSummary(invoke,
3758                                       can_call
3759                                           ? LocationSummary::kCallOnSlowPath
3760                                           : LocationSummary::kNoCall,
3761                                       kIntrinsified);
3762   if (can_call && kUseBakerReadBarrier) {
3763     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3764   }
3765   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3766   locations->SetInAt(1, Location::RequiresRegister());
3767   locations->SetInAt(2, Location::RequiresRegister());
3768   locations->SetInAt(3, Location::RequiresRegister());
3769   locations->SetInAt(4, Location::RequiresRegister());
3770 
3771   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3772 
3773   // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3774   // this is also used for card-marking, and possibly for read barrier.
3775   locations->AddTemp(Location::RequiresRegister());
3776 }
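// A sketch of the layout produced above, as consumed by GenUnsafeCas() below: inputs 1..4 are
// (object, offset, expected, new value), the output is the success flag, and the single
// temporary serves as the CAS temporary and, for references, the card-marking temporary.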
3777 
GenUnsafeCas(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)3778 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3779   DCHECK_NE(type, DataType::Type::kInt64);
3780 
3781   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3782   LocationSummary* locations = invoke->GetLocations();
3783 
3784   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
3785   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
3786   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
3787   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
3788   vixl32::Register new_value = InputRegisterAt(invoke, 4);            // New value.
3789 
3790   vixl32::Register tmp = RegisterFrom(locations->GetTemp(0));         // Temporary.
3791 
3792   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3793   vixl32::Register tmp_ptr = temps.Acquire();
3794 
3795   if (type == DataType::Type::kReference) {
3796     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
3797     // object and scan the receiver at the next GC for nothing.
3798     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3799     codegen->MaybeMarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3800   }
3801 
3802   vixl32::Label exit_loop_label;
3803   vixl32::Label* exit_loop = &exit_loop_label;
3804   vixl32::Label* cmp_failure = &exit_loop_label;
3805 
3806   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3807     // If marking, check if the stored reference is a from-space reference to the same
3808     // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3809     ReadBarrierCasSlowPathARMVIXL* slow_path =
3810         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3811             invoke,
3812             /*strong=*/ true,
3813             base,
3814             offset,
3815             expected,
3816             new_value,
3817             /*old_value=*/ tmp,
3818             /*old_value_temp=*/ out,
3819             /*store_result=*/ out,
3820             /*success=*/ out,
3821             codegen);
3822     codegen->AddSlowPath(slow_path);
3823     exit_loop = slow_path->GetExitLabel();
3824     cmp_failure = slow_path->GetEntryLabel();
3825   }
3826 
3827   // Unsafe CAS operations have std::memory_order_seq_cst semantics.
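  // Roughly (sketch): dmb ish; <exclusive-access CAS loop>; dmb ish, with the address computed
  // as base + offset just before the loop.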
3828   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3829   __ Add(tmp_ptr, base, offset);
3830   GenerateCompareAndSet(codegen,
3831                         type,
3832                         /*strong=*/ true,
3833                         cmp_failure,
3834                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3835                         tmp_ptr,
3836                         /*expected=*/ LocationFrom(expected),  // TODO: Int64
3837                         /*new_value=*/ LocationFrom(new_value),  // TODO: Int64
3838                         /*old_value=*/ LocationFrom(tmp),  // TODO: Int64
3839                         /*store_result=*/ tmp,
3840                         /*success=*/ out);
3841   __ Bind(exit_loop);
3842   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3843 
3844   if (type == DataType::Type::kReference) {
3845     codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3846   }
3847 }
3848 
VisitUnsafeCASInt(HInvoke * invoke)3849 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3850   VisitJdkUnsafeCASInt(invoke);
3851 }
VisitUnsafeCASObject(HInvoke * invoke)3852 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3853   VisitJdkUnsafeCASObject(invoke);
3854 }
3855 
VisitJdkUnsafeCASInt(HInvoke * invoke)3856 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3857   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3858   VisitJdkUnsafeCompareAndSetInt(invoke);
3859 }
VisitJdkUnsafeCASObject(HInvoke * invoke)3860 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3861   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3862   VisitJdkUnsafeCompareAndSetReference(invoke);
3863 }
3864 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)3865 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3866   CreateUnsafeCASLocations(invoke, codegen_);
3867 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3868 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3869   // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3870   if (codegen_->EmitNonBakerReadBarrier()) {
3871     return;
3872   }
3873 
3874   CreateUnsafeCASLocations(invoke, codegen_);
3875 }
3876 
VisitUnsafeCASInt(HInvoke * invoke)3877 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3878   VisitJdkUnsafeCASInt(invoke);
3879 }
VisitUnsafeCASObject(HInvoke * invoke)3880 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3881   VisitJdkUnsafeCASObject(invoke);
3882 }
3883 
VisitJdkUnsafeCASInt(HInvoke * invoke)3884 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3885   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3886   VisitJdkUnsafeCompareAndSetInt(invoke);
3887 }
VisitJdkUnsafeCASObject(HInvoke * invoke)3888 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3889   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3890   VisitJdkUnsafeCompareAndSetReference(invoke);
3891 }
3892 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)3893 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3894   GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
3895 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3896 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3897   // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3898   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3899 
3900   GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
3901 }
3902 
3903 enum class GetAndUpdateOp {
3904   kSet,
3905   kAdd,
3906   kAddWithByteSwap,
3907   kAnd,
3908   kOr,
3909   kXor
3910 };
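// Rough mapping (assumed from the visitors below and the VarHandle intrinsics): kSet and kAdd
// back the Unsafe getAndSet*/getAndAdd* intrinsics, kAnd/kOr/kXor back the VarHandle bitwise
// get-and-update operations, and kAddWithByteSwap handles getAndAdd on byte array views
// accessed in non-native byte order.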
3911 
GenerateGetAndUpdate(CodeGeneratorARMVIXL * codegen,GetAndUpdateOp get_and_update_op,DataType::Type load_store_type,vixl32::Register ptr,Location arg,Location old_value,vixl32::Register store_result,Location maybe_temp,Location maybe_vreg_temp)3912 static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
3913                                  GetAndUpdateOp get_and_update_op,
3914                                  DataType::Type load_store_type,
3915                                  vixl32::Register ptr,
3916                                  Location arg,
3917                                  Location old_value,
3918                                  vixl32::Register store_result,
3919                                  Location maybe_temp,
3920                                  Location maybe_vreg_temp) {
3921   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3922 
3923   Location loaded_value;
3924   Location new_value;
3925   switch (get_and_update_op) {
3926     case GetAndUpdateOp::kSet:
3927       loaded_value = old_value;
3928       new_value = arg;
3929       break;
3930     case GetAndUpdateOp::kAddWithByteSwap:
3931       if (old_value.IsRegisterPair()) {
3932         // To avoid register overlap when reversing bytes, load into temps.
3933         DCHECK(maybe_temp.IsRegisterPair());
3934         loaded_value = maybe_temp;
3935         new_value = loaded_value;  // Use the same temporaries for the new value.
3936         break;
3937       }
3938       FALLTHROUGH_INTENDED;
3939     case GetAndUpdateOp::kAdd:
3940       if (old_value.IsFpuRegisterPair()) {
3941         DCHECK(maybe_temp.IsRegisterPair());
3942         loaded_value = maybe_temp;
3943         new_value = loaded_value;  // Use the same temporaries for the new value.
3944         break;
3945       }
3946       if (old_value.IsFpuRegister()) {
3947         DCHECK(maybe_temp.IsRegister());
3948         loaded_value = maybe_temp;
3949         new_value = loaded_value;  // Use the same temporary for the new value.
3950         break;
3951       }
3952       FALLTHROUGH_INTENDED;
3953     case GetAndUpdateOp::kAnd:
3954     case GetAndUpdateOp::kOr:
3955     case GetAndUpdateOp::kXor:
3956       loaded_value = old_value;
3957       new_value = maybe_temp;
3958       break;
3959   }
3960 
3961   vixl32::Label loop_label;
3962   __ Bind(&loop_label);
3963   EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3964   switch (get_and_update_op) {
3965     case GetAndUpdateOp::kSet:
3966       break;
3967     case GetAndUpdateOp::kAddWithByteSwap:
3968       if (arg.IsFpuRegisterPair()) {
3969         GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3970         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3971         __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3972         __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum);  // Swap low/high.
3973       } else if (arg.IsFpuRegister()) {
3974         GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3975         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3976         __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3977         __ Vmov(RegisterFrom(new_value), sum);
3978       } else if (load_store_type == DataType::Type::kInt64) {
3979         GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3980         // Swap low/high registers for the addition results.
3981         __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3982         __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3983       } else {
3984         GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3985         __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3986       }
3987       if (load_store_type == DataType::Type::kInt64) {
3988         // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3989         GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3990       } else {
3991         GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3992       }
3993       break;
3994     case GetAndUpdateOp::kAdd:
3995       if (arg.IsFpuRegisterPair()) {
3996         vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3997         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3998         __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3999         __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
4000         __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
4001       } else if (arg.IsFpuRegister()) {
4002         vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
4003         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
4004         __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
4005         __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
4006         __ Vmov(RegisterFrom(new_value), sum);
4007       } else if (load_store_type == DataType::Type::kInt64) {
4008         __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4009         __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4010       } else {
4011         __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4012       }
4013       break;
4014     case GetAndUpdateOp::kAnd:
4015       if (load_store_type == DataType::Type::kInt64) {
4016         __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4017         __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4018       } else {
4019         __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4020       }
4021       break;
4022     case GetAndUpdateOp::kOr:
4023       if (load_store_type == DataType::Type::kInt64) {
4024         __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4025         __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4026       } else {
4027         __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4028       }
4029       break;
4030     case GetAndUpdateOp::kXor:
4031       if (load_store_type == DataType::Type::kInt64) {
4032         __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4033         __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4034       } else {
4035         __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4036       }
4037       break;
4038   }
4039   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
4040   __ Cmp(store_result, 0);
4041   __ B(ne, &loop_label);
4042 }
4043 
CreateUnsafeGetAndUpdateLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,GetAndUpdateOp get_and_update_op)4044 static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke,
4045                                               CodeGeneratorARMVIXL* codegen,
4046                                               DataType::Type type,
4047                                               GetAndUpdateOp get_and_update_op) {
4048   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
4049   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4050   LocationSummary* locations =
4051       new (allocator) LocationSummary(invoke,
4052                                       can_call
4053                                           ? LocationSummary::kCallOnSlowPath
4054                                           : LocationSummary::kNoCall,
4055                                       kIntrinsified);
4056   if (can_call && kUseBakerReadBarrier) {
4057     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4058   }
4059   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
4060   locations->SetInAt(1, Location::RequiresRegister());
4061   locations->SetInAt(2, Location::RequiresRegister());
4062   locations->SetInAt(3, Location::RequiresRegister());
4063 
4064   // Request another temporary register for methods that don't return a value.
4065   size_t num_temps = 1u;  // We always need `tmp_ptr`.
4066   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4067   if (is_void) {
4068     num_temps++;
4069   } else {
4070     locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4071   }
4072 
4073   if (get_and_update_op == GetAndUpdateOp::kAdd) {
4074     // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`.
4075     num_temps += (type == DataType::Type::kInt64) ? 2u : 1u;
4076     if (type == DataType::Type::kInt64) {
4077       // There are enough available registers but the register allocator can fail to allocate
4078       // them correctly because it can block register pairs by single-register inputs and temps.
4079       // To work around this limitation, use a fixed register pair for both the output as well
4080       // as the offset which is not needed anymore after the address calculation.
4081       // (Alternatively, we could set up distinct fixed locations for `offset`, `arg` and `out`.)
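      // Concretely (sketch): the 64-bit `offset` arrives in the r0/r1 pair, is consumed by the
      // address calculation before the exclusive-access loop, and the same pair is then reused
      // to hold the 64-bit result.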
4082       locations->SetInAt(2, LocationFrom(r0, r1));
4083       locations->UpdateOut(LocationFrom(r0, r1));
4084     }
4085   }
4086   locations->AddRegisterTemps(num_temps);
4087 }
4088 
GenUnsafeGetAndUpdate(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,GetAndUpdateOp get_and_update_op)4089 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
4090                                   CodeGeneratorARMVIXL* codegen,
4091                                   DataType::Type type,
4092                                   GetAndUpdateOp get_and_update_op) {
4093   // Currently only used for these GetAndUpdateOp values. Other ops might work as well, but
4094   // double-check before using them.
4095   DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
4096 
4097   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4098   LocationSummary* locations = invoke->GetLocations();
4099 
4100   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4101 
4102   // We use a temporary for void methods, as we don't return the value.
4103   Location out_or_temp =
4104       is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
4105   vixl32::Register base = InputRegisterAt(invoke, 1);               // Object pointer.
4106   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));    // Offset (discard high 4B).
4107   Location arg = locations->InAt(3);                                // New value or addend.
4108   vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));   // Pointer to actual memory.
4109   Location maybe_temp = Location::NoLocation();
4110   if (get_and_update_op == GetAndUpdateOp::kAdd) {
4111     maybe_temp = (type == DataType::Type::kInt64)
4112         ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
4113         : locations->GetTemp(1);
4114   }
4115 
4116   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4117   vixl32::Register temp = temps.Acquire();
4118 
4119   if (type == DataType::Type::kReference) {
4120     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4121     // Mark card for object as a new value shall be stored.
4122     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
4123     vixl32::Register card = tmp_ptr;  // Use the `tmp_ptr` also as the `card` temporary.
4124     codegen->MaybeMarkGCCard(temp, card, base, /*value=*/ RegisterFrom(arg), new_value_can_be_null);
4125   }
4126 
4127   // Note: UnsafeGetAndUpdate operations are sequentially consistent, requiring
4128   // a barrier before and after the raw load/store-exclusive operation.
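  // Roughly (sketch): add tmp_ptr, base, offset; dmb ish; <load/store-exclusive update loop>;
  // dmb ish.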
4129 
4130   __ Add(tmp_ptr, base, Operand(offset));
4131   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4132   GenerateGetAndUpdate(codegen,
4133                        get_and_update_op,
4134                        type,
4135                        tmp_ptr,
4136                        arg,
4137                        /*old_value=*/ out_or_temp,
4138                        /*store_result=*/ temp,
4139                        maybe_temp,
4140                        /*maybe_vreg_temp=*/ Location::NoLocation());
4141   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4142 
4143   if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
4144     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4145     if (kUseBakerReadBarrier) {
4146       codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out_or_temp),
4147                                                          RegisterFrom(out_or_temp));
4148     } else {
4149       codegen->GenerateReadBarrierSlow(invoke,
4150                                        out_or_temp,
4151                                        out_or_temp,
4152                                        Location::RegisterLocation(base.GetCode()),
4153                                        /*offset=*/ 0u,
4154                                        /*index=*/ Location::RegisterLocation(offset.GetCode()));
4155     }
4156   }
4157 }
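// At the Java level these code paths back e.g. Unsafe.getAndAddInt / getAndSetObject (see the
// visitors below); for the reference case the helper above also marks the GC card and, when
// read barriers are enabled, marks the returned reference.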
4158 
VisitUnsafeGetAndAddInt(HInvoke * invoke)4159 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
4160   VisitJdkUnsafeGetAndAddInt(invoke);
4161 }
VisitUnsafeGetAndAddLong(HInvoke * invoke)4162 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
4163   VisitJdkUnsafeGetAndAddLong(invoke);
4164 }
VisitUnsafeGetAndSetInt(HInvoke * invoke)4165 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
4166   VisitJdkUnsafeGetAndSetInt(invoke);
4167 }
VisitUnsafeGetAndSetLong(HInvoke * invoke)4168 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
4169   VisitJdkUnsafeGetAndSetLong(invoke);
4170 }
VisitUnsafeGetAndSetObject(HInvoke * invoke)4171 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
4172   VisitJdkUnsafeGetAndSetReference(invoke);
4173 }
4174 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)4175 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
4176   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
4177 }
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)4178 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
4179   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
4180 }
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)4181 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
4182   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
4183 }
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)4184 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
4185   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
4186 }
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)4187 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
4188   CreateUnsafeGetAndUpdateLocations(
4189       invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
4190 }
4191 
VisitUnsafeGetAndAddInt(HInvoke * invoke)4192 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
4193   VisitJdkUnsafeGetAndAddInt(invoke);
4194 }
VisitUnsafeGetAndAddLong(HInvoke * invoke)4195 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
4196   VisitJdkUnsafeGetAndAddLong(invoke);
4197 }
VisitUnsafeGetAndSetInt(HInvoke * invoke)4198 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
4199   VisitJdkUnsafeGetAndSetInt(invoke);
4200 }
VisitUnsafeGetAndSetLong(HInvoke * invoke)4201 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
4202   VisitJdkUnsafeGetAndSetLong(invoke);
4203 }
VisitUnsafeGetAndSetObject(HInvoke * invoke)4204 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
4205   VisitJdkUnsafeGetAndSetReference(invoke);
4206 }
4207 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)4208 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
4209   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
4210 }
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)4211 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
4212   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
4213 }
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)4214 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
4215   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
4216 }
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)4217 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
4218   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
4219 }
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)4220 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
4221   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
4222 }
4223 
4224 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
4225  public:
VarHandleSlowPathARMVIXL(HInvoke * invoke,std::memory_order order)4226   VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
4227       : IntrinsicSlowPathARMVIXL(invoke),
4228         order_(order),
4229         atomic_(false),
4230         return_success_(false),
4231         strong_(false),
4232         get_and_update_op_(GetAndUpdateOp::kAdd) {
4233   }
4234 
GetByteArrayViewCheckLabel()4235   vixl32::Label* GetByteArrayViewCheckLabel() {
4236     return &byte_array_view_check_label_;
4237   }
4238 
GetNativeByteOrderLabel()4239   vixl32::Label* GetNativeByteOrderLabel() {
4240     return &native_byte_order_label_;
4241   }
4242 
SetAtomic(bool atomic)4243   void SetAtomic(bool atomic) {
4244     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
4245            GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
4246     atomic_ = atomic;
4247   }
4248 
SetCompareAndSetOrExchangeArgs(bool return_success,bool strong)4249   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4250     if (return_success) {
4251       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4252     } else {
4253       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4254     }
4255     return_success_ = return_success;
4256     strong_ = strong;
4257   }
4258 
SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op)4259   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4260     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4261     get_and_update_op_ = get_and_update_op;
4262   }
4263 
EmitNativeCode(CodeGenerator * codegen_in)4264   void EmitNativeCode(CodeGenerator* codegen_in) override {
4265     if (GetByteArrayViewCheckLabel()->IsReferenced()) {
4266       EmitByteArrayViewCode(codegen_in);
4267     }
4268     IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
4269   }
4270 
4271  private:
GetInvoke() const4272   HInvoke* GetInvoke() const {
4273     return GetInstruction()->AsInvoke();
4274   }
4275 
GetAccessModeTemplate() const4276   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4277     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4278   }
4279 
4280   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4281 
4282   vixl32::Label byte_array_view_check_label_;
4283   vixl32::Label native_byte_order_label_;
4284   // Shared parameter for all VarHandle intrinsics.
4285   std::memory_order order_;
4286   // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
4287   bool atomic_;
4288   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4289   bool return_success_;
4290   bool strong_;
4291   // Extra argument for GenerateVarHandleGetAndUpdate().
4292   GetAndUpdateOp get_and_update_op_;
4293 };
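// Note (sketch): EmitNativeCode() above emits the byte array view code only when the fast path
// actually referenced GetByteArrayViewCheckLabel(); otherwise it falls back to the generic
// IntrinsicSlowPathARMVIXL code.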
4294 
4295 // Generate subtype check without read barriers.
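// In pseudocode, the check emitted below is roughly:
//   if (object == null) return;                     // only when object_can_be_null
//   k = object->klass;
//   while (true) {
//     if (k == type) return;                        // success
//     k = k->super_class;
//     if (k == null) goto slow_path;                // may be a false negative, handled there
//   }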
GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL * codegen,SlowPathCodeARMVIXL * slow_path,vixl32::Register object,vixl32::Register type,bool object_can_be_null=true)4296 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
4297                                                     SlowPathCodeARMVIXL* slow_path,
4298                                                     vixl32::Register object,
4299                                                     vixl32::Register type,
4300                                                     bool object_can_be_null = true) {
4301   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4302 
4303   const MemberOffset class_offset = mirror::Object::ClassOffset();
4304   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4305 
4306   vixl32::Label success;
4307   if (object_can_be_null) {
4308     __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
4309   }
4310 
4311   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4312   vixl32::Register temp = temps.Acquire();
4313 
4314   __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
4315   assembler->MaybeUnpoisonHeapReference(temp);
4316   vixl32::Label loop;
4317   __ Bind(&loop);
4318   __ Cmp(type, temp);
4319   __ B(eq, &success, /*is_far_target=*/ false);
4320   __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
4321   assembler->MaybeUnpoisonHeapReference(temp);
4322   __ Cmp(temp, 0);
4323   __ B(eq, slow_path->GetEntryLabel());
4324   __ B(&loop);
4325   __ Bind(&success);
4326 }
4327 
4328 // Check access mode and the primitive type from VarHandle.varType.
4329 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4330 // check without read barrier, so it can have false negatives which we handle in the slow path.
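// Roughly (sketch): load (varType, accessModesBitMask) with a single LDRD, test the bit for the
// requested access mode, compare varType->primitive_type against the expected primitive type,
// and for reference-typed arguments run the subtype check above against varType.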
GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,SlowPathCodeARMVIXL * slow_path,DataType::Type type)4331 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4332                                                         CodeGeneratorARMVIXL* codegen,
4333                                                         SlowPathCodeARMVIXL* slow_path,
4334                                                         DataType::Type type) {
4335   mirror::VarHandle::AccessMode access_mode =
4336       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4337   Primitive::Type primitive_type = DataTypeToPrimitive(type);
4338 
4339   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4340   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4341 
4342   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4343   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4344   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4345 
4346   // Use the temporary register reserved for offset. It is not used yet at this point.
4347   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4348   vixl32::Register var_type_no_rb =
4349       RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
4350 
4351   // Check that the operation is permitted and that the primitive type of varhandle.varType
4352   // matches the expected type. We do not need a read barrier when loading a reference only to
4353   // read a constant primitive field through that reference. Use LDRD to load the fields together.
4354   {
4355     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4356     vixl32::Register temp2 = temps.Acquire();
4357     DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4358     __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
4359     assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
4360     __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
4361     __ B(eq, slow_path->GetEntryLabel());
4362     __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4363     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4364     __ B(ne, slow_path->GetEntryLabel());
4365   }
4366 
4367   if (type == DataType::Type::kReference) {
4368     // Check reference arguments against the varType.
4369     // False negatives due to varType being an interface or array type
4370     // or due to the missing read barrier are handled by the slow path.
4371     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4372     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4373     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4374       HInstruction* arg = invoke->InputAt(arg_index);
4375       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4376       if (!arg->IsNullConstant()) {
4377         vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
4378         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4379       }
4380     }
4381   }
4382 }
4383 
GenerateVarHandleStaticFieldCheck(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,SlowPathCodeARMVIXL * slow_path)4384 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4385                                               CodeGeneratorARMVIXL* codegen,
4386                                               SlowPathCodeARMVIXL* slow_path) {
4387   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4388   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4389 
4390   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4391 
4392   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4393   vixl32::Register temp = temps.Acquire();
4394 
4395   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4396   // Do not emit read barrier (or unpoison the reference) for comparing to null.
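  // (As used by the checks in this file: a static field VarHandle has coordinateType0 == null,
  // an instance field VarHandle has coordinateType0 set and coordinateType1 == null, and
  // array / byte array view / ByteBuffer VarHandles have both coordinate types set.)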
4397   __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4398   __ Cmp(temp, 0);
4399   __ B(ne, slow_path->GetEntryLabel());
4400 }
4401 
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,SlowPathCodeARMVIXL * slow_path)4402 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4403                                                  CodeGeneratorARMVIXL* codegen,
4404                                                  SlowPathCodeARMVIXL* slow_path) {
4405   VarHandleOptimizations optimizations(invoke);
4406   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4407   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4408   vixl32::Register object = InputRegisterAt(invoke, 1);
4409 
4410   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4411   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4412 
4413   // Null-check the object.
4414   if (!optimizations.GetSkipObjectNullCheck()) {
4415     __ Cmp(object, 0);
4416     __ B(eq, slow_path->GetEntryLabel());
4417   }
4418 
4419   if (!optimizations.GetUseKnownImageVarHandle()) {
4420     // Use the first temporary register, whether it's for the declaring class or the offset.
4421     // It is not used yet at this point.
4422     vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4423 
4424     // Check that the VarHandle references an instance field by checking that
4425     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4426     // type compatibility check with the source object's type, which will fail for null.
4427     {
4428       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4429       vixl32::Register temp2 = temps.Acquire();
4430       DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4431       __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4432       assembler->MaybeUnpoisonHeapReference(temp);
4433       // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4434       __ Cmp(temp2, 0);
4435       __ B(ne, slow_path->GetEntryLabel());
4436     }
4437 
4438     // Check that the object has the correct type.
4439     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4440     GenerateSubTypeObjectCheckNoReadBarrier(
4441         codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4442   }
4443 }
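// Illustrative summary (not emitted code) of the instance-field checks above:
//   if (object == null) goto slow_path;                        // unless proven non-null
//   if (varhandle->coordinateType1 != null) goto slow_path;    // not an instance-field handle
//   if (!IsSubClass(object->klass, coordinateType0)) goto slow_path;  // no read barrier;
//                                                                     // false negatives only
// where `IsSubClass` stands in for the superclass walk performed by
// GenerateSubTypeObjectCheckNoReadBarrier().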
4444 
4445 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4446                                          CodeGeneratorARMVIXL* codegen,
4447                                          VarHandleSlowPathARMVIXL* slow_path) {
4448   VarHandleOptimizations optimizations(invoke);
4449   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4450   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4451   vixl32::Register object = InputRegisterAt(invoke, 1);
4452   vixl32::Register index = InputRegisterAt(invoke, 2);
4453   DataType::Type value_type =
4454       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4455   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4456 
4457   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4458   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4459   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4460   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4461   const MemberOffset class_offset = mirror::Object::ClassOffset();
4462   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4463 
4464   // Null-check the object.
4465   if (!optimizations.GetSkipObjectNullCheck()) {
4466     __ Cmp(object, 0);
4467     __ B(eq, slow_path->GetEntryLabel());
4468   }
4469 
4470   // Use the offset temporary register. It is not used yet at this point.
4471   vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4472 
4473   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4474   vixl32::Register temp2 = temps.Acquire();
4475 
4476   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4477   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4478   // coordinateType0 shall not be null but we do not explicitly verify that.
4479   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4480   __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4481   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4482   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4483   __ Cmp(temp2, 0);
4484   __ B(eq, slow_path->GetEntryLabel());
4485 
4486   // Check the object's class against coordinateType0.
4487   //
4488   // This is an exact check and we defer other cases to the runtime. This includes
4489   // conversion to array of superclass references, which is valid but subsequently
4490   // requires all update operations to check that the value can indeed be stored.
4491   // We do not want to perform such extra checks in the intrinsified code.
4492   //
4493   // We do this check without read barrier, so there can be false negatives which we
4494   // defer to the slow path. There shall be no false negatives for array classes in the
4495   // boot image (including Object[] and primitive arrays) because they are non-movable.
4496   __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
4497   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4498   __ Cmp(temp, temp2);
4499   __ B(ne, slow_path->GetEntryLabel());
4500 
4501   // Check that the coordinateType0 is an array type. We do not need a read barrier
4502   // for loading constant reference fields (or chains of them) for comparison with null,
4503   // nor for finally loading a constant primitive field (primitive type) below.
4504   __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
4505   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4506   __ Cmp(temp2, 0);
4507   __ B(eq, slow_path->GetEntryLabel());
4508 
4509   // Check that the array component type matches the primitive type.
4510   // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
4511   // we shall check for a byte array view in the slow path.
4512   // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4513   // so we cannot emit that if we're JITting without boot image.
4514   bool boot_image_available =
4515       codegen->GetCompilerOptions().IsBootImage() ||
4516       !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4517   bool can_be_view =
4518       ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4519       boot_image_available;
4520   vixl32::Label* slow_path_label =
4521       can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4522   __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4523   __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4524   __ B(ne, slow_path_label);
4525 
4526   // Check for array index out of bounds.
4527   __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4528   __ Cmp(index, temp);
4529   __ B(hs, slow_path->GetEntryLabel());
4530 }
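// Illustrative summary (not emitted code) of the array/byte-view checks above:
//   if (object == null) goto slow_path;                        // unless proven non-null
//   if (coordinateType1 == null) goto slow_path;               // not an array/view handle
//   if (object->klass != coordinateType0) goto slow_path;      // exact match, no read barrier
//   if (coordinateType0->component_type == null) goto slow_path;
//   if (component primitive type != expected) goto byte_array_view_check or slow_path;
//   if (index >= object->length) goto slow_path;
// The byte-array-view label is taken only for multi-byte, non-reference value types and only
// when a boot image (and thus ByteArrayViewVarHandle.class) is available.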
4531 
4532 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4533                                               CodeGeneratorARMVIXL* codegen,
4534                                               VarHandleSlowPathARMVIXL* slow_path) {
4535   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4536   if (expected_coordinates_count == 0u) {
4537     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4538   } else if (expected_coordinates_count == 1u) {
4539     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4540   } else {
4541     DCHECK_EQ(expected_coordinates_count, 2u);
4542     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4543   }
4544 }
4545 
4546 static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
4547                                                          CodeGeneratorARMVIXL* codegen,
4548                                                          std::memory_order order,
4549                                                          DataType::Type type) {
4550   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4551   VarHandleOptimizations optimizations(invoke);
4552   if (optimizations.GetUseKnownImageVarHandle()) {
4553     DCHECK_NE(expected_coordinates_count, 2u);
4554     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4555       return nullptr;
4556     }
4557   }
4558 
4559   VarHandleSlowPathARMVIXL* slow_path =
4560       new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4561   codegen->AddSlowPath(slow_path);
4562 
4563   if (!optimizations.GetUseKnownImageVarHandle()) {
4564     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4565   }
4566   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4567 
4568   return slow_path;
4569 }
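// Note: GenerateVarHandleChecks() returns null when a VarHandle known to be in the boot
// image needs no runtime checks at all (static field, or instance field with the null check
// elided), so callers guard every use of the returned slow path with a null check.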
4570 
4571 struct VarHandleTarget {
4572   vixl32::Register object;  // The object holding the value to operate on.
4573   vixl32::Register offset;  // The offset of the value to operate on.
4574 };
4575 
4576 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4577   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4578   LocationSummary* locations = invoke->GetLocations();
4579 
4580   VarHandleTarget target;
4581   // The temporary allocated for loading the offset.
4582   target.offset = RegisterFrom(locations->GetTemp(0u));
4583   // The reference to the object that holds the value to operate on.
4584   target.object = (expected_coordinates_count == 0u)
4585       ? RegisterFrom(locations->GetTemp(1u))
4586       : InputRegisterAt(invoke, 1);
4587   return target;
4588 }
4589 
4590 static void GenerateVarHandleTarget(HInvoke* invoke,
4591                                     const VarHandleTarget& target,
4592                                     CodeGeneratorARMVIXL* codegen) {
4593   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4594   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4595   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4596 
4597   if (expected_coordinates_count <= 1u) {
4598     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4599       ScopedObjectAccess soa(Thread::Current());
4600       ArtField* target_field = GetBootImageVarHandleField(invoke);
4601       if (expected_coordinates_count == 0u) {
4602         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4603         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4604           uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4605           codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4606         } else {
4607           codegen->LoadTypeForBootImageIntrinsic(
4608               target.object,
4609               TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4610         }
4611       }
4612       __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4613     } else {
4614       // For static fields, we need to fill `target.object` with the declaring class anyway,
4615       // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
4616       // we do not need the declaring class and the `ArtField*` is no longer needed once
4617       // `target.offset` is loaded, so we use `target.offset` itself to hold the `ArtField*`.
4618       vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4619 
4620       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4621       const MemberOffset offset_offset = ArtField::OffsetOffset();
4622 
4623       // Load the ArtField*, the offset and, if needed, declaring class.
4624       __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value()));
4625       __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value()));
4626       if (expected_coordinates_count == 0u) {
4627         codegen->GenerateGcRootFieldLoad(invoke,
4628                                          LocationFrom(target.object),
4629                                          field,
4630                                          ArtField::DeclaringClassOffset().Int32Value(),
4631                                          codegen->GetCompilerReadBarrierOption());
4632       }
4633     }
4634   } else {
4635     DCHECK_EQ(expected_coordinates_count, 2u);
4636     DataType::Type value_type =
4637         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4638     uint32_t size_shift = DataType::SizeShift(value_type);
4639     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4640 
4641     vixl32::Register index = InputRegisterAt(invoke, 2);
4642     vixl32::Register shifted_index = index;
4643     if (size_shift != 0u) {
4644       shifted_index = target.offset;
4645       __ Lsl(shifted_index, index, size_shift);
4646     }
4647     __ Add(target.offset, shifted_index, data_offset.Int32Value());
4648   }
4649 }
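// Illustrative note: for the array/byte-view case above the target resolves to
//   target.object = the array reference (coordinate 1)
//   target.offset = (index << DataType::SizeShift(type)) + mirror::Array::DataOffset(size)
// so every access path below can address the value uniformly as [target.object, target.offset].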
4650 
4651 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
4652                                                        CodeGeneratorARMVIXL* codegen) {
4653   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4654   DataType::Type return_type = invoke->GetType();
4655 
4656   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4657   LocationSummary* locations =
4658       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4659   locations->SetInAt(0, Location::RequiresRegister());
4660   // Require coordinates in registers. These are the object holding the value
4661   // to operate on (except for static fields) and index (for arrays and views).
4662   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4663     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4664   }
4665   if (return_type != DataType::Type::kVoid) {
4666     if (DataType::IsFloatingPointType(return_type)) {
4667       locations->SetOut(Location::RequiresFpuRegister());
4668     } else {
4669       locations->SetOut(Location::RequiresRegister());
4670     }
4671   }
4672   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4673   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4674   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4675     HInstruction* arg = invoke->InputAt(arg_index);
4676     if (DataType::IsFloatingPointType(arg->GetType())) {
4677       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4678     } else {
4679       locations->SetInAt(arg_index, Location::RequiresRegister());
4680     }
4681   }
4682 
4683   // Add a temporary for offset.
4684   if (codegen->EmitNonBakerReadBarrier() &&
4685       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4686     // To preserve the offset value across the non-Baker read barrier slow path
4687     // for loading the declaring class, use a fixed callee-save register.
4688     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4689     locations->AddTemp(Location::RegisterLocation(first_callee_save));
4690   } else {
4691     locations->AddTemp(Location::RequiresRegister());
4692   }
4693   if (expected_coordinates_count == 0u) {
4694     // Add a temporary to hold the declaring class.
4695     locations->AddTemp(Location::RequiresRegister());
4696   }
4697 
4698   return locations;
4699 }
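// Note on the common locations above: temporary 0 always ends up holding the field offset
// (in a fixed callee-save register for static fields under a non-Baker read barrier so it
// survives the slow path), and for static fields temporary 1 holds the declaring class that
// later becomes `target.object`.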
4700 
4701 static void CreateVarHandleGetLocations(HInvoke* invoke,
4702                                         CodeGeneratorARMVIXL* codegen,
4703                                         bool atomic) {
4704   VarHandleOptimizations optimizations(invoke);
4705   if (optimizations.GetDoNotIntrinsify()) {
4706     return;
4707   }
4708 
4709   if (codegen->EmitNonBakerReadBarrier() &&
4710       invoke->GetType() == DataType::Type::kReference &&
4711       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4712       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4713     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4714     // the passed reference and reloads it from the field. This gets the memory visibility
4715     // wrong for Acquire/Volatile operations. b/173104084
4716     return;
4717   }
4718 
4719   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4720 
4721   DataType::Type type = invoke->GetType();
4722   if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4723     // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4724     // declaring class (if present) and offset temporary.
4725     DCHECK_EQ(locations->GetTempCount(),
4726               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4727     locations->AddRegisterTemps(3u - locations->GetTempCount());
4728   }
4729 }
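// Assumption about the helper: Use64BitExclusiveLoadStore() is taken to mean that an atomic
// 64-bit access cannot rely on a single-copy-atomic LDRD/STRD and must be emitted as an
// LDREXD/STREXD sequence, which is why the Float64 case above needs extra core temporaries.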
4730 
4731 static void GenerateVarHandleGet(HInvoke* invoke,
4732                                  CodeGeneratorARMVIXL* codegen,
4733                                  std::memory_order order,
4734                                  bool atomic,
4735                                  bool byte_swap = false) {
4736   DataType::Type type = invoke->GetType();
4737   DCHECK_NE(type, DataType::Type::kVoid);
4738 
4739   LocationSummary* locations = invoke->GetLocations();
4740   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4741   Location out = locations->Out();
4742 
4743   VarHandleTarget target = GetVarHandleTarget(invoke);
4744   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4745   if (!byte_swap) {
4746     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4747     GenerateVarHandleTarget(invoke, target, codegen);
4748     if (slow_path != nullptr) {
4749       slow_path->SetAtomic(atomic);
4750       __ Bind(slow_path->GetNativeByteOrderLabel());
4751     }
4752   }
4753 
4754   Location maybe_temp = Location::NoLocation();
4755   Location maybe_temp2 = Location::NoLocation();
4756   Location maybe_temp3 = Location::NoLocation();
4757   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
4758     // Reuse the offset temporary.
4759     maybe_temp = LocationFrom(target.offset);
4760   } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4761     // Reuse the offset temporary and declaring class (if present).
4762     // The address shall be constructed in the scratch register before they are clobbered.
4763     maybe_temp = LocationFrom(target.offset);
4764     DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4765     if (type == DataType::Type::kFloat64) {
4766       maybe_temp2 = locations->GetTemp(1);
4767       maybe_temp3 = locations->GetTemp(2);
4768     }
4769   }
4770 
4771   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4772   Location loaded_value = out;
4773   DataType::Type load_type = type;
4774   if (byte_swap) {
4775     if (type == DataType::Type::kFloat64) {
4776       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4777         // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4778         loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4779         maybe_temp2 = Location::NoLocation();
4780         maybe_temp3 = Location::NoLocation();
4781       } else {
4782         // Use the offset temporary and the scratch register.
4783         loaded_value = LocationFrom(target.offset, temps.Acquire());
4784       }
4785       load_type = DataType::Type::kInt64;
4786     } else if (type == DataType::Type::kFloat32) {
4787       // Reuse the offset temporary.
4788       loaded_value = LocationFrom(target.offset);
4789       load_type = DataType::Type::kInt32;
4790     } else if (type == DataType::Type::kInt64) {
4791       // Swap the high and low registers and reverse the bytes in each after the load.
4792       loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4793     }
4794   }
4795 
4796   GenerateIntrinsicGet(invoke,
4797                        codegen,
4798                        load_type,
4799                        order,
4800                        atomic,
4801                        target.object,
4802                        target.offset,
4803                        loaded_value,
4804                        maybe_temp,
4805                        maybe_temp2,
4806                        maybe_temp3);
4807   if (byte_swap) {
4808     if (type == DataType::Type::kInt64) {
4809       GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4810     } else {
4811       GenerateReverseBytes(assembler, type, loaded_value, out);
4812     }
4813   }
4814 
4815   if (slow_path != nullptr) {
4816     DCHECK(!byte_swap);
4817     __ Bind(slow_path->GetExitLabel());
4818   }
4819 }
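// Note: `byte_swap` is false on the fast path; the byte-array-view slow path re-enters
// GenerateVarHandleGet() with `byte_swap == true` after the target has been resolved, which
// is why that path creates no slow path and does not bind the native-byte-order label again.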
4820 
4821 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4822   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
4823 }
4824 
4825 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4826   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4827 }
4828 
4829 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4830   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4831 }
4832 
4833 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4834   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4835 }
4836 
4837 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4838   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4839 }
4840 
4841 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4842   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
4843 }
4844 
4845 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4846   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4847 }
4848 
4849 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4850   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4851 }
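// Summary of the Get flavors above: get() emits a plain relaxed load, getOpaque() a relaxed
// atomic load, getAcquire() an acquire load and getVolatile() a sequentially consistent load;
// the ordering and the `atomic` flag are simply forwarded to GenerateVarHandleGet().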
4852 
4853 static void CreateVarHandleSetLocations(HInvoke* invoke,
4854                                         CodeGeneratorARMVIXL* codegen,
4855                                         bool atomic) {
4856   VarHandleOptimizations optimizations(invoke);
4857   if (optimizations.GetDoNotIntrinsify()) {
4858     return;
4859   }
4860 
4861   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4862 
4863   // Get the type from the shorty as the invokes may not return a value.
4864   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4865   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4866   if (DataType::Is64BitType(value_type)) {
4867     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4868     DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4869     HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4870     bool has_reverse_bytes_slow_path =
4871         (expected_coordinates_count == 2u) &&
4872         !IsZeroBitPattern(arg);
4873     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4874       // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4875       // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4876       // We can reuse the offset temporary and declaring class (if present).
4877       size_t temps_needed = has_reverse_bytes_slow_path
4878           ? 4u
4879           : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4880       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4881     } else if (has_reverse_bytes_slow_path) {
4882       // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4883       // We can reuse the offset temporary.
4884       DCHECK_EQ(locations->GetTempCount(), 1u);
4885       locations->AddTemp(Location::RequiresRegister());
4886     }
4887   }
4888 }
4889 
4890 static void GenerateVarHandleSet(HInvoke* invoke,
4891                                  CodeGeneratorARMVIXL* codegen,
4892                                  std::memory_order order,
4893                                  bool atomic,
4894                                  bool byte_swap = false) {
4895   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4896   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4897 
4898   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4899   LocationSummary* locations = invoke->GetLocations();
4900   Location value = locations->InAt(value_index);
4901 
4902   VarHandleTarget target = GetVarHandleTarget(invoke);
4903   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4904   if (!byte_swap) {
4905     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4906     GenerateVarHandleTarget(invoke, target, codegen);
4907     if (slow_path != nullptr) {
4908       slow_path->SetAtomic(atomic);
4909       __ Bind(slow_path->GetNativeByteOrderLabel());
4910     }
4911   }
4912 
4913   Location maybe_temp = Location::NoLocation();
4914   Location maybe_temp2 = Location::NoLocation();
4915   Location maybe_temp3 = Location::NoLocation();
4916   if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4917     // Reuse the offset temporary and declaring class (if present).
4918     // The address shall be constructed in the scratch register before they are clobbered.
4919     maybe_temp = locations->GetTemp(0);
4920     maybe_temp2 = locations->GetTemp(1);
4921     if (value_type == DataType::Type::kFloat64) {
4922       maybe_temp3 = locations->GetTemp(2);
4923     }
4924   }
4925 
4926   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4927   if (byte_swap) {
4928     if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4929       // Calculate the address in scratch register, so that we can use the offset temporary.
4930       vixl32::Register base = temps.Acquire();
4931       __ Add(base, target.object, target.offset);
4932       target.object = base;
4933       target.offset = vixl32::Register();
4934     }
4935     Location original_value = value;
4936     if (DataType::Is64BitType(value_type)) {
4937       size_t temp_start = 0u;
4938       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4939         // Clear `maybe_temp3` which was initialized above for Float64.
4940         DCHECK_IMPLIES(value_type == DataType::Type::kFloat64,
4941                        maybe_temp3.Equals(locations->GetTemp(2)));
4942         maybe_temp3 = Location::NoLocation();
4943         temp_start = 2u;
4944       }
4945       value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4946                            RegisterFrom(locations->GetTemp(temp_start + 1u)));
4947       if (value_type == DataType::Type::kFloat64) {
4948         __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4949         GenerateReverseBytesInPlaceForEachWord(assembler, value);
4950         value_type = DataType::Type::kInt64;
4951       } else {
4952         GenerateReverseBytes(assembler, value_type, original_value, value);
4953       }
4954     } else if (value_type == DataType::Type::kFloat32) {
4955       value = locations->GetTemp(0);  // Use the offset temporary which was freed above.
4956       __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4957       GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4958       value_type = DataType::Type::kInt32;
4959     } else {
4960       value = LocationFrom(temps.Acquire());
4961       GenerateReverseBytes(assembler, value_type, original_value, value);
4962     }
4963   }
4964 
4965   GenerateIntrinsicSet(codegen,
4966                        value_type,
4967                        order,
4968                        atomic,
4969                        target.object,
4970                        target.offset,
4971                        value,
4972                        maybe_temp,
4973                        maybe_temp2,
4974                        maybe_temp3);
4975 
4976   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4977     // Reuse the offset temporary for MarkGCCard.
4978     vixl32::Register temp = target.offset;
4979     vixl32::Register card = temps.Acquire();
4980     vixl32::Register value_reg = RegisterFrom(value);
4981     codegen->MaybeMarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
4982   }
4983 
4984   if (slow_path != nullptr) {
4985     DCHECK(!byte_swap);
4986     __ Bind(slow_path->GetExitLabel());
4987   }
4988 }
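// Note: for reference stores, the code above reuses the offset temporary plus a scratch
// register to conditionally mark the GC card of `target.object` after the store, the usual
// write-barrier step for heap reference updates.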
4989 
4990 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4991   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
4992 }
4993 
4994 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4995   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4996 }
4997 
4998 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4999   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
5000 }
5001 
5002 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
5003   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
5004 }
5005 
5006 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
5007   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
5008 }
5009 
5010 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
5011   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
5012 }
5013 
5014 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
5015   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
5016 }
5017 
5018 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
5019   // ARM store-release instructions are implicitly sequentially consistent.
5020   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
5021 }
5022 
5023 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
5024                                                             CodeGeneratorARMVIXL* codegen,
5025                                                             bool return_success) {
5026   VarHandleOptimizations optimizations(invoke);
5027   if (optimizations.GetDoNotIntrinsify()) {
5028     return;
5029   }
5030 
5031   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5032   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
5033   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5034     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5035     // the passed reference and reloads it from the field. This breaks the read barriers
5036     // in slow path in different ways. The marked old value may not actually be a to-space
5037     // reference to the same object as `old_value`, breaking slow path assumptions. And
5038     // for CompareAndExchange, marking the old value after comparison failure may actually
5039     // return the reference to `expected`, erroneously indicating success even though we
5040     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
5041     return;
5042   }
5043 
5044   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5045 
5046   if (codegen->EmitNonBakerReadBarrier()) {
5047     // We need callee-save registers for both the class object and offset instead of
5048     // the temporaries reserved in CreateVarHandleCommonLocations().
5049     static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
5050     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
5051     constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
5052     if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
5053       DCHECK_EQ(locations->GetTempCount(), 2u);
5054       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
5055       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
5056       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
5057     } else {
5058       DCHECK_EQ(locations->GetTempCount(), 1u);
5059       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
5060       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
5061     }
5062   }
5063 
5064   if (DataType::IsFloatingPointType(value_type)) {
5065     // We can reuse the declaring class (if present) and offset temporary.
5066     DCHECK_EQ(locations->GetTempCount(),
5067               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5068     size_t temps_needed = (value_type == DataType::Type::kFloat64)
5069         ? (return_success ? 5u : 7u)
5070         : (return_success ? 3u : 4u);
5071     locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5072   } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5073     // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
5074     DCHECK_EQ(locations->GetTempCount(), 1u);
5075     if (value_type == DataType::Type::kInt64) {
5076       // We would ideally add 4 temps for Int64 but that would simply run out of registers,
5077         // so we instead need to reverse bytes in the actual arguments and undo it at the end.
5078     } else {
5079       locations->AddRegisterTemps(2u);
5080     }
5081   }
5082   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5083     // Add a temporary for store result, also used for the `old_value_temp` in slow path.
5084     locations->AddTemp(Location::RequiresRegister());
5085   }
5086 }
5087 
5088 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
5089                                                      CodeGeneratorARMVIXL* codegen,
5090                                                      std::memory_order order,
5091                                                      bool return_success,
5092                                                      bool strong,
5093                                                      bool byte_swap = false) {
5094   DCHECK(return_success || strong);
5095 
5096   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
5097   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
5098   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5099   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
5100 
5101   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5102   LocationSummary* locations = invoke->GetLocations();
5103   Location expected = locations->InAt(expected_index);
5104   Location new_value = locations->InAt(new_value_index);
5105   Location out = locations->Out();
5106 
5107   VarHandleTarget target = GetVarHandleTarget(invoke);
5108   VarHandleSlowPathARMVIXL* slow_path = nullptr;
5109   if (!byte_swap) {
5110     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5111     GenerateVarHandleTarget(invoke, target, codegen);
5112     if (slow_path != nullptr) {
5113       slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
5114       __ Bind(slow_path->GetNativeByteOrderLabel());
5115     }
5116   }
5117 
5118   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
5119   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
5120   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
5121   DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
5122 
5123   if (release_barrier) {
5124     codegen->GenerateMemoryBarrier(
5125         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
5126   }
5127 
5128   // Calculate the pointer to the value.
5129   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5130   vixl32::Register tmp_ptr = temps.Acquire();
5131   __ Add(tmp_ptr, target.object, target.offset);
5132 
5133   // Move floating point values to temporaries and prepare output registers.
5134   // Note that float/double CAS uses bitwise comparison, rather than the operator==.
5135   // Reuse the declaring class (if present) and offset temporary for non-reference types,
5136   // the address has already been constructed in the scratch register. We are more careful
5137   // for references due to read and write barrier, see below.
5138   Location old_value;
5139   vixl32::Register store_result;
5140   vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
5141   DataType::Type cas_type = value_type;
5142   if (value_type == DataType::Type::kFloat64) {
5143     vixl32::DRegister expected_vreg = DRegisterFrom(expected);
5144     vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
5145     expected =
5146         LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
5147     new_value =
5148         LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
5149     store_result = RegisterFrom(locations->GetTemp(4));
5150     old_value = return_success
5151         ? LocationFrom(success, store_result)
5152         : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
5153     if (byte_swap) {
5154       __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
5155       __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
5156       GenerateReverseBytesInPlaceForEachWord(assembler, expected);
5157       GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
5158     } else {
5159       __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
5160       __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
5161     }
5162     cas_type = DataType::Type::kInt64;
5163   } else if (value_type == DataType::Type::kFloat32) {
5164     vixl32::SRegister expected_vreg = SRegisterFrom(expected);
5165     vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
5166     expected = locations->GetTemp(0);
5167     new_value = locations->GetTemp(1);
5168     store_result = RegisterFrom(locations->GetTemp(2));
5169     old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
5170     __ Vmov(RegisterFrom(expected), expected_vreg);
5171     __ Vmov(RegisterFrom(new_value), new_value_vreg);
5172     if (byte_swap) {
5173       GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
5174       GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
5175     }
5176     cas_type = DataType::Type::kInt32;
5177   } else if (value_type == DataType::Type::kInt64) {
5178     store_result = RegisterFrom(locations->GetTemp(0));
5179     old_value = return_success
5180         ? LocationFrom(success, store_result)
5181         // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
5182         : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
5183     if (byte_swap) {
5184       // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
5185       GenerateReverseBytesInPlaceForEachWord(assembler, expected);
5186       expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
5187       GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
5188       new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
5189     }
5190   } else {
5191     // Use the last temp. For references with read barriers, this is an extra temporary
5192     // allocated to avoid overwriting the temporaries for declaring class (if present)
5193     // and offset as they are needed in the slow path. Otherwise, this is the offset
5194     // temporary which also works for references without read barriers that need the
5195     // object register preserved for the write barrier.
5196     store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
5197     old_value = return_success ? LocationFrom(store_result) : out;
5198     if (byte_swap) {
5199       DCHECK_EQ(locations->GetTempCount(), 3u);
5200       Location original_expected = expected;
5201       Location original_new_value = new_value;
5202       expected = locations->GetTemp(0);
5203       new_value = locations->GetTemp(1);
5204       GenerateReverseBytes(assembler, value_type, original_expected, expected);
5205       GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
5206     }
5207   }
5208 
5209   vixl32::Label exit_loop_label;
5210   vixl32::Label* exit_loop = &exit_loop_label;
5211   vixl32::Label* cmp_failure = &exit_loop_label;
5212 
5213   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5214     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5215     // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
5216     vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
5217     // The slow path store result must not clobber `old_value`.
5218     vixl32::Register slow_path_store_result = old_value_temp;
5219     ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
5220         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
5221             invoke,
5222             strong,
5223             target.object,
5224             target.offset,
5225             RegisterFrom(expected),
5226             RegisterFrom(new_value),
5227             RegisterFrom(old_value),
5228             old_value_temp,
5229             slow_path_store_result,
5230             success,
5231             codegen);
5232     codegen->AddSlowPath(rb_slow_path);
5233     exit_loop = rb_slow_path->GetExitLabel();
5234     cmp_failure = rb_slow_path->GetEntryLabel();
5235   }
5236 
5237   GenerateCompareAndSet(codegen,
5238                         cas_type,
5239                         strong,
5240                         cmp_failure,
5241                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
5242                         tmp_ptr,
5243                         expected,
5244                         new_value,
5245                         old_value,
5246                         store_result,
5247                         success);
5248   __ Bind(exit_loop);
5249 
5250   if (acquire_barrier) {
5251     codegen->GenerateMemoryBarrier(
5252         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5253   }
5254 
5255   if (byte_swap && value_type == DataType::Type::kInt64) {
5256     // Undo byte swapping in `expected` and `new_value`. We do not know whether
5257     // the values in these registers will be needed later.
5258     GenerateReverseBytesInPlaceForEachWord(assembler, expected);
5259     GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
5260   }
5261   if (!return_success) {
5262     if (byte_swap) {
5263       if (value_type == DataType::Type::kInt64) {
5264         GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5265       } else {
5266         GenerateReverseBytes(assembler, value_type, old_value, out);
5267       }
5268     } else if (value_type == DataType::Type::kFloat64) {
5269       __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5270     } else if (value_type == DataType::Type::kFloat32) {
5271       __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
5272     }
5273   }
5274 
5275   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5276     // Reuse the offset temporary and scratch register for MarkGCCard.
5277     vixl32::Register temp = target.offset;
5278     vixl32::Register card = tmp_ptr;
5279     // Mark card for object assuming new value is stored.
5280     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5281     codegen->MaybeMarkGCCard(
5282         temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
5283   }
5284 
5285   if (slow_path != nullptr) {
5286     DCHECK(!byte_swap);
5287     __ Bind(slow_path->GetExitLabel());
5288   }
5289 }
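// Illustrative sketch, assuming GenerateCompareAndSet() emits a standard exclusive-monitor
// loop; for a 32-bit strong CAS the generated code is roughly:
//   retry:
//     ldrex   old, [tmp_ptr]
//     cmp     old, expected
//     bne     cmp_failure
//     strex   res, new_value, [tmp_ptr]
//     cmp     res, #0
//     bne     retry              ; retry only for strong CAS
// with `cmp_failure` redirected to the read-barrier CAS slow path for reference values, as
// set up above.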
5290 
5291 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5292   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5293 }
5294 
5295 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5296   GenerateVarHandleCompareAndSetOrExchange(
5297       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5298 }
5299 
5300 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5301   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5302 }
5303 
5304 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5305   GenerateVarHandleCompareAndSetOrExchange(
5306       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5307 }
5308 
5309 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5310   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5311 }
5312 
5313 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5314   GenerateVarHandleCompareAndSetOrExchange(
5315       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5316 }
5317 
5318 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5319   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5320 }
5321 
5322 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5323   GenerateVarHandleCompareAndSetOrExchange(
5324       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5325 }
5326 
5327 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5328   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5329 }
5330 
5331 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5332   GenerateVarHandleCompareAndSetOrExchange(
5333       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5334 }
5335 
5336 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5337   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5338 }
5339 
5340 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5341   GenerateVarHandleCompareAndSetOrExchange(
5342       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5343 }
5344 
5345 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5346   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5347 }
5348 
5349 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5350   GenerateVarHandleCompareAndSetOrExchange(
5351       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5352 }
5353 
5354 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5355   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5356 }
5357 
5358 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5359   GenerateVarHandleCompareAndSetOrExchange(
5360       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5361 }
5362 
5363 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5364                                                  CodeGeneratorARMVIXL* codegen,
5365                                                  GetAndUpdateOp get_and_update_op) {
5366   VarHandleOptimizations optimizations(invoke);
5367   if (optimizations.GetDoNotIntrinsify()) {
5368     return;
5369   }
5370 
5371   // Get the type from the shorty as the invokes may not return a value.
5372   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5373   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5374   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5375     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5376     // the passed reference and reloads it from the field, thus seeing the new value
5377     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
5378     return;
5379   }
5380 
5381   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5382 
5383   // We can reuse the declaring class (if present) and offset temporary, except for
5384   // non-Baker read barriers that need them for the slow path.
5385   DCHECK_EQ(locations->GetTempCount(),
5386             (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5387 
5388   if (get_and_update_op == GetAndUpdateOp::kSet) {
5389     if (DataType::IsFloatingPointType(value_type)) {
5390       // Add temps needed to do the GenerateGetAndUpdate() with core registers.
5391       size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
5392       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5393     } else if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5394       // We need to preserve the declaring class (if present) and offset for read barrier
5395       // slow paths, so we must use a separate temporary for the exclusive store result.
5396       locations->AddTemp(Location::RequiresRegister());
5397     } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5398       // Add temps for the byte-reversed `arg` in the byte array view slow path.
5399       DCHECK_EQ(locations->GetTempCount(), 1u);
5400       locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
5401     }
5402   } else {
5403     // We need temporaries for the new value and exclusive store result.
5404     size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
5405     if (get_and_update_op != GetAndUpdateOp::kAdd &&
5406         GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5407       // Add temps for the byte-reversed `arg` in the byte array view slow path.
5408       if (value_type == DataType::Type::kInt64) {
5409         // We would ideally add 2 temps for Int64 but that would simply run out of registers,
5410         // so we instead need to reverse bytes in the actual argument and undo it at the end.
5411       } else {
5412         temps_needed += 1u;
5413       }
5414     }
5415     locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5416     if (DataType::IsFloatingPointType(value_type)) {
5417       // Note: This shall allocate a D register. There is no way to request an S register.
5418       locations->AddTemp(Location::RequiresFpuRegister());
5419     }
5420   }
5421 
5422   // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
5423   DataType::Type return_type = invoke->GetType();
5424   const bool is_void = return_type == DataType::Type::kVoid;
5425   DCHECK_IMPLIES(!is_void, return_type == value_type);
5426   if (is_void) {
5427     if (DataType::IsFloatingPointType(value_type)) {
5428       // Note: This shall allocate a D register. There is no way to request an S register.
5429       locations->AddTemp(Location::RequiresFpuRegister());
5430     } else if (DataType::Is64BitType(value_type)) {
5431       // We need two temporaries for non-FPU 64-bit types.
5432       locations->AddTemp(Location::RequiresRegister());
5433       locations->AddTemp(Location::RequiresRegister());
5434     } else {
5435       locations->AddTemp(Location::RequiresRegister());
5436     }
5437   }
5438 }
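// Note: GetAndUpdateOp selects the read-modify-write kind handled below; of the enumerators
// referenced in this file, kSet corresponds to getAndSet and kAdd to the getAndAdd-style
// updates, and the temporary requirements above differ mainly between kSet and the other ops.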
5439 
5440 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5441                                           CodeGeneratorARMVIXL* codegen,
5442                                           GetAndUpdateOp get_and_update_op,
5443                                           std::memory_order order,
5444                                           bool byte_swap = false) {
5445   // Get the type from the shorty as the invokes may not return a value.
5446   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5447   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5448 
5449   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5450   LocationSummary* locations = invoke->GetLocations();
5451   Location arg = locations->InAt(arg_index);
5452   DataType::Type return_type = invoke->GetType();
5453   const bool is_void = return_type == DataType::Type::kVoid;
5454   DCHECK_IMPLIES(!is_void, return_type == value_type);
5455 
5456   size_t temps_that_mimic_out;
5457   Location result;
5458   const size_t temp_count = locations->GetTempCount();
5459   if (is_void) {
5460     if (value_type == DataType::Type::kFloat32) {
5461       // Note: Since we allocated a D register, use the low part.
5462       DCHECK(locations->GetTemp(temp_count - 1u).IsFpuRegisterPair());
5463       temps_that_mimic_out = 1u;
5464       result = locations->GetTemp(temp_count - 1u).ToLow();
5465     } else if (!DataType::IsFloatingPointType(value_type) && DataType::Is64BitType(value_type)) {
5466       temps_that_mimic_out = 2u;
5467       result = LocationFrom(RegisterFrom(locations->GetTemp(temp_count - 2u)),
5468                             RegisterFrom(locations->GetTemp(temp_count - 1u)));
5469     } else {
5470       temps_that_mimic_out = 1u;
5471       result = locations->GetTemp(temp_count - 1u);
5472     }
5473   } else {
5474     temps_that_mimic_out = 0u;
5475     result = locations->Out();
5476   }
5477 
5478   VarHandleTarget target = GetVarHandleTarget(invoke);
5479   VarHandleSlowPathARMVIXL* slow_path = nullptr;
5480   if (!byte_swap) {
5481     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5482     GenerateVarHandleTarget(invoke, target, codegen);
5483     if (slow_path != nullptr) {
5484       slow_path->SetGetAndUpdateOp(get_and_update_op);
5485       __ Bind(slow_path->GetNativeByteOrderLabel());
5486     }
5487   }
5488 
5489   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
5490   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
5491   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
5492   DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
5493 
5494   if (release_barrier) {
5495     codegen->GenerateMemoryBarrier(
5496         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
5497   }
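  // Sketch of the barrier placement implied by the flags above (the exact barrier
  // instructions are chosen by GenerateMemoryBarrier): seq_cst issues a kAnyAny barrier both
  // before and after the update loop, release only a kAnyStore barrier before it, acquire
  // only a kLoadAny barrier after it, and relaxed issues no barriers at all.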
5498 
5499   // Use the scratch register for the pointer to the target location.
5500   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5501   vixl32::Register tmp_ptr = temps.Acquire();
5502   __ Add(tmp_ptr, target.object, target.offset);
5503 
5504   // Use the offset temporary for the exclusive store result.
5505   vixl32::Register store_result = target.offset;
5506 
5507   // The load/store type is never floating point.
5508   DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
5509       ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5510       : value_type;
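  // For example, Float64 values are handled as Int64 by the exclusive-access loop; for
  // GetAndSet the argument is first moved to core registers with VMOV below.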
5511 
5512   // Prepare the register for the old value, and temporaries if any.
5513   Location old_value = result;
5514   Location maybe_temp = Location::NoLocation();
5515   Location maybe_vreg_temp = Location::NoLocation();
5516   if (get_and_update_op == GetAndUpdateOp::kSet) {
5517     // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5518     // rather than moving between core and FP registers in the loop.
5519     if (value_type == DataType::Type::kFloat64) {
5520       vixl32::DRegister arg_vreg = DRegisterFrom(arg);
5521       // `store_result` and the four temps used here, plus possibly one more temp that
5522       // mimics the "out" register.
5523       DCHECK_EQ(temp_count, 5u + temps_that_mimic_out);
5524       old_value =
5525           LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
5526       arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
5527       if (byte_swap) {
5528         __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
5529         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5530       } else {
5531         __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
5532       }
5533     } else if (value_type == DataType::Type::kFloat32) {
5534       vixl32::SRegister arg_vreg = SRegisterFrom(arg);
5535       // `store_result` and the two temps used here, plus possibly one more temp that
5536       // mimics the "out" register.
5537       DCHECK_EQ(temp_count, 3u + temps_that_mimic_out);
5538       old_value = locations->GetTemp(1);
5539       arg = locations->GetTemp(2);
5540       __ Vmov(RegisterFrom(arg), arg_vreg);
5541       if (byte_swap) {
5542         GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
5543       }
5544     } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5545       if (kUseBakerReadBarrier) {
5546         // Load the old value initially to a temporary register.
5547         // We shall move it to `out` later with a read barrier.
5548         old_value = LocationFrom(store_result);
5549         store_result = RegisterFrom(result);  // Use `result` for the exclusive store result.
5550       } else {
5551         // The `store_result` register is a separate temporary.
5552         DCHECK(!store_result.Is(target.object));
5553         DCHECK(!store_result.Is(target.offset));
5554       }
5555     } else if (byte_swap) {
5556       Location original_arg = arg;
5557       arg = locations->GetTemp(1);
5558       if (value_type == DataType::Type::kInt64) {
5559         arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
5560         // Swap the high/low regs and reverse the bytes in each after the load.
5561         old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result));
5562       }
5563       GenerateReverseBytes(assembler, value_type, original_arg, arg);
5564     }
5565   } else {
5566     maybe_temp = DataType::Is64BitType(value_type)
5567         ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
5568         : locations->GetTemp(1);
5569     DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
5570     if (DataType::IsFloatingPointType(value_type)) {
5571       maybe_vreg_temp = locations->GetTemp(temp_count - 1u - temps_that_mimic_out);
5572       DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
5573     }
5574     if (byte_swap) {
5575       if (get_and_update_op == GetAndUpdateOp::kAdd) {
5576         // We need to do the byte swapping in the CAS loop for GetAndAdd.
5577         get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5578       } else if (value_type == DataType::Type::kInt64) {
5579         // Swap the high/low regs and reverse the bytes in each after the load.
5580         old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result));
5581         // Due to lack of registers, reverse bytes in `arg` and undo that later.
5582         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5583         arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
5584       } else {
5585         DCHECK(!DataType::IsFloatingPointType(value_type));
5586         Location original_arg = arg;
5587         arg = locations->GetTemp(2);
5588         DCHECK(!arg.Contains(LocationFrom(store_result)));
5589         GenerateReverseBytes(assembler, value_type, original_arg, arg);
5590       }
5591     }
5592   }
5593 
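  // For reference, GenerateGetAndUpdate() emits an exclusive-access (LDREX/STREX) loop; a
  // rough sketch for a 32-bit kAdd, with `new_value` as an assumed name for the temp that
  // holds the updated value:
  //
  //   retry:
  //     ldrex   old_value, [tmp_ptr]
  //     add     new_value, old_value, arg
  //     strex   store_result, new_value, [tmp_ptr]
  //     cmp     store_result, #0          // STREX writes 0 on success.
  //     bne     retry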
5594   GenerateGetAndUpdate(codegen,
5595                        get_and_update_op,
5596                        load_store_type,
5597                        tmp_ptr,
5598                        arg,
5599                        old_value,
5600                        store_result,
5601                        maybe_temp,
5602                        maybe_vreg_temp);
5603 
5604   if (acquire_barrier) {
5605     codegen->GenerateMemoryBarrier(
5606         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5607   }
5608 
5609   if (!is_void) {
5610     if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
5611       if (value_type == DataType::Type::kInt64) {
5612         GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5613         if (get_and_update_op != GetAndUpdateOp::kSet) {
5614           // Undo the byte swapping in `arg`. We do not know whether the value
5615           // in these registers will be needed later.
5616           GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5617         }
5618       } else {
5619         GenerateReverseBytes(assembler, value_type, old_value, result);
5620       }
5621     } else if (get_and_update_op == GetAndUpdateOp::kSet &&
5622                DataType::IsFloatingPointType(value_type)) {
5623       if (value_type == DataType::Type::kFloat64) {
5624         __ Vmov(DRegisterFrom(result), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5625       } else {
5626         __ Vmov(SRegisterFrom(result), RegisterFrom(old_value));
5627       }
5628     } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5629       if (kUseBakerReadBarrier) {
5630         codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(result),
5631                                                            RegisterFrom(old_value));
5632       } else {
5633         codegen->GenerateReadBarrierSlow(
5634             invoke,
5635             Location::RegisterLocation(RegisterFrom(result).GetCode()),
5636             Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
5637             Location::RegisterLocation(target.object.GetCode()),
5638             /*offset=*/ 0u,
5639             /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
5640       }
5641     }
5642   }
5643 
5644   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5645     // Reuse the offset temporary and scratch register for MarkGCCard.
5646     vixl32::Register temp = target.offset;
5647     vixl32::Register card = tmp_ptr;
5648     // Mark card for object assuming new value is stored.
5649     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5650     codegen->MaybeMarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
5651   }
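  // Note: the card mark above applies only to GetAndSet with a reference-typed value; with
  // `new_value_can_be_null` set, MaybeMarkGCCard also skips the mark for a null new value.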
5652 
5653   if (slow_path != nullptr) {
5654     DCHECK(!byte_swap);
5655     __ Bind(slow_path->GetExitLabel());
5656   }
5657 }
5658 
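// The visitors below simply dispatch to the shared helpers above: each access mode selects
// its GetAndUpdateOp, the plain forms use std::memory_order_seq_cst, and the *Acquire and
// *Release forms use std::memory_order_acquire and std::memory_order_release respectively.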
5659 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5660   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5661 }
5662 
5663 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5664   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5665 }
5666 
5667 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5668   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5669 }
5670 
5671 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5672   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5673 }
5674 
5675 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5676   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5677 }
5678 
5679 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5680   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5681 }
5682 
5683 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5684   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5685 }
5686 
5687 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5688   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5689 }
5690 
5691 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5692   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5693 }
5694 
5695 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5696   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5697 }
5698 
5699 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5700   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5701 }
5702 
5703 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5704   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5705 }
5706 
5707 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5708   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5709 }
5710 
5711 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5712   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5713 }
5714 
5715 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5716   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5717 }
5718 
5719 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5720   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5721 }
5722 
5723 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5724   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5725 }
5726 
5727 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5728   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5729 }
5730 
5731 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5732   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5733 }
5734 
5735 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5736   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5737 }
5738 
5739 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5740   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5741 }
5742 
5743 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5744   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5745 }
5746 
5747 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5748   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5749 }
5750 
5751 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5752   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5753 }
5754 
5755 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5756   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5757 }
5758 
5759 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5760   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5761 }
5762 
5763 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5764   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5765 }
5766 
5767 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5768   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5769 }
5770 
5771 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5772   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5773 }
5774 
5775 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5776   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5777 }
5778 
5779 void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5780   DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
5781   CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
5782   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5783   HInvoke* invoke = GetInvoke();
5784   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5785   DataType::Type value_type =
5786       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5787   DCHECK_NE(value_type, DataType::Type::kReference);
5788   size_t size = DataType::Size(value_type);
5789   DCHECK_GT(size, 1u);
5790   vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
5791   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
5792   vixl32::Register object = InputRegisterAt(invoke, 1);
5793   vixl32::Register index = InputRegisterAt(invoke, 2);
5794 
5795   MemberOffset class_offset = mirror::Object::ClassOffset();
5796   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5797   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5798   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5799 
5800   __ Bind(GetByteArrayViewCheckLabel());
5801 
5802   VarHandleTarget target = GetVarHandleTarget(invoke);
5803   {
5804     // Use the offset temporary register. It is not used yet at this point.
5805     vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
5806 
5807     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5808     vixl32::Register temp2 = temps.Acquire();
5809 
5810     // The main path checked that coordinateType0 is an array class matching the class of the
5811     // actual coordinate argument, but its component type does not match the value type.
5812     // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5813     __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
5814     codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
5815     codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5816     __ Cmp(temp, temp2);
5817     __ B(ne, GetEntryLabel());
5818 
5819     // Check for array index out of bounds.
5820     __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
5821     if (!temp.IsLow()) {
5822       // Avoid using the 32-bit `cmp temp, #imm` in IT block by loading `size` into `temp2`.
5823       __ Mov(temp2, size_operand);
5824     }
5825     __ Subs(temp, temp, index);
5826     {
5827       // Use ExactAssemblyScope here because we are using IT.
5828       ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
5829                                   2 * k16BitT32InstructionSizeInBytes);
5830       __ it(hs);
5831       if (temp.IsLow()) {
5832         __ cmp(hs, temp, size_operand);
5833       } else {
5834         __ cmp(hs, temp, temp2);
5835       }
5836     }
5837     __ B(lo, GetEntryLabel());
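    // Taken together: if the SUBS borrowed (index > length), the predicated CMP was skipped
    // and the SUBS flags trigger the branch; otherwise the branch checks that at least `size`
    // bytes remain at `index`, i.e. that `index + size <= length` holds unsigned.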
5838 
5839     // Construct the target.
5840     __ Add(target.offset, index, data_offset.Int32Value());  // Note: `temp` cannot be used below.
5841 
5842     // Alignment check. For unaligned access, go to the runtime.
5843     DCHECK(IsPowerOfTwo(size));
5844     __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
5845     __ B(ne, GetEntryLabel());
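    // For example, a 4-byte view (Int32/Float32) tests `offset & 0x3` here; any non-zero
    // result means the access would be unaligned and is left to the runtime.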
5846 
5847     // Byte order check. For native byte order return to the main path.
5848     if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
5849       HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
5850       if (IsZeroBitPattern(arg)) {
5851         // There is no reason to differentiate between native byte order and byte-swap
5852         // for setting a zero bit pattern. Just return to the main path.
5853         __ B(GetNativeByteOrderLabel());
5854         return;
5855       }
5856     }
5857     __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
5858     __ Cmp(temp2, 0);
5859     __ B(ne, GetNativeByteOrderLabel());
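    // A non-zero `nativeByteOrder` field means the view already uses the native byte order,
    // so the main path code is reused; otherwise fall through to the byte-swapping code.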
5860   }
5861 
5862   switch (access_mode_template) {
5863     case mirror::VarHandle::AccessModeTemplate::kGet:
5864       GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
5865       break;
5866     case mirror::VarHandle::AccessModeTemplate::kSet:
5867       GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
5868       break;
5869     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5870     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5871       GenerateVarHandleCompareAndSetOrExchange(
5872           invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5873       break;
5874     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5875       GenerateVarHandleGetAndUpdate(
5876           invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5877       break;
5878   }
5879   __ B(GetExitLabel());
5880 }
5881 
5882 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
5883 UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
5884 #undef MARK_UNIMPLEMENTED
5885 
5886 UNREACHABLE_INTRINSICS(ARMVIXL)
5887 
5888 #undef __
5889 
5890 }  // namespace arm
5891 }  // namespace art
5892