1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_arm_vixl.h"
18
19 #include "aarch32/constants-aarch32.h"
20 #include "aarch32/operands-aarch32.h"
21 #include "arch/arm/callee_save_frame_arm.h"
22 #include "arch/arm/instruction_set_features_arm.h"
23 #include "art_method.h"
24 #include "code_generator_arm_vixl.h"
25 #include "common_arm.h"
26 #include "heap_poisoning.h"
27 #include "intrinsic_objects.h"
28 #include "intrinsics.h"
29 #include "intrinsics_utils.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/object_array-inl.h"
33 #include "mirror/reference.h"
34 #include "mirror/string-inl.h"
35 #include "optimizing/data_type.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread-current-inl.h"
38 #include "well_known_classes.h"
39
40 namespace art HIDDEN {
41 namespace arm {
42
43 #define __ assembler->GetVIXLAssembler()->
44
45 using helpers::DRegisterFrom;
46 using helpers::HighRegisterFrom;
47 using helpers::InputDRegisterAt;
48 using helpers::InputRegisterAt;
49 using helpers::InputSRegisterAt;
50 using helpers::Int32ConstantFrom;
51 using helpers::LocationFrom;
52 using helpers::LowRegisterFrom;
53 using helpers::LowSRegisterFrom;
54 using helpers::HighSRegisterFrom;
55 using helpers::OperandFrom;
56 using helpers::OutputDRegister;
57 using helpers::OutputRegister;
58 using helpers::RegisterFrom;
59 using helpers::SRegisterFrom;
60
61 using namespace vixl::aarch32; // NOLINT(build/namespaces)
62
63 using vixl::ExactAssemblyScope;
64 using vixl::CodeBufferCheckScope;
65
GetAssembler()66 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
67 return codegen_->GetAssembler();
68 }
69
GetAllocator()70 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
71 return codegen_->GetGraph()->GetAllocator();
72 }
73
74 using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
75 SlowPathCodeARMVIXL,
76 ArmVIXLAssembler>;
77
78 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
79 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
80 public:
ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction * instruction)81 explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
82 : SlowPathCodeARMVIXL(instruction) {
83 }
84
EmitNativeCode(CodeGenerator * codegen)85 void EmitNativeCode(CodeGenerator* codegen) override {
86 DCHECK(codegen->EmitBakerReadBarrier());
87 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
88 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
89 LocationSummary* locations = instruction_->GetLocations();
90 DCHECK(locations->CanCall());
91 DCHECK(instruction_->IsInvokeStaticOrDirect())
92 << "Unexpected instruction in read barrier arraycopy slow path: "
93 << instruction_->DebugName();
94 DCHECK(instruction_->GetLocations()->Intrinsified());
95 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
96
97 DataType::Type type = DataType::Type::kReference;
98 const int32_t element_size = DataType::Size(type);
99
100 vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
101 vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
102 vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
103 vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
104
105 __ Bind(GetEntryLabel());
106 // The source range and destination pointer were initialized before entering the slow-path.
107 vixl32::Label loop;
108 __ Bind(&loop);
109 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
110 assembler->MaybeUnpoisonHeapReference(tmp);
111 // TODO: Inline the mark bit check before calling the runtime?
112 // tmp = ReadBarrier::Mark(tmp);
113 // No need to save live registers; it's taken care of by the
114 // entrypoint. Also, there is no need to update the stack mask,
115 // as this runtime call will not trigger a garbage collection.
116 // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
117 // explanations.)
118 DCHECK(!tmp.IsSP());
119 DCHECK(!tmp.IsLR());
120 DCHECK(!tmp.IsPC());
121 // IP is used internally by the ReadBarrierMarkRegX entry point
122 // as a temporary (and not preserved). It thus cannot be used by
123 // any live register in this slow path.
124 DCHECK(!src_curr_addr.Is(ip));
125 DCHECK(!dst_curr_addr.Is(ip));
126 DCHECK(!src_stop_addr.Is(ip));
127 DCHECK(!tmp.Is(ip));
128 DCHECK(tmp.IsRegister()) << tmp;
129 // TODO: Load the entrypoint once before the loop, instead of
130 // loading it at every iteration.
131 int32_t entry_point_offset =
132 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
133 // This runtime call does not require a stack map.
134 arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
135 assembler->MaybePoisonHeapReference(tmp);
136 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
137 __ Cmp(src_curr_addr, src_stop_addr);
138 __ B(ne, &loop, /* is_far_target= */ false);
139 __ B(GetExitLabel());
140 }
141
GetDescription() const142 const char* GetDescription() const override {
143 return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
144 }
145
146 private:
147 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
148 };
149
IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL * codegen)150 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
151 : allocator_(codegen->GetGraph()->GetAllocator()),
152 codegen_(codegen),
153 assembler_(codegen->GetAssembler()),
154 features_(codegen->GetInstructionSetFeatures()) {}
155
TryDispatch(HInvoke * invoke)156 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
157 Dispatch(invoke);
158 LocationSummary* res = invoke->GetLocations();
159 if (res == nullptr) {
160 return false;
161 }
162 return res->Intrinsified();
163 }
164
CreateFPToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)165 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
166 LocationSummary* locations =
167 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
168 locations->SetInAt(0, Location::RequiresFpuRegister());
169 locations->SetOut(Location::RequiresRegister());
170 }
171
CreateIntToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)172 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
173 LocationSummary* locations =
174 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
175 locations->SetInAt(0, Location::RequiresRegister());
176 locations->SetOut(Location::RequiresFpuRegister());
177 }
178
MoveFPToInt(LocationSummary * locations,bool is64bit,ArmVIXLAssembler * assembler)179 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
180 Location input = locations->InAt(0);
181 Location output = locations->Out();
182 if (is64bit) {
183 __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
184 } else {
185 __ Vmov(RegisterFrom(output), SRegisterFrom(input));
186 }
187 }
188
MoveIntToFP(LocationSummary * locations,bool is64bit,ArmVIXLAssembler * assembler)189 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
190 Location input = locations->InAt(0);
191 Location output = locations->Out();
192 if (is64bit) {
193 __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
194 } else {
195 __ Vmov(SRegisterFrom(output), RegisterFrom(input));
196 }
197 }
198
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)199 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
200 CreateFPToIntLocations(allocator_, invoke);
201 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)202 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
203 CreateIntToFPLocations(allocator_, invoke);
204 }
205
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)206 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
207 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
208 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)209 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
210 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
211 }
212
VisitFloatFloatToRawIntBits(HInvoke * invoke)213 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
214 CreateFPToIntLocations(allocator_, invoke);
215 }
VisitFloatIntBitsToFloat(HInvoke * invoke)216 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
217 CreateIntToFPLocations(allocator_, invoke);
218 }
219
VisitFloatFloatToRawIntBits(HInvoke * invoke)220 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
221 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
222 }
VisitFloatIntBitsToFloat(HInvoke * invoke)223 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
224 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
225 }
226
CreateIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)227 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
228 LocationSummary* locations =
229 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
230 locations->SetInAt(0, Location::RequiresRegister());
231 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
232 }
233
CreateIntIntToIntSlowPathCallLocations(ArenaAllocator * allocator,HInvoke * invoke)234 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
235 LocationSummary* locations =
236 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
237 locations->SetInAt(0, Location::RequiresRegister());
238 locations->SetInAt(1, Location::RequiresRegister());
239 // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
240 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
241 }
242
CreateLongToLongLocationsWithOverlap(ArenaAllocator * allocator,HInvoke * invoke)243 static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
244 LocationSummary* locations =
245 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
246 locations->SetInAt(0, Location::RequiresRegister());
247 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
248 }
249
CreateFPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)250 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
251 LocationSummary* locations =
252 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
253 locations->SetInAt(0, Location::RequiresFpuRegister());
254 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
255 }
256
GenNumberOfLeadingZeros(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)257 static void GenNumberOfLeadingZeros(HInvoke* invoke,
258 DataType::Type type,
259 CodeGeneratorARMVIXL* codegen) {
260 ArmVIXLAssembler* assembler = codegen->GetAssembler();
261 LocationSummary* locations = invoke->GetLocations();
262 Location in = locations->InAt(0);
263 vixl32::Register out = RegisterFrom(locations->Out());
264
265 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
266
267 if (type == DataType::Type::kInt64) {
268 vixl32::Register in_reg_lo = LowRegisterFrom(in);
269 vixl32::Register in_reg_hi = HighRegisterFrom(in);
270 vixl32::Label end;
271 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
272 __ Clz(out, in_reg_hi);
273 __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
274 __ Clz(out, in_reg_lo);
275 __ Add(out, out, 32);
276 if (end.IsReferenced()) {
277 __ Bind(&end);
278 }
279 } else {
280 __ Clz(out, RegisterFrom(in));
281 }
282 }
283
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)284 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
285 CreateIntToIntLocations(allocator_, invoke);
286 }
287
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)288 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
289 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
290 }
291
VisitLongNumberOfLeadingZeros(HInvoke * invoke)292 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
293 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
294 }
295
VisitLongNumberOfLeadingZeros(HInvoke * invoke)296 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
297 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
298 }
299
GenNumberOfTrailingZeros(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)300 static void GenNumberOfTrailingZeros(HInvoke* invoke,
301 DataType::Type type,
302 CodeGeneratorARMVIXL* codegen) {
303 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
304
305 ArmVIXLAssembler* assembler = codegen->GetAssembler();
306 LocationSummary* locations = invoke->GetLocations();
307 vixl32::Register out = RegisterFrom(locations->Out());
308
309 if (type == DataType::Type::kInt64) {
310 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
311 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
312 vixl32::Label end;
313 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
314 __ Rbit(out, in_reg_lo);
315 __ Clz(out, out);
316 __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
317 __ Rbit(out, in_reg_hi);
318 __ Clz(out, out);
319 __ Add(out, out, 32);
320 if (end.IsReferenced()) {
321 __ Bind(&end);
322 }
323 } else {
324 vixl32::Register in = RegisterFrom(locations->InAt(0));
325 __ Rbit(out, in);
326 __ Clz(out, out);
327 }
328 }
329
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)330 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
331 CreateIntToIntLocations(allocator_, invoke);
332 }
333
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)334 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
335 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
336 }
337
VisitLongNumberOfTrailingZeros(HInvoke * invoke)338 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
339 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
340 }
341
VisitLongNumberOfTrailingZeros(HInvoke * invoke)342 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
343 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
344 }
345
VisitMathSqrt(HInvoke * invoke)346 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
347 CreateFPToFPLocations(allocator_, invoke);
348 }
349
VisitMathSqrt(HInvoke * invoke)350 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
351 ArmVIXLAssembler* assembler = GetAssembler();
352 __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
353 }
354
VisitMathRint(HInvoke * invoke)355 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
356 if (features_.HasARMv8AInstructions()) {
357 CreateFPToFPLocations(allocator_, invoke);
358 }
359 }
360
VisitMathRint(HInvoke * invoke)361 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
362 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
363 ArmVIXLAssembler* assembler = GetAssembler();
364 __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
365 }
366
VisitMathRoundFloat(HInvoke * invoke)367 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
368 if (features_.HasARMv8AInstructions()) {
369 LocationSummary* locations =
370 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
371 locations->SetInAt(0, Location::RequiresFpuRegister());
372 locations->SetOut(Location::RequiresRegister());
373 locations->AddTemp(Location::RequiresFpuRegister());
374 }
375 }
376
VisitMathRoundFloat(HInvoke * invoke)377 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
378 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
379
380 ArmVIXLAssembler* assembler = GetAssembler();
381 vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
382 vixl32::Register out_reg = OutputRegister(invoke);
383 vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
384 vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
385 vixl32::Label done;
386 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
387
388 // Round to nearest integer, ties away from zero.
389 __ Vcvta(S32, F32, temp1, in_reg);
390 __ Vmov(out_reg, temp1);
391
392 // For positive, zero or NaN inputs, rounding is done.
393 __ Cmp(out_reg, 0);
394 __ B(ge, final_label, /* is_far_target= */ false);
395
396 // Handle input < 0 cases.
397 // If input is negative but not a tie, previous result (round to nearest) is valid.
398 // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
399 __ Vrinta(F32, temp1, in_reg);
400 __ Vmov(temp2, 0.5);
401 __ Vsub(F32, temp1, in_reg, temp1);
402 __ Vcmp(F32, temp1, temp2);
403 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
404 {
405 // Use ExactAssemblyScope here because we are using IT.
406 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
407 2 * kMaxInstructionSizeInBytes,
408 CodeBufferCheckScope::kMaximumSize);
409 __ it(eq);
410 __ add(eq, out_reg, out_reg, 1);
411 }
412
413 if (done.IsReferenced()) {
414 __ Bind(&done);
415 }
416 }
417
VisitMemoryPeekByte(HInvoke * invoke)418 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
419 CreateIntToIntLocations(allocator_, invoke);
420 }
421
VisitMemoryPeekByte(HInvoke * invoke)422 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
423 ArmVIXLAssembler* assembler = GetAssembler();
424 // Ignore upper 4B of long address.
425 __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
426 }
427
VisitMemoryPeekIntNative(HInvoke * invoke)428 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
429 CreateIntToIntLocations(allocator_, invoke);
430 }
431
VisitMemoryPeekIntNative(HInvoke * invoke)432 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
433 ArmVIXLAssembler* assembler = GetAssembler();
434 // Ignore upper 4B of long address.
435 __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
436 }
437
VisitMemoryPeekLongNative(HInvoke * invoke)438 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
439 CreateIntToIntLocations(allocator_, invoke);
440 }
441
VisitMemoryPeekLongNative(HInvoke * invoke)442 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
443 ArmVIXLAssembler* assembler = GetAssembler();
444 // Ignore upper 4B of long address.
445 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
446 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
447 // exception. So we can't use ldrd as addr may be unaligned.
448 vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
449 vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
450 if (addr.Is(lo)) {
451 __ Ldr(hi, MemOperand(addr, 4));
452 __ Ldr(lo, MemOperand(addr));
453 } else {
454 __ Ldr(lo, MemOperand(addr));
455 __ Ldr(hi, MemOperand(addr, 4));
456 }
457 }
458
VisitMemoryPeekShortNative(HInvoke * invoke)459 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
460 CreateIntToIntLocations(allocator_, invoke);
461 }
462
VisitMemoryPeekShortNative(HInvoke * invoke)463 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
464 ArmVIXLAssembler* assembler = GetAssembler();
465 // Ignore upper 4B of long address.
466 __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
467 }
468
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)469 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
470 LocationSummary* locations =
471 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
472 locations->SetInAt(0, Location::RequiresRegister());
473 locations->SetInAt(1, Location::RequiresRegister());
474 }
475
VisitMemoryPokeByte(HInvoke * invoke)476 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
477 CreateIntIntToVoidLocations(allocator_, invoke);
478 }
479
VisitMemoryPokeByte(HInvoke * invoke)480 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
481 ArmVIXLAssembler* assembler = GetAssembler();
482 __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
483 }
484
VisitMemoryPokeIntNative(HInvoke * invoke)485 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
486 CreateIntIntToVoidLocations(allocator_, invoke);
487 }
488
VisitMemoryPokeIntNative(HInvoke * invoke)489 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
490 ArmVIXLAssembler* assembler = GetAssembler();
491 __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
492 }
493
VisitMemoryPokeLongNative(HInvoke * invoke)494 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
495 CreateIntIntToVoidLocations(allocator_, invoke);
496 }
497
VisitMemoryPokeLongNative(HInvoke * invoke)498 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
499 ArmVIXLAssembler* assembler = GetAssembler();
500 // Ignore upper 4B of long address.
501 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
502 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
503 // exception. So we can't use ldrd as addr may be unaligned.
504 __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
505 __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
506 }
507
VisitMemoryPokeShortNative(HInvoke * invoke)508 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
509 CreateIntIntToVoidLocations(allocator_, invoke);
510 }
511
VisitMemoryPokeShortNative(HInvoke * invoke)512 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
513 ArmVIXLAssembler* assembler = GetAssembler();
514 __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
515 }
516
VisitThreadCurrentThread(HInvoke * invoke)517 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
518 LocationSummary* locations =
519 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
520 locations->SetOut(Location::RequiresRegister());
521 }
522
VisitThreadCurrentThread(HInvoke * invoke)523 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
524 ArmVIXLAssembler* assembler = GetAssembler();
525 __ Ldr(OutputRegister(invoke),
526 MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
527 }
528
VisitStringCompareTo(HInvoke * invoke)529 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
530 // The inputs plus one temp.
531 LocationSummary* locations =
532 new (allocator_) LocationSummary(invoke,
533 invoke->InputAt(1)->CanBeNull()
534 ? LocationSummary::kCallOnSlowPath
535 : LocationSummary::kNoCall,
536 kIntrinsified);
537 locations->SetInAt(0, Location::RequiresRegister());
538 locations->SetInAt(1, Location::RequiresRegister());
539 locations->AddRegisterTemps(3);
540 // Need temporary registers for String compression's feature.
541 if (mirror::kUseStringCompression) {
542 locations->AddTemp(Location::RequiresRegister());
543 }
544 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
545 }
546
547 // Forward declaration.
548 //
549 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
550 // by the compiler for every C++ function, and if this function gets inlined in
551 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
552 // build failure. That is the reason why NO_INLINE attribute is used.
553 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
554 HInvoke* invoke,
555 vixl32::Label* end,
556 vixl32::Label* different_compression);
557
VisitStringCompareTo(HInvoke * invoke)558 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
559 ArmVIXLAssembler* assembler = GetAssembler();
560 LocationSummary* locations = invoke->GetLocations();
561
562 const vixl32::Register str = InputRegisterAt(invoke, 0);
563 const vixl32::Register arg = InputRegisterAt(invoke, 1);
564 const vixl32::Register out = OutputRegister(invoke);
565
566 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
567 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
568 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
569 vixl32::Register temp3;
570 if (mirror::kUseStringCompression) {
571 temp3 = RegisterFrom(locations->GetTemp(3));
572 }
573
574 vixl32::Label end;
575 vixl32::Label different_compression;
576
577 // Get offsets of count and value fields within a string object.
578 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
579
580 // Note that the null check must have been done earlier.
581 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
582
583 // Take slow path and throw if input can be and is null.
584 SlowPathCodeARMVIXL* slow_path = nullptr;
585 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
586 if (can_slow_path) {
587 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
588 codegen_->AddSlowPath(slow_path);
589 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
590 }
591
592 // Reference equality check, return 0 if same reference.
593 __ Subs(out, str, arg);
594 __ B(eq, &end);
595
596 if (mirror::kUseStringCompression) {
597 // Load `count` fields of this and argument strings.
598 __ Ldr(temp3, MemOperand(str, count_offset));
599 __ Ldr(temp2, MemOperand(arg, count_offset));
600 // Extract lengths from the `count` fields.
601 __ Lsr(temp0, temp3, 1u);
602 __ Lsr(temp1, temp2, 1u);
603 } else {
604 // Load lengths of this and argument strings.
605 __ Ldr(temp0, MemOperand(str, count_offset));
606 __ Ldr(temp1, MemOperand(arg, count_offset));
607 }
608 // out = length diff.
609 __ Subs(out, temp0, temp1);
610 // temp0 = min(len(str), len(arg)).
611
612 {
613 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
614 2 * kMaxInstructionSizeInBytes,
615 CodeBufferCheckScope::kMaximumSize);
616
617 __ it(gt);
618 __ mov(gt, temp0, temp1);
619 }
620
621 // Shorter string is empty?
622 // Note that mirror::kUseStringCompression==true introduces lots of instructions,
623 // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
624 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
625
626 if (mirror::kUseStringCompression) {
627 // Check if both strings using same compression style to use this comparison loop.
628 __ Eors(temp2, temp2, temp3);
629 __ Lsrs(temp2, temp2, 1u);
630 __ B(cs, &different_compression);
631 // For string compression, calculate the number of bytes to compare (not chars).
632 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
633 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
634
635 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
636 2 * kMaxInstructionSizeInBytes,
637 CodeBufferCheckScope::kMaximumSize);
638
639 __ it(ne);
640 __ add(ne, temp0, temp0, temp0);
641 }
642
643
644 GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
645
646 __ Bind(&end);
647
648 if (can_slow_path) {
649 __ Bind(slow_path->GetExitLabel());
650 }
651 }
652
GenerateStringCompareToLoop(ArmVIXLAssembler * assembler,HInvoke * invoke,vixl32::Label * end,vixl32::Label * different_compression)653 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
654 HInvoke* invoke,
655 vixl32::Label* end,
656 vixl32::Label* different_compression) {
657 LocationSummary* locations = invoke->GetLocations();
658
659 const vixl32::Register str = InputRegisterAt(invoke, 0);
660 const vixl32::Register arg = InputRegisterAt(invoke, 1);
661 const vixl32::Register out = OutputRegister(invoke);
662
663 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
664 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
665 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
666 vixl32::Register temp3;
667 if (mirror::kUseStringCompression) {
668 temp3 = RegisterFrom(locations->GetTemp(3));
669 }
670
671 vixl32::Label loop;
672 vixl32::Label find_char_diff;
673
674 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
675 // Store offset of string value in preparation for comparison loop.
676 __ Mov(temp1, value_offset);
677
678 // Assertions that must hold in order to compare multiple characters at a time.
679 CHECK_ALIGNED(value_offset, 8);
680 static_assert(IsAligned<8>(kObjectAlignment),
681 "String data must be 8-byte aligned for unrolled CompareTo loop.");
682
683 const unsigned char_size = DataType::Size(DataType::Type::kUint16);
684 DCHECK_EQ(char_size, 2u);
685
686 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
687
688 vixl32::Label find_char_diff_2nd_cmp;
689 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
690 __ Bind(&loop);
691 vixl32::Register temp_reg = temps.Acquire();
692 __ Ldr(temp_reg, MemOperand(str, temp1));
693 __ Ldr(temp2, MemOperand(arg, temp1));
694 __ Cmp(temp_reg, temp2);
695 __ B(ne, &find_char_diff, /* is_far_target= */ false);
696 __ Add(temp1, temp1, char_size * 2);
697
698 __ Ldr(temp_reg, MemOperand(str, temp1));
699 __ Ldr(temp2, MemOperand(arg, temp1));
700 __ Cmp(temp_reg, temp2);
701 __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
702 __ Add(temp1, temp1, char_size * 2);
703 // With string compression, we have compared 8 bytes, otherwise 4 chars.
704 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
705 __ B(hi, &loop, /* is_far_target= */ false);
706 __ B(end);
707
708 __ Bind(&find_char_diff_2nd_cmp);
709 if (mirror::kUseStringCompression) {
710 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
711 __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end?
712 } else {
713 // Without string compression, we can start treating temp0 as signed
714 // and rely on the signed comparison below.
715 __ Sub(temp0, temp0, 2);
716 }
717
718 // Find the single character difference.
719 __ Bind(&find_char_diff);
720 // Get the bit position of the first character that differs.
721 __ Eor(temp1, temp2, temp_reg);
722 __ Rbit(temp1, temp1);
723 __ Clz(temp1, temp1);
724
725 // temp0 = number of characters remaining to compare.
726 // (Without string compression, it could be < 1 if a difference is found by the second CMP
727 // in the comparison loop, and after the end of the shorter string data).
728
729 // Without string compression (temp1 >> 4) = character where difference occurs between the last
730 // two words compared, in the interval [0,1].
731 // (0 for low half-word different, 1 for high half-word different).
732 // With string compression, (temp1 << 3) = byte where the difference occurs,
733 // in the interval [0,3].
734
735 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
736 // the remaining string data, so just return length diff (out).
737 // The comparison is unsigned for string compression, otherwise signed.
738 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
739 __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
740
741 // Extract the characters and calculate the difference.
742 if (mirror::kUseStringCompression) {
743 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
744 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
745 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
746 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
747 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
748 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
749 __ Lsr(temp2, temp2, temp1); // Extract second character.
750 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
751 __ Lsr(out, temp_reg, temp1); // Extract first character.
752 __ And(temp2, temp2, temp3);
753 __ And(out, out, temp3);
754 } else {
755 __ Bic(temp1, temp1, 0xf);
756 __ Lsr(temp2, temp2, temp1);
757 __ Lsr(out, temp_reg, temp1);
758 __ Movt(temp2, 0);
759 __ Movt(out, 0);
760 }
761
762 __ Sub(out, out, temp2);
763 temps.Release(temp_reg);
764
765 if (mirror::kUseStringCompression) {
766 __ B(end);
767 __ Bind(different_compression);
768
769 // Comparison for different compression style.
770 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
771 DCHECK_EQ(c_char_size, 1u);
772
773 // We want to free up the temp3, currently holding `str.count`, for comparison.
774 // So, we move it to the bottom bit of the iteration count `temp0` which we tnen
775 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
776 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
777 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
778 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
779 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
780 __ Mov(temp1, str);
781 __ Mov(temp2, arg);
782 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
783 {
784 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
785 3 * kMaxInstructionSizeInBytes,
786 CodeBufferCheckScope::kMaximumSize);
787 __ itt(cs); // Interleave with selection of temp1 and temp2.
788 __ mov(cs, temp1, arg); // Preserves flags.
789 __ mov(cs, temp2, str); // Preserves flags.
790 }
791 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
792
793 // Adjust temp1 and temp2 from string pointers to data pointers.
794 __ Add(temp1, temp1, value_offset);
795 __ Add(temp2, temp2, value_offset);
796
797 vixl32::Label different_compression_loop;
798 vixl32::Label different_compression_diff;
799
800 // Main loop for different compression.
801 temp_reg = temps.Acquire();
802 __ Bind(&different_compression_loop);
803 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
804 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
805 __ Cmp(temp_reg, temp3);
806 __ B(ne, &different_compression_diff, /* is_far_target= */ false);
807 __ Subs(temp0, temp0, 2);
808 __ B(hi, &different_compression_loop, /* is_far_target= */ false);
809 __ B(end);
810
811 // Calculate the difference.
812 __ Bind(&different_compression_diff);
813 __ Sub(out, temp_reg, temp3);
814 temps.Release(temp_reg);
815 // Flip the difference if the `arg` is compressed.
816 // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
817 __ Lsrs(temp0, temp0, 1u);
818 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
819 "Expecting 0=compressed, 1=uncompressed");
820
821 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
822 2 * kMaxInstructionSizeInBytes,
823 CodeBufferCheckScope::kMaximumSize);
824 __ it(cc);
825 __ rsb(cc, out, out, 0);
826 }
827 }
828
829 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
830 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
831 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
832 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
833 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
834 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
835
GetConstString(HInstruction * candidate,uint32_t * utf16_length)836 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
837 if (candidate->IsLoadString()) {
838 HLoadString* load_string = candidate->AsLoadString();
839 const DexFile& dex_file = load_string->GetDexFile();
840 return dex_file.GetStringDataAndUtf16Length(load_string->GetStringIndex(), utf16_length);
841 }
842 return nullptr;
843 }
844
VisitStringEquals(HInvoke * invoke)845 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
846 LocationSummary* locations =
847 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
848 InvokeRuntimeCallingConventionARMVIXL calling_convention;
849 locations->SetInAt(0, Location::RequiresRegister());
850 locations->SetInAt(1, Location::RequiresRegister());
851
852 // Temporary registers to store lengths of strings and for calculations.
853 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
854 locations->AddTemp(LocationFrom(r0));
855
856 // For the generic implementation and for long const strings we need an extra temporary.
857 // We do not need it for short const strings, up to 4 bytes, see code generation below.
858 uint32_t const_string_length = 0u;
859 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
860 if (const_string == nullptr) {
861 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
862 }
863 bool is_compressed =
864 mirror::kUseStringCompression &&
865 const_string != nullptr &&
866 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
867 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
868 locations->AddTemp(Location::RequiresRegister());
869 }
870
871 // TODO: If the String.equals() is used only for an immediately following HIf, we can
872 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
873 // Then we shall need an extra temporary register instead of the output register.
874 locations->SetOut(Location::RequiresRegister());
875 }
876
VisitStringEquals(HInvoke * invoke)877 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
878 ArmVIXLAssembler* assembler = GetAssembler();
879 LocationSummary* locations = invoke->GetLocations();
880
881 vixl32::Register str = InputRegisterAt(invoke, 0);
882 vixl32::Register arg = InputRegisterAt(invoke, 1);
883 vixl32::Register out = OutputRegister(invoke);
884
885 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
886
887 vixl32::Label loop;
888 vixl32::Label end;
889 vixl32::Label return_true;
890 vixl32::Label return_false;
891 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
892
893 // Get offsets of count, value, and class fields within a string object.
894 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
895 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
896 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
897
898 // Note that the null check must have been done earlier.
899 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
900
901 StringEqualsOptimizations optimizations(invoke);
902 if (!optimizations.GetArgumentNotNull()) {
903 // Check if input is null, return false if it is.
904 __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
905 }
906
907 // Reference equality check, return true if same reference.
908 __ Cmp(str, arg);
909 __ B(eq, &return_true, /* is_far_target= */ false);
910
911 if (!optimizations.GetArgumentIsString()) {
912 // Instanceof check for the argument by comparing class fields.
913 // All string objects must have the same type since String cannot be subclassed.
914 // Receiver must be a string object, so its class field is equal to all strings' class fields.
915 // If the argument is a string object, its class field must be equal to receiver's class field.
916 //
917 // As the String class is expected to be non-movable, we can read the class
918 // field from String.equals' arguments without read barriers.
919 AssertNonMovableStringClass();
920 // /* HeapReference<Class> */ temp = str->klass_
921 __ Ldr(temp, MemOperand(str, class_offset));
922 // /* HeapReference<Class> */ out = arg->klass_
923 __ Ldr(out, MemOperand(arg, class_offset));
924 // Also, because we use the previously loaded class references only in the
925 // following comparison, we don't need to unpoison them.
926 __ Cmp(temp, out);
927 __ B(ne, &return_false, /* is_far_target= */ false);
928 }
929
930 // Check if one of the inputs is a const string. Do not special-case both strings
931 // being const, such cases should be handled by constant folding if needed.
932 uint32_t const_string_length = 0u;
933 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
934 if (const_string == nullptr) {
935 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
936 if (const_string != nullptr) {
937 std::swap(str, arg); // Make sure the const string is in `str`.
938 }
939 }
940 bool is_compressed =
941 mirror::kUseStringCompression &&
942 const_string != nullptr &&
943 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
944
945 if (const_string != nullptr) {
946 // Load `count` field of the argument string and check if it matches the const string.
947 // Also compares the compression style, if differs return false.
948 __ Ldr(temp, MemOperand(arg, count_offset));
949 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
950 __ B(ne, &return_false, /* is_far_target= */ false);
951 } else {
952 // Load `count` fields of this and argument strings.
953 __ Ldr(temp, MemOperand(str, count_offset));
954 __ Ldr(out, MemOperand(arg, count_offset));
955 // Check if `count` fields are equal, return false if they're not.
956 // Also compares the compression style, if differs return false.
957 __ Cmp(temp, out);
958 __ B(ne, &return_false, /* is_far_target= */ false);
959 }
960
961 // Assertions that must hold in order to compare strings 4 bytes at a time.
962 // Ok to do this because strings are zero-padded to kObjectAlignment.
963 DCHECK_ALIGNED(value_offset, 4);
964 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
965
966 if (const_string != nullptr &&
967 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
968 : kShortConstStringEqualsCutoffInBytes / 2u)) {
969 // Load and compare the contents. Though we know the contents of the short const string
970 // at compile time, materializing constants may be more code than loading from memory.
971 int32_t offset = value_offset;
972 size_t remaining_bytes =
973 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
974 while (remaining_bytes > sizeof(uint32_t)) {
975 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
976 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
977 vixl32::Register temp2 = scratch_scope.Acquire();
978 __ Ldrd(temp, temp1, MemOperand(str, offset));
979 __ Ldrd(temp2, out, MemOperand(arg, offset));
980 __ Cmp(temp, temp2);
981 __ B(ne, &return_false, /* is_far_target= */ false);
982 __ Cmp(temp1, out);
983 __ B(ne, &return_false, /* is_far_target= */ false);
984 offset += 2u * sizeof(uint32_t);
985 remaining_bytes -= 2u * sizeof(uint32_t);
986 }
987 if (remaining_bytes != 0u) {
988 __ Ldr(temp, MemOperand(str, offset));
989 __ Ldr(out, MemOperand(arg, offset));
990 __ Cmp(temp, out);
991 __ B(ne, &return_false, /* is_far_target= */ false);
992 }
993 } else {
994 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
995 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
996 "Expecting 0=compressed, 1=uncompressed");
997 __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
998
999 if (mirror::kUseStringCompression) {
1000 // For string compression, calculate the number of bytes to compare (not chars).
1001 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1002 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1003 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1004 2 * kMaxInstructionSizeInBytes,
1005 CodeBufferCheckScope::kMaximumSize);
1006 __ it(cs); // If uncompressed,
1007 __ add(cs, temp, temp, temp); // double the byte count.
1008 }
1009
1010 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1011 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1012 vixl32::Register temp2 = scratch_scope.Acquire();
1013
1014 // Store offset of string value in preparation for comparison loop.
1015 __ Mov(temp1, value_offset);
1016
1017 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1018 __ Bind(&loop);
1019 __ Ldr(out, MemOperand(str, temp1));
1020 __ Ldr(temp2, MemOperand(arg, temp1));
1021 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1022 __ Cmp(out, temp2);
1023 __ B(ne, &return_false, /* is_far_target= */ false);
1024 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1025 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1026 __ B(hi, &loop, /* is_far_target= */ false);
1027 }
1028
1029 // Return true and exit the function.
1030 // If loop does not result in returning false, we return true.
1031 __ Bind(&return_true);
1032 __ Mov(out, 1);
1033 __ B(final_label);
1034
1035 // Return false and exit the function.
1036 __ Bind(&return_false);
1037 __ Mov(out, 0);
1038
1039 if (end.IsReferenced()) {
1040 __ Bind(&end);
1041 }
1042 }
1043
GenerateVisitStringIndexOf(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,bool start_at_zero)1044 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1045 ArmVIXLAssembler* assembler,
1046 CodeGeneratorARMVIXL* codegen,
1047 bool start_at_zero) {
1048 LocationSummary* locations = invoke->GetLocations();
1049
1050 // Note that the null check must have been done earlier.
1051 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1052
1053 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1054 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1055 SlowPathCodeARMVIXL* slow_path = nullptr;
1056 HInstruction* code_point = invoke->InputAt(1);
1057 if (code_point->IsIntConstant()) {
1058 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1059 std::numeric_limits<uint16_t>::max()) {
1060 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1061 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1062 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1063 codegen->AddSlowPath(slow_path);
1064 __ B(slow_path->GetEntryLabel());
1065 __ Bind(slow_path->GetExitLabel());
1066 return;
1067 }
1068 } else if (code_point->GetType() != DataType::Type::kUint16) {
1069 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1070 // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1071 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1072 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1073 codegen->AddSlowPath(slow_path);
1074 __ B(hs, slow_path->GetEntryLabel());
1075 }
1076
1077 if (start_at_zero) {
1078 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1079 DCHECK(tmp_reg.Is(r2));
1080 // Start-index = 0.
1081 __ Mov(tmp_reg, 0);
1082 }
1083
1084 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1085 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1086
1087 if (slow_path != nullptr) {
1088 __ Bind(slow_path->GetExitLabel());
1089 }
1090 }
1091
VisitStringIndexOf(HInvoke * invoke)1092 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1093 LocationSummary* locations = new (allocator_) LocationSummary(
1094 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1095 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1096 // best to align the inputs accordingly.
1097 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1098 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1099 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1100 locations->SetOut(LocationFrom(r0));
1101
1102 // Need to send start-index=0.
1103 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1104 }
1105
VisitStringIndexOf(HInvoke * invoke)1106 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1107 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1108 }
1109
VisitStringIndexOfAfter(HInvoke * invoke)1110 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1111 LocationSummary* locations = new (allocator_) LocationSummary(
1112 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1113 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1114 // best to align the inputs accordingly.
1115 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1116 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1117 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1118 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1119 locations->SetOut(LocationFrom(r0));
1120 }
1121
VisitStringIndexOfAfter(HInvoke * invoke)1122 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1123 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1124 }
1125
VisitStringNewStringFromBytes(HInvoke * invoke)1126 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1127 LocationSummary* locations = new (allocator_) LocationSummary(
1128 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1129 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1130 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1131 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1132 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1133 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1134 locations->SetOut(LocationFrom(r0));
1135 }
1136
VisitStringNewStringFromBytes(HInvoke * invoke)1137 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1138 ArmVIXLAssembler* assembler = GetAssembler();
1139 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1140 __ Cmp(byte_array, 0);
1141 SlowPathCodeARMVIXL* slow_path =
1142 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1143 codegen_->AddSlowPath(slow_path);
1144 __ B(eq, slow_path->GetEntryLabel());
1145
1146 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1147 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1148 __ Bind(slow_path->GetExitLabel());
1149 }
1150
VisitStringNewStringFromChars(HInvoke * invoke)1151 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1152 LocationSummary* locations =
1153 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1154 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1155 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1156 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1157 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1158 locations->SetOut(LocationFrom(r0));
1159 }
1160
VisitStringNewStringFromChars(HInvoke * invoke)1161 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1162 // No need to emit code checking whether `locations->InAt(2)` is a null
1163 // pointer, as callers of the native method
1164 //
1165 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1166 //
1167 // all include a null check on `data` before calling that method.
1168 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1169 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1170 }
1171
VisitStringNewStringFromString(HInvoke * invoke)1172 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1173 LocationSummary* locations = new (allocator_) LocationSummary(
1174 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1175 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1176 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1177 locations->SetOut(LocationFrom(r0));
1178 }
1179
VisitStringNewStringFromString(HInvoke * invoke)1180 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1181 ArmVIXLAssembler* assembler = GetAssembler();
1182 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1183 __ Cmp(string_to_copy, 0);
1184 SlowPathCodeARMVIXL* slow_path =
1185 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1186 codegen_->AddSlowPath(slow_path);
1187 __ B(eq, slow_path->GetEntryLabel());
1188
1189 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1190 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1191
1192 __ Bind(slow_path->GetExitLabel());
1193 }
1194
GenArrayAddress(ArmVIXLAssembler * assembler,vixl32::Register dest,vixl32::Register base,Location pos,DataType::Type type,int32_t data_offset)1195 static void GenArrayAddress(ArmVIXLAssembler* assembler,
1196 vixl32::Register dest,
1197 vixl32::Register base,
1198 Location pos,
1199 DataType::Type type,
1200 int32_t data_offset) {
1201 if (pos.IsConstant()) {
1202 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1203 __ Add(dest, base, static_cast<int32_t>(DataType::Size(type)) * constant + data_offset);
1204 } else {
1205 if (data_offset != 0) {
1206 __ Add(dest, base, data_offset);
1207 base = dest;
1208 }
1209 __ Add(dest, base, Operand(RegisterFrom(pos), LSL, DataType::SizeShift(type)));
1210 }
1211 }
1212
LocationForSystemArrayCopyInput(ArmVIXLAssembler * assembler,HInstruction * input)1213 static Location LocationForSystemArrayCopyInput(ArmVIXLAssembler* assembler, HInstruction* input) {
1214 HIntConstant* const_input = input->AsIntConstantOrNull();
1215 if (const_input != nullptr && assembler->ShifterOperandCanAlwaysHold(const_input->GetValue())) {
1216 return Location::ConstantLocation(const_input);
1217 } else {
1218 return Location::RequiresRegister();
1219 }
1220 }
1221
1222 // We choose to use the native implementation for longer copy lengths.
1223 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1224
1225 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1226 // The only read barrier implementation supporting the
1227 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1228 if (codegen_->EmitNonBakerReadBarrier()) {
1229 return;
1230 }
1231
1232 constexpr size_t kInitialNumTemps = 3u; // We need at least three temps.
1233 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1234 invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
1235 if (locations != nullptr) {
1236 locations->SetInAt(1, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(1)));
1237 locations->SetInAt(3, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(3)));
1238 locations->SetInAt(4, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(4)));
1239 if (codegen_->EmitBakerReadBarrier()) {
1240 // Temporary register IP cannot be used in
1241 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1242 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1243 // temporary register from the register allocator.
1244 locations->AddTemp(Location::RequiresRegister());
1245 }
1246 }
1247 }
1248
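// The helper below emits the range checks for one side of System.arraycopy(). In rough
// Java-like pseudocode (the generated code specializes the constant and
// length-is-array-length cases rather than emitting this literally):
//
//   if (pos < 0 || array.length - pos < length) {
//     goto intrinsic slow path;  // Defer to the runtime/native implementation.
//   }
//
// When `length` is known to be the array length, the copy can only fit if pos == 0, so a
// single compare-and-branch suffices.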
1249 static void CheckSystemArrayCopyPosition(ArmVIXLAssembler* assembler,
1250 vixl32::Register array,
1251 Location pos,
1252 Location length,
1253 SlowPathCodeARMVIXL* slow_path,
1254 vixl32::Register temp,
1255 bool length_is_array_length,
1256 bool position_sign_checked) {
1257 // Where is the length in the Array?
1258 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1259
1260 if (pos.IsConstant()) {
1261 int32_t pos_const = Int32ConstantFrom(pos);
1262 if (pos_const == 0) {
1263 if (!length_is_array_length) {
1264 // Check that length(array) >= length.
1265 __ Ldr(temp, MemOperand(array, length_offset));
1266 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1267 __ B(lt, slow_path->GetEntryLabel());
1268 }
1269 } else {
1270 // Calculate length(array) - pos.
1271 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1272 // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1273 __ Ldr(temp, MemOperand(array, length_offset));
1274 __ Sub(temp, temp, pos_const);
1275
1276 // Check that (length(array) - pos) >= length.
1277 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1278 __ B(lt, slow_path->GetEntryLabel());
1279 }
1280 } else if (length_is_array_length) {
1281 // The only way the copy can succeed is if pos is zero.
1282 vixl32::Register pos_reg = RegisterFrom(pos);
1283 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1284 } else {
1285 // Check that pos >= 0.
1286 vixl32::Register pos_reg = RegisterFrom(pos);
1287 if (!position_sign_checked) {
1288 __ Cmp(pos_reg, 0);
1289 __ B(lt, slow_path->GetEntryLabel());
1290 }
1291
1292 // Calculate length(array) - pos.
1293 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1294 // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1295 __ Ldr(temp, MemOperand(array, length_offset));
1296 __ Sub(temp, temp, pos_reg);
1297
1298 // Check that (length(array) - pos) >= length.
1299 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1300 __ B(lt, slow_path->GetEntryLabel());
1301 }
1302 }
1303
1304 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1305 // The only read barrier implementation supporting the
1306 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1307 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1308
1309 ArmVIXLAssembler* assembler = GetAssembler();
1310 LocationSummary* locations = invoke->GetLocations();
1311
1312 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1313 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1314 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1315 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1316 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1317
1318 vixl32::Register src = InputRegisterAt(invoke, 0);
1319 Location src_pos = locations->InAt(1);
1320 vixl32::Register dest = InputRegisterAt(invoke, 2);
1321 Location dest_pos = locations->InAt(3);
1322 Location length = locations->InAt(4);
1323 Location temp1_loc = locations->GetTemp(0);
1324 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1325 Location temp2_loc = locations->GetTemp(1);
1326 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1327 Location temp3_loc = locations->GetTemp(2);
1328 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1329
1330 SlowPathCodeARMVIXL* intrinsic_slow_path =
1331 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1332 codegen_->AddSlowPath(intrinsic_slow_path);
1333
1334 vixl32::Label conditions_on_positions_validated;
1335 SystemArrayCopyOptimizations optimizations(invoke);
1336
1337 // If source and destination are the same, go to the slow path when a forward copy would
1338 // overwrite unread source elements (dest_pos > src_pos); skip this when the positions match.
1339 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1340 if (src_pos.IsConstant()) {
1341 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1342 if (dest_pos.IsConstant()) {
1343 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1344 if (optimizations.GetDestinationIsSource()) {
1345 // Checked when building locations.
1346 DCHECK_GE(src_pos_constant, dest_pos_constant);
1347 } else if (src_pos_constant < dest_pos_constant) {
1348 __ Cmp(src, dest);
1349 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1350 }
1351 } else {
1352 if (!optimizations.GetDestinationIsSource()) {
1353 __ Cmp(src, dest);
1354 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1355 }
1356 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1357 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1358 }
1359 } else {
1360 if (!optimizations.GetDestinationIsSource()) {
1361 __ Cmp(src, dest);
1362 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1363 }
1364 __ Cmp(RegisterFrom(src_pos), OperandFrom(dest_pos, DataType::Type::kInt32));
1365 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1366 }
1367 }
1368
1369 __ Bind(&conditions_on_positions_validated);
1370
1371 if (!optimizations.GetSourceIsNotNull()) {
1372 // Bail out if the source is null.
1373 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1374 }
1375
1376 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1377 // Bail out if the destination is null.
1378 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1379 }
1380
1381 // We have already checked in the LocationsBuilder for the constant case.
1382 if (!length.IsConstant()) {
1383 // Merge the following two comparisons into one:
1384 // If the length is negative, bail out (delegate to libcore's native implementation).
1385 // If the length is >= 128, (currently) prefer the native implementation.
1386 __ Cmp(RegisterFrom(length), kSystemArrayCopyThreshold);
1387 __ B(hs, intrinsic_slow_path->GetEntryLabel());
1388 }
1389
1390 // Validity checks: source.
1391 CheckSystemArrayCopyPosition(assembler,
1392 src,
1393 src_pos,
1394 length,
1395 intrinsic_slow_path,
1396 temp1,
1397 optimizations.GetCountIsSourceLength(),
1398 /*position_sign_checked=*/ false);
1399
1400 // Validity checks: dest.
1401 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1402 CheckSystemArrayCopyPosition(assembler,
1403 dest,
1404 dest_pos,
1405 length,
1406 intrinsic_slow_path,
1407 temp1,
1408 optimizations.GetCountIsDestinationLength(),
1409 dest_position_sign_checked);
1410
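// The lambda below rejects anything that is not a non-primitive (reference) array,
// roughly:
//
//   if (klass->component_type_ == null ||                                // Not an array.
//       klass->component_type_->primitive_type_ != Primitive::kPrimNot) {  // Primitive array.
//     goto intrinsic slow path;
//   }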
1411 auto check_non_primitive_array_class = [&](vixl32::Register klass, vixl32::Register temp) {
1412 // No read barrier is needed for reading a chain of constant references for comparing
1413 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1414 // /* HeapReference<Class> */ temp = klass->component_type_
1415 __ Ldr(temp, MemOperand(klass, component_offset));
1416 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
1417 // Check that the component type is not null.
1418 __ CompareAndBranchIfZero(temp, intrinsic_slow_path->GetEntryLabel());
1419 // Check that the component type is not a primitive.
1420 // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
1421 __ Ldrh(temp, MemOperand(temp, primitive_offset));
1422 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1423 __ CompareAndBranchIfNonZero(temp, intrinsic_slow_path->GetEntryLabel());
1424 };
1425
1426 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1427 // Check whether all elements of the source array are assignable to the component
1428 // type of the destination array. We do two checks: the classes are the same,
1429 // or the destination is Object[]. If none of these checks succeed, we go to the
1430 // slow path.
1431
1432 if (codegen_->EmitBakerReadBarrier()) {
1433 // /* HeapReference<Class> */ temp1 = dest->klass_
1434 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1435 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1436 // Register `temp1` is not trashed by the read barrier emitted
1437 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1438 // method produces a call to a ReadBarrierMarkRegX entry point,
1439 // which saves all potentially live registers, including
1440 // temporaries such as `temp1`.
1441 // /* HeapReference<Class> */ temp2 = src->klass_
1442 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1443 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1444 } else {
1445 // /* HeapReference<Class> */ temp1 = dest->klass_
1446 __ Ldr(temp1, MemOperand(dest, class_offset));
1447 assembler->MaybeUnpoisonHeapReference(temp1);
1448 // /* HeapReference<Class> */ temp2 = src->klass_
1449 __ Ldr(temp2, MemOperand(src, class_offset));
1450 assembler->MaybeUnpoisonHeapReference(temp2);
1451 }
1452
1453 __ Cmp(temp1, temp2);
1454 if (optimizations.GetDestinationIsTypedObjectArray()) {
1455 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1456 vixl32::Label do_copy;
1457 // For class match, we can skip the source type check regardless of the optimization flag.
1458 __ B(eq, &do_copy, /* is_far_target= */ false);
1459 // No read barrier is needed for reading a chain of constant references
1460 // for comparing with null, see `ReadBarrierOption`.
1461 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1462 __ Ldr(temp1, MemOperand(temp1, component_offset));
1463 assembler->MaybeUnpoisonHeapReference(temp1);
1464 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1465 __ Ldr(temp1, MemOperand(temp1, super_offset));
1466 // No need to unpoison the result, we're comparing against null.
1467 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1468 // Bail out if the source is not a non primitive array.
1469 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1470 check_non_primitive_array_class(temp2, temp2);
1471 }
1472 __ Bind(&do_copy);
1473 } else {
1474 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1475 // For class match, we can skip the array type check completely if at least one of source
1476 // and destination is known to be a non primitive array, otherwise one check is enough.
1477 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1478 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1479 !optimizations.GetSourceIsNonPrimitiveArray()) {
1480 check_non_primitive_array_class(temp2, temp2);
1481 }
1482 }
1483 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1484 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1485 // Bail out if the source is not a non primitive array.
1486 // No read barrier is needed for reading a chain of constant references for comparing
1487 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1488 // /* HeapReference<Class> */ temp2 = src->klass_
1489 __ Ldr(temp2, MemOperand(src, class_offset));
1490 assembler->MaybeUnpoisonHeapReference(temp2);
1491 check_non_primitive_array_class(temp2, temp2);
1492 }
1493
1494 if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1495 // Null constant length: no need to emit the loop code at all.
1496 } else {
1497 vixl32::Label skip_copy_and_write_barrier;
1498 if (length.IsRegister()) {
1499 // Don't enter the copy loop if the length is null.
1500 __ CompareAndBranchIfZero(
1501 RegisterFrom(length), &skip_copy_and_write_barrier, /* is_far_target= */ false);
1502 }
1503
1504 const DataType::Type type = DataType::Type::kReference;
1505 const int32_t element_size = DataType::Size(type);
1506 const int32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1507
1508 SlowPathCodeARMVIXL* read_barrier_slow_path = nullptr;
1509 vixl32::Register rb_tmp;
1510 bool emit_rb = codegen_->EmitBakerReadBarrier();
1511 if (emit_rb) {
1512 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1513
1514 // SystemArrayCopy implementation for Baker read barriers (see
1515 // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1516 //
1517 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1518 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1519 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1520 // if (is_gray) {
1521 // // Slow-path copy.
1522 // do {
1523 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1524 // } while (src_ptr != end_ptr)
1525 // } else {
1526 // // Fast-path copy.
1527 // do {
1528 // *dest_ptr++ = *src_ptr++;
1529 // } while (src_ptr != end_ptr)
1530 // }
1531
1532 // /* int32_t */ monitor = src->monitor_
1533 rb_tmp = RegisterFrom(locations->GetTemp(3));
1534 __ Ldr(rb_tmp, MemOperand(src, monitor_offset));
1535 // /* LockWord */ lock_word = LockWord(monitor)
1536 static_assert(sizeof(LockWord) == sizeof(int32_t),
1537 "art::LockWord and int32_t have different sizes.");
1538
1539 // Introduce a dependency on the lock_word including the rb_state,
1540 // which shall prevent load-load reordering without using
1541 // a memory barrier (which would be more expensive).
1542 // `src` is unchanged by this operation, but its value now depends
1543 // on `rb_tmp`.
1544 __ Add(src, src, Operand(rb_tmp, vixl32::LSR, 32));
1545
1546 // Slow path used to copy array when `src` is gray.
1547 // Note that the base destination address is computed in `temp2`
1548 // by the slow path code.
1549 read_barrier_slow_path =
1550 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1551 codegen_->AddSlowPath(read_barrier_slow_path);
1552 }
1553
1554 // Compute the base source address in `temp1`.
1555 // Note that for read barrier, `temp1` (the base source address) is computed from `src`
1556 // (and `src_pos`) here, and thus honors the artificial dependency of `src` on `rb_tmp`.
1557 GenArrayAddress(GetAssembler(), temp1, src, src_pos, type, data_offset);
1558 // Compute the base destination address in `temp2`.
1559 GenArrayAddress(GetAssembler(), temp2, dest, dest_pos, type, data_offset);
1560 // Compute the end source address in `temp3`.
1561 GenArrayAddress(GetAssembler(), temp3, temp1, length, type, /*data_offset=*/ 0);
1562
1563 if (emit_rb) {
1564 // Given the numeric representation, it's enough to check the low bit of the
1565 // rb_state. We do that by shifting the bit out of the lock word with LSRS
1566 // which can be a 16-bit instruction unlike the TST immediate.
1567 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1568 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1569 DCHECK(rb_tmp.IsValid());
1570 __ Lsrs(rb_tmp, rb_tmp, LockWord::kReadBarrierStateShift + 1);
1571 // Carry flag is the last bit shifted out by LSRS.
1572 __ B(cs, read_barrier_slow_path->GetEntryLabel());
1573 }
1574
1575 // Iterate over the arrays and do a raw copy of the objects. We don't need to
1576 // poison/unpoison.
1577 vixl32::Label loop;
1578 __ Bind(&loop);
1579 {
1580 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1581 const vixl32::Register temp_reg = temps.Acquire();
1582 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1583 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1584 }
1585 __ Cmp(temp1, temp3);
1586 __ B(ne, &loop, /* is_far_target= */ false);
1587
1588 if (emit_rb) {
1589 DCHECK(read_barrier_slow_path != nullptr);
1590 __ Bind(read_barrier_slow_path->GetExitLabel());
1591 }
1592
1593 // We only need one card marking on the destination array.
1594 codegen_->MarkGCCard(temp1, temp2, dest);
1595
1596 __ Bind(&skip_copy_and_write_barrier);
1597 }
1598
1599 __ Bind(intrinsic_slow_path->GetExitLabel());
1600 }
1601
1602 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1603 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1604 // the code generator. Furthermore, the register allocator creates fixed live intervals
1605 // for all caller-saved registers because we are doing a function call. As a result, if
1606 // the input and output locations are unallocated, the register allocator runs out of
1607 // registers and fails; however, a debuggable graph is not the common case.
1608 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1609 return;
1610 }
1611
1612 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1613 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1614 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1615
1616 LocationSummary* const locations =
1617 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1618 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1619
1620 locations->SetInAt(0, Location::RequiresFpuRegister());
1621 locations->SetOut(Location::RequiresFpuRegister());
1622 // Native code uses the soft float ABI.
1623 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1624 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1625 }
1626
1627 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1628 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1629 // the code generator. Furthermore, the register allocator creates fixed live intervals
1630 // for all caller-saved registers because we are doing a function call. As a result, if
1631 // the input and output locations are unallocated, the register allocator runs out of
1632 // registers and fails; however, a debuggable graph is not the common case.
1633 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1634 return;
1635 }
1636
1637 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1638 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1639 DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1640 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1641
1642 LocationSummary* const locations =
1643 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1644 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1645
1646 locations->SetInAt(0, Location::RequiresFpuRegister());
1647 locations->SetInAt(1, Location::RequiresFpuRegister());
1648 locations->SetOut(Location::RequiresFpuRegister());
1649 // Native code uses the soft float ABI.
1650 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1651 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1652 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1653 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1654 }
1655
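// The helpers below marshal floating-point arguments into core registers because the
// runtime math entrypoints use the soft-float ABI. For a unary double call the emitted
// sequence is, in effect (r0/r1/d0 are illustrative; the actual registers come from the
// calling-convention temps reserved above):
//
//   vmov r0, r1, d0      // Pass the argument in a core register pair.
//   bl   <entrypoint>    // E.g. the kQuickCos stub.
//   vmov d0, r0, r1      // Move the core-register result back into a D register.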
1656 static void GenFPToFPCall(HInvoke* invoke,
1657 ArmVIXLAssembler* assembler,
1658 CodeGeneratorARMVIXL* codegen,
1659 QuickEntrypointEnum entry) {
1660 LocationSummary* const locations = invoke->GetLocations();
1661
1662 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1663 DCHECK(locations->WillCall() && locations->Intrinsified());
1664
1665 // Native code uses the soft float ABI.
1666 __ Vmov(RegisterFrom(locations->GetTemp(0)),
1667 RegisterFrom(locations->GetTemp(1)),
1668 InputDRegisterAt(invoke, 0));
1669 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1670 __ Vmov(OutputDRegister(invoke),
1671 RegisterFrom(locations->GetTemp(0)),
1672 RegisterFrom(locations->GetTemp(1)));
1673 }
1674
1675 static void GenFPFPToFPCall(HInvoke* invoke,
1676 ArmVIXLAssembler* assembler,
1677 CodeGeneratorARMVIXL* codegen,
1678 QuickEntrypointEnum entry) {
1679 LocationSummary* const locations = invoke->GetLocations();
1680
1681 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1682 DCHECK(locations->WillCall() && locations->Intrinsified());
1683
1684 // Native code uses the soft float ABI.
1685 __ Vmov(RegisterFrom(locations->GetTemp(0)),
1686 RegisterFrom(locations->GetTemp(1)),
1687 InputDRegisterAt(invoke, 0));
1688 __ Vmov(RegisterFrom(locations->GetTemp(2)),
1689 RegisterFrom(locations->GetTemp(3)),
1690 InputDRegisterAt(invoke, 1));
1691 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1692 __ Vmov(OutputDRegister(invoke),
1693 RegisterFrom(locations->GetTemp(0)),
1694 RegisterFrom(locations->GetTemp(1)));
1695 }
1696
1697 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1698 CreateFPToFPCallLocations(allocator_, invoke);
1699 }
1700
1701 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1702 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1703 }
1704
1705 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1706 CreateFPToFPCallLocations(allocator_, invoke);
1707 }
1708
1709 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1710 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1711 }
1712
1713 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1714 CreateFPToFPCallLocations(allocator_, invoke);
1715 }
1716
1717 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1718 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1719 }
1720
1721 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1722 CreateFPToFPCallLocations(allocator_, invoke);
1723 }
1724
1725 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1726 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1727 }
1728
1729 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1730 CreateFPToFPCallLocations(allocator_, invoke);
1731 }
1732
1733 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1734 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1735 }
1736
1737 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1738 CreateFPToFPCallLocations(allocator_, invoke);
1739 }
1740
1741 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1742 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1743 }
1744
1745 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1746 CreateFPToFPCallLocations(allocator_, invoke);
1747 }
1748
1749 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1750 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1751 }
1752
1753 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1754 CreateFPToFPCallLocations(allocator_, invoke);
1755 }
1756
1757 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1758 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1759 }
1760
1761 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1762 CreateFPToFPCallLocations(allocator_, invoke);
1763 }
1764
1765 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1766 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1767 }
1768
1769 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1770 CreateFPToFPCallLocations(allocator_, invoke);
1771 }
1772
1773 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1774 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1775 }
1776
1777 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1778 CreateFPToFPCallLocations(allocator_, invoke);
1779 }
1780
1781 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1782 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1783 }
1784
1785 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1786 CreateFPToFPCallLocations(allocator_, invoke);
1787 }
1788
1789 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1790 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1791 }
1792
1793 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1794 CreateFPToFPCallLocations(allocator_, invoke);
1795 }
1796
1797 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1798 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1799 }
1800
1801 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1802 CreateFPToFPCallLocations(allocator_, invoke);
1803 }
1804
1805 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1806 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1807 }
1808
1809 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1810 CreateFPFPToFPCallLocations(allocator_, invoke);
1811 }
1812
1813 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1814 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1815 }
1816
1817 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1818 CreateFPFPToFPCallLocations(allocator_, invoke);
1819 }
1820
1821 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1822 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1823 }
1824
1825 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1826 CreateFPFPToFPCallLocations(allocator_, invoke);
1827 }
1828
1829 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1830 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1831 }
1832
1833 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1834 CreateFPFPToFPCallLocations(allocator_, invoke);
1835 }
1836
1837 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1838 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1839 }
1840
1841 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1842 CreateIntToIntLocations(allocator_, invoke);
1843 }
1844
1845 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1846 ArmVIXLAssembler* assembler = GetAssembler();
1847 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1848 }
1849
1850 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1851 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1852 }
1853
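// Long.reverse(x) below is implemented by bit-reversing each 32-bit half with RBIT and
// swapping the halves. As a Java-level identity (a sketch, not generated code):
//
//   Long.reverse(x) == ((long) Integer.reverse((int) x) << 32)
//                      | (Integer.reverse((int) (x >>> 32)) & 0xffffffffL)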
1854 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1855 ArmVIXLAssembler* assembler = GetAssembler();
1856 LocationSummary* locations = invoke->GetLocations();
1857
1858 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
1859 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
1860 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1861 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1862
1863 __ Rbit(out_reg_lo, in_reg_hi);
1864 __ Rbit(out_reg_hi, in_reg_lo);
1865 }
1866
1867 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1868 DCHECK(pair.IsRegisterPair());
1869 __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1870 __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
1871 }
1872
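// The helper below picks the byte-reversal sequence per type: REV16/REVSH/REV for 16- and
// 32-bit values. For 64-bit values it byte-reverses each word and swaps the words,
// roughly:
//
//   out_lo = rev(in_hi);
//   out_hi = rev(in_lo);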
1873 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
1874 DataType::Type type,
1875 Location in,
1876 Location out) {
1877 switch (type) {
1878 case DataType::Type::kUint16:
1879 __ Rev16(RegisterFrom(out), RegisterFrom(in));
1880 break;
1881 case DataType::Type::kInt16:
1882 __ Revsh(RegisterFrom(out), RegisterFrom(in));
1883 break;
1884 case DataType::Type::kInt32:
1885 __ Rev(RegisterFrom(out), RegisterFrom(in));
1886 break;
1887 case DataType::Type::kInt64:
1888 DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
1889 __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
1890 __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
1891 break;
1892 case DataType::Type::kFloat32:
1893 __ Rev(RegisterFrom(in), RegisterFrom(in)); // Note: Clobbers `in`.
1894 __ Vmov(SRegisterFrom(out), RegisterFrom(in));
1895 break;
1896 case DataType::Type::kFloat64:
1897 GenerateReverseBytesInPlaceForEachWord(assembler, in); // Note: Clobbers `in`.
1898 __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in)); // Swap high/low.
1899 break;
1900 default:
1901 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
1902 UNREACHABLE();
1903 }
1904 }
1905
1906 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1907 CreateIntToIntLocations(allocator_, invoke);
1908 }
1909
1910 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1911 ArmVIXLAssembler* assembler = GetAssembler();
1912 LocationSummary* locations = invoke->GetLocations();
1913 GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
1914 }
1915
1916 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1917 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1918 }
1919
1920 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1921 ArmVIXLAssembler* assembler = GetAssembler();
1922 LocationSummary* locations = invoke->GetLocations();
1923 GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
1924 }
1925
1926 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1927 CreateIntToIntLocations(allocator_, invoke);
1928 }
1929
1930 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1931 ArmVIXLAssembler* assembler = GetAssembler();
1932 LocationSummary* locations = invoke->GetLocations();
1933 GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
1934 }
1935
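// Bit counting below uses the NEON horizontal-add sequence rather than a scalar loop.
// A rough sketch of the data flow for Long.bitCount (Integer.bitCount duplicates the
// single word and skips the final pairwise add):
//
//   d_tmp = {src_hi, src_lo}            // VMOV from the core register(s).
//   d_tmp = per-byte popcounts          // VCNT.8
//   d_tmp = pairwise 16-bit sums        // VPADDL.U8
//   d_tmp = pairwise 32-bit sums        // VPADDL.U16
//   d_tmp = single 64-bit sum           // VPADDL.U32 (long case only)
//   out   = low word of d_tmp           // VMOV back to a core register.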
1936 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
1937 DCHECK(DataType::IsIntOrLongType(type)) << type;
1938 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
1939 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
1940
1941 bool is_long = type == DataType::Type::kInt64;
1942 LocationSummary* locations = instr->GetLocations();
1943 Location in = locations->InAt(0);
1944 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
1945 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
1946 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
1947 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
1948 vixl32::Register out_r = OutputRegister(instr);
1949
1950 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
1951 // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
1952 // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
1953 // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
1954 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
1955 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
1956 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
1957 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
1958 if (is_long) {
1959 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
1960 }
1961 __ Vmov(out_r, tmp_s);
1962 }
1963
1964 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1965 CreateIntToIntLocations(allocator_, invoke);
1966 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1967 }
1968
1969 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1970 GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
1971 }
1972
1973 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1974 VisitIntegerBitCount(invoke);
1975 }
1976
1977 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1978 GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
1979 }
1980
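// Highest-one-bit below relies on the identity (a Java-level sketch):
//
//   Integer.highestOneBit(x) == (x == 0) ? 0 : 0x80000000 >>> Integer.numberOfLeadingZeros(x)
//
// which maps to CLZ followed by an LSR of the 0x80000000 constant (an LSR by 32 naturally
// yields 0 for the zero input). For the 64-bit case the same is done per half and the
// low-half result is zeroed when the high half produced a non-zero bit.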
1981 static void GenHighestOneBit(HInvoke* invoke,
1982 DataType::Type type,
1983 CodeGeneratorARMVIXL* codegen) {
1984 DCHECK(DataType::IsIntOrLongType(type));
1985
1986 ArmVIXLAssembler* assembler = codegen->GetAssembler();
1987 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1988 const vixl32::Register temp = temps.Acquire();
1989
1990 if (type == DataType::Type::kInt64) {
1991 LocationSummary* locations = invoke->GetLocations();
1992 Location in = locations->InAt(0);
1993 Location out = locations->Out();
1994
1995 vixl32::Register in_reg_lo = LowRegisterFrom(in);
1996 vixl32::Register in_reg_hi = HighRegisterFrom(in);
1997 vixl32::Register out_reg_lo = LowRegisterFrom(out);
1998 vixl32::Register out_reg_hi = HighRegisterFrom(out);
1999
2000 __ Mov(temp, 0x80000000); // Modified immediate.
2001 __ Clz(out_reg_lo, in_reg_lo);
2002 __ Clz(out_reg_hi, in_reg_hi);
2003 __ Lsr(out_reg_lo, temp, out_reg_lo);
2004 __ Lsrs(out_reg_hi, temp, out_reg_hi);
2005
2006 // Discard result for lowest 32 bits if highest 32 bits are not zero.
2007 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2008 // we check that the output is in a low register, so that a 16-bit MOV
2009 // encoding can be used. If output is in a high register, then we generate
2010 // 4 more bytes of code to avoid a branch.
2011 Operand mov_src(0);
2012 if (!out_reg_lo.IsLow()) {
2013 __ Mov(LeaveFlags, temp, 0);
2014 mov_src = Operand(temp);
2015 }
2016 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2017 2 * vixl32::k16BitT32InstructionSizeInBytes,
2018 CodeBufferCheckScope::kExactSize);
2019 __ it(ne);
2020 __ mov(ne, out_reg_lo, mov_src);
2021 } else {
2022 vixl32::Register out = OutputRegister(invoke);
2023 vixl32::Register in = InputRegisterAt(invoke, 0);
2024
2025 __ Mov(temp, 0x80000000); // Modified immediate.
2026 __ Clz(out, in);
2027 __ Lsr(out, temp, out);
2028 }
2029 }
2030
2031 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2032 CreateIntToIntLocations(allocator_, invoke);
2033 }
2034
2035 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2036 GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2037 }
2038
2039 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2040 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2041 }
2042
2043 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2044 GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2045 }
2046
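// Lowest-one-bit below uses the classic two's-complement identity (a Java-level sketch):
//
//   Integer.lowestOneBit(x) == x & -x
//
// For the 64-bit case both halves are negated and ANDed independently, and the high-half
// result is then zeroed when the low half already produced a non-zero bit.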
2047 static void GenLowestOneBit(HInvoke* invoke,
2048 DataType::Type type,
2049 CodeGeneratorARMVIXL* codegen) {
2050 DCHECK(DataType::IsIntOrLongType(type));
2051
2052 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2053 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2054 const vixl32::Register temp = temps.Acquire();
2055
2056 if (type == DataType::Type::kInt64) {
2057 LocationSummary* locations = invoke->GetLocations();
2058 Location in = locations->InAt(0);
2059 Location out = locations->Out();
2060
2061 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2062 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2063 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2064 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2065
2066 __ Rsb(out_reg_hi, in_reg_hi, 0);
2067 __ Rsb(out_reg_lo, in_reg_lo, 0);
2068 __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2069 // The result of this operation is 0 iff in_reg_lo is 0.
2070 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2071
2072 // Discard result for highest 32 bits if lowest 32 bits are not zero.
2073 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2074 // we check that the output is in a low register, so that a 16-bit MOV
2075 // encoding can be used. If output is in a high register, then we generate
2076 // 4 more bytes of code to avoid a branch.
2077 Operand mov_src(0);
2078 if (!out_reg_lo.IsLow()) {
2079 __ Mov(LeaveFlags, temp, 0);
2080 mov_src = Operand(temp);
2081 }
2082 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2083 2 * vixl32::k16BitT32InstructionSizeInBytes,
2084 CodeBufferCheckScope::kExactSize);
2085 __ it(ne);
2086 __ mov(ne, out_reg_hi, mov_src);
2087 } else {
2088 vixl32::Register out = OutputRegister(invoke);
2089 vixl32::Register in = InputRegisterAt(invoke, 0);
2090
2091 __ Rsb(temp, in, 0);
2092 __ And(out, temp, in);
2093 }
2094 }
2095
2096 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2097 CreateIntToIntLocations(allocator_, invoke);
2098 }
2099
2100 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2101 GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2102 }
2103
2104 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2105 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2106 }
2107
2108 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2109 GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2110 }
2111
2112 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2113 LocationSummary* locations =
2114 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2115 locations->SetInAt(0, Location::RequiresRegister());
2116 locations->SetInAt(1, Location::RequiresRegister());
2117 locations->SetInAt(2, Location::RequiresRegister());
2118 locations->SetInAt(3, Location::RequiresRegister());
2119 locations->SetInAt(4, Location::RequiresRegister());
2120
2121 // Temporary registers to store lengths of strings and for calculations.
2122 locations->AddRegisterTemps(3);
2123 }
2124
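// String.getChars() fast path: the code below copies four 16-bit characters (two 32-bit
// words) per main-loop iteration, then up to three trailing characters one at a time; for
// compressed (Latin-1) strings each byte is widened to a 16-bit char. A rough sketch:
//
//   while (num_chr >= 4) { copy 8 bytes;          num_chr -= 4; }
//   while (num_chr > 0)  { copy one 16-bit char;  num_chr -= 1; }
//   // Compressed source: while (num_chr > 0) { *dst++ = (char) *src_byte++; }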
2125 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2126 ArmVIXLAssembler* assembler = GetAssembler();
2127 LocationSummary* locations = invoke->GetLocations();
2128
2129 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2130 const size_t char_size = DataType::Size(DataType::Type::kUint16);
2131 DCHECK_EQ(char_size, 2u);
2132
2133 // Location of data in char array buffer.
2134 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2135
2136 // Location of char array data in string.
2137 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2138
2139 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2140 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2141 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2142 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2143 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2144 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2145 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2146
2147 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2148 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2149 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2150
2151 vixl32::Label done, compressed_string_loop;
2152 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2153 // Compute the base address of the destination region to be copied.
2154 __ Add(dst_ptr, dstObj, data_offset);
2155 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2156
2157 __ Subs(num_chr, srcEnd, srcBegin);
2158 // Early out for valid zero-length retrievals.
2159 __ B(eq, final_label, /* is_far_target= */ false);
2160
2161 // src range to copy.
2162 __ Add(src_ptr, srcObj, value_offset);
2163
2164 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2165 vixl32::Register temp;
2166 vixl32::Label compressed_string_preloop;
2167 if (mirror::kUseStringCompression) {
2168 // Location of count in string.
2169 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2170 temp = temps.Acquire();
2171 // String's length.
2172 __ Ldr(temp, MemOperand(srcObj, count_offset));
2173 __ Tst(temp, 1);
2174 temps.Release(temp);
2175 __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2176 }
2177 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2178
2179 // Do the copy.
2180 vixl32::Label loop, remainder;
2181
2182 temp = temps.Acquire();
2183 // Save repairing the value of num_chr on the < 4 character path.
2184 __ Subs(temp, num_chr, 4);
2185 __ B(lt, &remainder, /* is_far_target= */ false);
2186
2187 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2188 __ Mov(num_chr, temp);
2189
2190 // Main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
2191 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2192 // to rectify these everywhere this intrinsic applies.)
2193 __ Bind(&loop);
2194 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2195 __ Subs(num_chr, num_chr, 4);
2196 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2197 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2198 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2199 temps.Release(temp);
2200 __ B(ge, &loop, /* is_far_target= */ false);
2201
2202 __ Adds(num_chr, num_chr, 4);
2203 __ B(eq, final_label, /* is_far_target= */ false);
2204
2205 // Main loop for < 4 character case and remainder handling. Loads and stores one
2206 // 16-bit Java character at a time.
2207 __ Bind(&remainder);
2208 temp = temps.Acquire();
2209 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2210 __ Subs(num_chr, num_chr, 1);
2211 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2212 temps.Release(temp);
2213 __ B(gt, &remainder, /* is_far_target= */ false);
2214
2215 if (mirror::kUseStringCompression) {
2216 __ B(final_label);
2217
2218 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2219 DCHECK_EQ(c_char_size, 1u);
2220 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2221 __ Bind(&compressed_string_preloop);
2222 __ Add(src_ptr, src_ptr, srcBegin);
2223 __ Bind(&compressed_string_loop);
2224 temp = temps.Acquire();
2225 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2226 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2227 temps.Release(temp);
2228 __ Subs(num_chr, num_chr, 1);
2229 __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2230 }
2231
2232 if (done.IsReferenced()) {
2233 __ Bind(&done);
2234 }
2235 }
2236
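// Float.isInfinite() below avoids a VFP compare: with the sign bit shifted out, the value
// is infinite iff the remaining bits equal kPositiveInfinityFloat << 1, so a single EOR
// plus a compare-with-zero yields the boolean. In effect:
//
//   out = (((bits << 1) ^ (kPositiveInfinityFloat << 1)) == 0)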
2237 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2238 CreateFPToIntLocations(allocator_, invoke);
2239 }
2240
2241 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2242 ArmVIXLAssembler* const assembler = GetAssembler();
2243 const vixl32::Register out = OutputRegister(invoke);
2244 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2245 // we don't care about the sign bit anyway.
2246 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2247
2248 __ Vmov(out, InputSRegisterAt(invoke, 0));
2249 // We don't care about the sign bit, so shift left.
2250 __ Lsl(out, out, 1);
2251 __ Eor(out, out, infinity);
2252 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2253 }
2254
2255 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2256 CreateFPToIntLocations(allocator_, invoke);
2257 }
2258
2259 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2260 ArmVIXLAssembler* const assembler = GetAssembler();
2261 const vixl32::Register out = OutputRegister(invoke);
2262 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2263 const vixl32::Register temp = temps.Acquire();
2264 // The highest 32 bits of double precision positive infinity separated into
2265 // two constants encodable as immediate operands.
2266 constexpr uint32_t infinity_high = 0x7f000000U;
2267 constexpr uint32_t infinity_high2 = 0x00f00000U;
2268
2269 static_assert((infinity_high | infinity_high2) ==
2270 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2271 "The constants do not add up to the high 32 bits of double "
2272 "precision positive infinity.");
2273 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2274 __ Eor(out, out, infinity_high);
2275 __ Eor(out, out, infinity_high2);
2276 // We don't care about the sign bit, so shift left.
2277 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2278 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2279 }
2280
2281 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2282 if (features_.HasARMv8AInstructions()) {
2283 CreateFPToFPLocations(allocator_, invoke);
2284 }
2285 }
2286
2287 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2288 ArmVIXLAssembler* assembler = GetAssembler();
2289 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2290 __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2291 }
2292
2293 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2294 if (features_.HasARMv8AInstructions()) {
2295 CreateFPToFPLocations(allocator_, invoke);
2296 }
2297 }
2298
2299 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2300 ArmVIXLAssembler* assembler = GetAssembler();
2301 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2302 __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2303 }
2304
2305 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
2306 void IntrinsicLocationsBuilderARMVIXL::Visit##name##ValueOf(HInvoke* invoke) { \
2307 InvokeRuntimeCallingConventionARMVIXL calling_convention; \
2308 IntrinsicVisitor::ComputeValueOfLocations(invoke, \
2309 codegen_, \
2310 low, \
2311 (high) - (low) + 1, \
2312 LocationFrom(r0), \
2313 LocationFrom(calling_convention.GetRegisterAt(0))); \
2314 } \
2315 void IntrinsicCodeGeneratorARMVIXL::Visit##name##ValueOf(HInvoke* invoke) { \
2316 IntrinsicVisitor::ValueOfInfo info = \
2317 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
2318 codegen_->GetCompilerOptions(), \
2319 WellKnownClasses::java_lang_##name##_value, \
2320 low, \
2321 (high) - (low) + 1, \
2322 start_index); \
2323 HandleValueOf(invoke, info, type); \
2324 }
2325 BOXED_TYPES(VISIT_INTRINSIC)
2326 #undef VISIT_INTRINSIC
2327
2328
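// HandleValueOf() below implements the Integer.valueOf()-style cache lookup shared by the
// boxed types above. In rough Java-like pseudocode (names follow the ValueOfInfo fields):
//
//   if (in - info.low < info.length) {       // Unsigned compare also rejects in < low.
//     return boot_image_cache[in - info.low];
//   } else {
//     T obj = allocate T;                    // kQuickAllocObjectInitialized.
//     obj.value = in;                        // Plus a store-store barrier before publication.
//     return obj;
//   }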
2329 void IntrinsicCodeGeneratorARMVIXL::HandleValueOf(HInvoke* invoke,
2330 const IntrinsicVisitor::ValueOfInfo& info,
2331 DataType::Type type) {
2332 LocationSummary* locations = invoke->GetLocations();
2333 ArmVIXLAssembler* const assembler = GetAssembler();
2334
2335 vixl32::Register out = RegisterFrom(locations->Out());
2336 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2337 vixl32::Register temp = temps.Acquire();
2338 auto allocate_instance = [&]() {
2339 DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2340 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2341 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2342 CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
2343 };
2344 if (invoke->InputAt(0)->IsIntConstant()) {
2345 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2346 if (static_cast<uint32_t>(value - info.low) < info.length) {
2347 // Just embed the object in the code.
2348 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
2349 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2350 } else {
2351 DCHECK(locations->CanCall());
2352 // Allocate and initialize a new object.
2353 // TODO: If we JIT, we could allocate the object now, and store it in the
2354 // JIT object table.
2355 allocate_instance();
2356 __ Mov(temp, value);
2357 assembler->StoreToOffset(GetStoreOperandType(type), temp, out, info.value_offset);
2358 // Class pointer and `value` final field stores require a barrier before publication.
2359 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2360 }
2361 } else {
2362 DCHECK(locations->CanCall());
2363 vixl32::Register in = RegisterFrom(locations->InAt(0));
2364 // Check bounds of our cache.
2365 __ Add(out, in, -info.low);
2366 __ Cmp(out, info.length);
2367 vixl32::Label allocate, done;
2368 __ B(hs, &allocate, /* is_far_target= */ false);
2369 // If the value is within the bounds, load the object directly from the array.
2370 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2371 codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2372 assembler->MaybeUnpoisonHeapReference(out);
2373 __ B(&done);
2374 __ Bind(&allocate);
2375 // Otherwise allocate and initialize a new object.
2376 allocate_instance();
2377 assembler->StoreToOffset(GetStoreOperandType(type), in, out, info.value_offset);
2378 // Class pointer and `value` final field stores require a barrier before publication.
2379 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2380 __ Bind(&done);
2381 }
2382 }
2383
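// Reference.get() fast path below: go to the slow path if weak-reference access is not
// visibly enabled for this thread (concurrent-copying collection), or if the
// java.lang.ref.Reference static flags {disableIntrinsic, slowPathEnabled} are set;
// otherwise load the `referent` field as a volatile reference (with a read barrier when
// the configuration requires one).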
2384 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2385 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2386 }
2387
2388 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2389 ArmVIXLAssembler* assembler = GetAssembler();
2390 LocationSummary* locations = invoke->GetLocations();
2391
2392 Location obj = locations->InAt(0);
2393 Location out = locations->Out();
2394
2395 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2396 codegen_->AddSlowPath(slow_path);
2397
2398 if (codegen_->EmitReadBarrier()) {
2399 // Check self->GetWeakRefAccessEnabled().
2400 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2401 vixl32::Register temp = temps.Acquire();
2402 __ Ldr(temp,
2403 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2404 __ Cmp(temp, enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled));
2405 __ B(ne, slow_path->GetEntryLabel());
2406 }
2407
2408 {
2409 // Load the java.lang.ref.Reference class.
2410 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2411 vixl32::Register temp = temps.Acquire();
2412 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2413
2414 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2415 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2416 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2417 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2418 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
2419 __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
2420 __ Cmp(temp, 0);
2421 __ B(ne, slow_path->GetEntryLabel());
2422 }
2423
2424 // Load the value from the field.
2425 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2426 if (codegen_->EmitBakerReadBarrier()) {
2427 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2428 out,
2429 RegisterFrom(obj),
2430 referent_offset,
2431 /*maybe_temp=*/ Location::NoLocation(),
2432 /*needs_null_check=*/ true);
2433 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2434 } else {
2435 {
2436 vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2437 __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2438 codegen_->MaybeRecordImplicitNullCheck(invoke);
2439 }
2440 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2441 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2442 }
2443 __ Bind(slow_path->GetExitLabel());
2444 }
2445
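// Reference.refersTo(other) below compares the volatile `referent` with `other`; with the
// concurrent-copying collector it also treats a moved object as equal by following the
// lock word's forwarding address. Roughly:
//
//   ref = this.referent;                      // Volatile load.
//   if (ref == other) return true;
//   if (ref == null) return false;
//   if (lock word of ref holds a forwarding address) return forwarded(ref) == other;
//   return false;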
2446 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2447 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
2448 }
2449
VisitReferenceRefersTo(HInvoke * invoke)2450 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2451 LocationSummary* locations = invoke->GetLocations();
2452 ArmVIXLAssembler* assembler = GetAssembler();
2453 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2454
2455 vixl32::Register obj = RegisterFrom(locations->InAt(0));
2456 vixl32::Register other = RegisterFrom(locations->InAt(1));
2457 vixl32::Register out = RegisterFrom(locations->Out());
2458 vixl32::Register tmp = temps.Acquire();
2459
2460 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2461 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2462
2463 {
2464 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2465 // Loading into the scratch register always uses a 32-bit encoding.
2466 vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2467 vixl32::k32BitT32InstructionSizeInBytes);
2468 __ ldr(tmp, MemOperand(obj, referent_offset));
2469 codegen_->MaybeRecordImplicitNullCheck(invoke);
2470 }
2471 assembler->MaybeUnpoisonHeapReference(tmp);
2472 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2473
2474 if (codegen_->EmitReadBarrier()) {
2475 DCHECK(kUseBakerReadBarrier);
2476
2477 vixl32::Label calculate_result;
2478 __ Subs(out, tmp, other);
2479 __ B(eq, &calculate_result); // `out` is 0 if taken.
2480
2481 // Check if the loaded reference is null.
2482 __ Cmp(tmp, 0);
2483 __ B(eq, &calculate_result); // `out` is not 0 if taken.
2484
2485 // For correct memory visibility, we need a barrier before loading the lock word,
2486 // but the barrier already emitted above for the volatile load is sufficient.
2487
2488 // Load the lockword and check if it is a forwarding address.
2489 static_assert(LockWord::kStateShift == 30u);
2490 static_assert(LockWord::kStateForwardingAddress == 3u);
2491 __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2492 __ Cmp(tmp, Operand(0xc0000000));
2493 __ B(lo, &calculate_result); // `out` is not 0 if taken.
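// `lo` is taken when the two state bits (bits 31:30) are not both set, i.e. when the lock
// word does not hold a forwarding address.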
2494
2495 // Extract the forwarding address and subtract from `other`.
2496 __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2497
2498 __ Bind(&calculate_result);
2499 } else {
2500 DCHECK(!codegen_->EmitReadBarrier());
2501 __ Sub(out, tmp, other);
2502 }
2503
2504 // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
2505 __ Clz(out, out);
2506 __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
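// This works because CLZ(0) == 32 while CLZ of any non-zero value is less than 32, so
// shifting right by log2(32) == 5 yields 1 for zero and 0 otherwise.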
2507 }
2508
VisitThreadInterrupted(HInvoke * invoke)2509 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2510 LocationSummary* locations =
2511 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2512 locations->SetOut(Location::RequiresRegister());
2513 }
2514
VisitThreadInterrupted(HInvoke * invoke)2515 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2516 ArmVIXLAssembler* assembler = GetAssembler();
2517 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2518 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2519 __ Ldr(out, MemOperand(tr, offset));
2520 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2521 vixl32::Register temp = temps.Acquire();
2522 vixl32::Label done;
2523 vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2524 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
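// The flag is set: clear it. The DMBs around the clearing store keep the read-and-clear
// ordered with respect to surrounding accesses (roughly an atomic test-and-clear).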
2525 __ Dmb(vixl32::ISH);
2526 __ Mov(temp, 0);
2527 assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2528 __ Dmb(vixl32::ISH);
2529 if (done.IsReferenced()) {
2530 __ Bind(&done);
2531 }
2532 }
2533
VisitReachabilityFence(HInvoke * invoke)2534 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2535 LocationSummary* locations =
2536 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2537 locations->SetInAt(0, Location::Any());
2538 }
2539
VisitReachabilityFence(HInvoke * invoke)2540 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
2541
VisitIntegerDivideUnsigned(HInvoke * invoke)2542 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2543 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2544 }
2545
VisitIntegerDivideUnsigned(HInvoke * invoke)2546 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2547 ArmVIXLAssembler* assembler = GetAssembler();
2548 LocationSummary* locations = invoke->GetLocations();
2549 vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2550 vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2551 vixl32::Register out = RegisterFrom(locations->Out());
2552
2553 // Check if divisor is zero, bail to managed implementation to handle.
2554 SlowPathCodeARMVIXL* slow_path =
2555 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2556 codegen_->AddSlowPath(slow_path);
2557 __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2558
2559 __ Udiv(out, dividend, divisor);
2560
2561 __ Bind(slow_path->GetExitLabel());
2562 }
2563
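// Plain LDRD/STRD are only single-copy atomic when the CPU advertises atomic LDRD/STRD
// (e.g. with LPAE); otherwise atomic 64-bit accesses fall back to LDREXD/STREXD loops.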
Use64BitExclusiveLoadStore(bool atomic,CodeGeneratorARMVIXL * codegen)2564 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2565 return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2566 }
2567
GenerateIntrinsicGet(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register base,vixl32::Register offset,Location out,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)2568 static void GenerateIntrinsicGet(HInvoke* invoke,
2569 CodeGeneratorARMVIXL* codegen,
2570 DataType::Type type,
2571 std::memory_order order,
2572 bool atomic,
2573 vixl32::Register base,
2574 vixl32::Register offset,
2575 Location out,
2576 Location maybe_temp,
2577 Location maybe_temp2,
2578 Location maybe_temp3) {
2579 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2580 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2581 DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2582 DCHECK(atomic || order == std::memory_order_relaxed);
2583
2584 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2585 MemOperand address(base);
2586 if (offset.IsValid()) {
2587 // If the offset is valid, load from the relative address `base + offset`.
2588 address = MemOperand(base, offset);
2589 }
2590
2591 switch (type) {
2592 case DataType::Type::kBool:
2593 __ Ldrb(RegisterFrom(out), address);
2594 break;
2595 case DataType::Type::kInt8:
2596 __ Ldrsb(RegisterFrom(out), address);
2597 break;
2598 case DataType::Type::kUint16:
2599 __ Ldrh(RegisterFrom(out), address);
2600 break;
2601 case DataType::Type::kInt16:
2602 __ Ldrsh(RegisterFrom(out), address);
2603 break;
2604 case DataType::Type::kInt32:
2605 __ Ldr(RegisterFrom(out), address);
2606 break;
2607 case DataType::Type::kInt64: {
2608 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2609 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2610 if (offset.IsValid()) {
2611 vixl32::Register temp_reg = temps.Acquire();
2612 __ Add(temp_reg, base, offset);
2613 address = MemOperand(temp_reg);
2614 }
2615
2616 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
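// Emulate an atomic 64-bit load with a LDREXD/STREXD retry loop; the STREXD writes back
// the value just read, so memory is left unchanged.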
2617 vixl32::Label loop;
2618 __ Bind(&loop);
2619 __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2620 __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), address);
2621 __ Cmp(strexd_tmp, 0);
2622 __ B(ne, &loop);
2623 } else {
2624 __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2625 }
2626 break;
2627 }
2628 case DataType::Type::kReference: {
2629 if (codegen->EmitBakerReadBarrier()) {
2630 // Piggy-back on the field load path using introspection for the Baker read barrier.
2631 vixl32::Register temp = RegisterFrom(maybe_temp);
2632 __ Add(temp, base, offset);
2633 codegen->GenerateFieldLoadWithBakerReadBarrier(
2634 invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2635 } else {
2636 __ Ldr(RegisterFrom(out), address);
2637 }
2638 break;
2639 }
2640 case DataType::Type::kFloat32: {
2641 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2642 if (offset.IsValid()) {
2643 const vixl32::Register temp_reg = temps.Acquire();
2644 __ Add(temp_reg, base, offset);
2645 address = MemOperand(temp_reg);
2646 }
2647 __ Vldr(SRegisterFrom(out), address);
2648 break;
2649 }
2650 case DataType::Type::kFloat64: {
2651 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2652 if (offset.IsValid()) {
2653 const vixl32::Register temp_reg = temps.Acquire();
2654 __ Add(temp_reg, base, offset);
2655 address = MemOperand(temp_reg);
2656 }
2657 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2658 vixl32::Register lo = RegisterFrom(maybe_temp);
2659 vixl32::Register hi = RegisterFrom(maybe_temp2);
2660 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2661 vixl32::Label loop;
2662 __ Bind(&loop);
2663 __ Ldrexd(lo, hi, address);
2664 __ Strexd(strexd_tmp, lo, hi, address);
2665 __ Cmp(strexd_tmp, 0);
2666 __ B(ne, &loop);
2667 __ Vmov(DRegisterFrom(out), lo, hi);
2668 } else {
2669 __ Vldr(DRegisterFrom(out), address);
2670 }
2671 break;
2672 }
2673 default:
2674 LOG(FATAL) << "Unexpected type " << type;
2675 UNREACHABLE();
2676 }
2677 if (acquire_barrier) {
2678 codegen->GenerateMemoryBarrier(
2679 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2680 }
2681 if (type == DataType::Type::kReference && !codegen->EmitBakerReadBarrier()) {
2682 Location base_loc = LocationFrom(base);
2683 Location index_loc = LocationFrom(offset);
2684 codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /*offset=*/ 0u, index_loc);
2685 }
2686 }
2687
CreateUnsafeGetLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)2688 static void CreateUnsafeGetLocations(HInvoke* invoke,
2689 CodeGeneratorARMVIXL* codegen,
2690 DataType::Type type,
2691 bool atomic) {
2692 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2693 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2694 LocationSummary* locations =
2695 new (allocator) LocationSummary(invoke,
2696 can_call
2697 ? LocationSummary::kCallOnSlowPath
2698 : LocationSummary::kNoCall,
2699 kIntrinsified);
2700 if (can_call && kUseBakerReadBarrier) {
2701 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2702 }
2703 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2704 locations->SetInAt(1, Location::RequiresRegister());
2705 locations->SetInAt(2, Location::RequiresRegister());
2706 locations->SetOut(Location::RequiresRegister(),
2707 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2708 if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2709 (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2710 // We need a temporary register for the read barrier marking slow
2711 // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2712 // or for the STREXD result of the LDREXD/STREXD sequence when LDRD is non-atomic.
2713 locations->AddTemp(Location::RequiresRegister());
2714 }
2715 }
2716
CreateUnsafeGetAbsoluteLocations(HInvoke * invoke)2717 static void CreateUnsafeGetAbsoluteLocations(HInvoke* invoke) {
2718 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2719 LocationSummary* locations =
2720 new (allocator) LocationSummary(invoke,
2721 LocationSummary::kNoCall,
2722 kIntrinsified);
2723 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2724 locations->SetInAt(1, Location::RequiresRegister());
2725 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2726 }
2727
GenUnsafeGet(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic)2728 static void GenUnsafeGet(HInvoke* invoke,
2729 CodeGeneratorARMVIXL* codegen,
2730 DataType::Type type,
2731 std::memory_order order,
2732 bool atomic) {
2733 LocationSummary* locations = invoke->GetLocations();
2734 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
2735 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
2736 Location out = locations->Out();
2737 Location maybe_temp = Location::NoLocation();
2738 if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2739 (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2740 maybe_temp = locations->GetTemp(0);
2741 }
2742 GenerateIntrinsicGet(invoke,
2743 codegen,
2744 type,
2745 order,
2746 atomic,
2747 base,
2748 offset,
2749 out,
2750 maybe_temp,
2751 /*maybe_temp2=*/ Location::NoLocation(),
2752 /*maybe_temp3=*/ Location::NoLocation());
2753 }
2754
GenUnsafeGetAbsolute(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic)2755 static void GenUnsafeGetAbsolute(HInvoke* invoke,
2756 CodeGeneratorARMVIXL* codegen,
2757 DataType::Type type,
2758 std::memory_order order,
2759 bool atomic) {
2760 LocationSummary* locations = invoke->GetLocations();
2761 vixl32::Register address = LowRegisterFrom(locations->InAt(1)); // Long offset, lo part only.
2762 Location out = locations->Out();
2763 Location maybe_temp = Location::NoLocation();
2764 if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
2765 maybe_temp = locations->GetTemp(0);
2766 }
2767 GenerateIntrinsicGet(invoke,
2768 codegen,
2769 type,
2770 order,
2771 atomic,
2772 address,
2773 NoReg, // No offset.
2774 out,
2775 maybe_temp,
2776 /*maybe_temp2=*/ Location::NoLocation(),
2777 /*maybe_temp3=*/ Location::NoLocation());
2778 }
2779
VisitUnsafeGet(HInvoke * invoke)2780 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2781 VisitJdkUnsafeGet(invoke);
2782 }
2783
VisitUnsafeGetAbsolute(HInvoke * invoke)2784 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2785 VisitJdkUnsafeGetAbsolute(invoke);
2786 }
2787
VisitUnsafeGet(HInvoke * invoke)2788 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2789 VisitJdkUnsafeGet(invoke);
2790 }
2791
VisitUnsafeGetAbsolute(HInvoke * invoke)2792 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2793 VisitJdkUnsafeGetAbsolute(invoke);
2794 }
2795
VisitUnsafeGetVolatile(HInvoke * invoke)2796 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2797 VisitJdkUnsafeGetVolatile(invoke);
2798 }
2799
VisitUnsafeGetVolatile(HInvoke * invoke)2800 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2801 VisitJdkUnsafeGetVolatile(invoke);
2802 }
2803
VisitUnsafeGetLong(HInvoke * invoke)2804 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2805 VisitJdkUnsafeGetLong(invoke);
2806 }
2807
VisitUnsafeGetLong(HInvoke * invoke)2808 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2809 VisitJdkUnsafeGetLong(invoke);
2810 }
2811
VisitUnsafeGetLongVolatile(HInvoke * invoke)2812 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2813 VisitJdkUnsafeGetLongVolatile(invoke);
2814 }
2815
VisitUnsafeGetLongVolatile(HInvoke * invoke)2816 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2817 VisitJdkUnsafeGetLongVolatile(invoke);
2818 }
2819
VisitUnsafeGetObject(HInvoke * invoke)2820 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2821 VisitJdkUnsafeGetReference(invoke);
2822 }
2823
VisitUnsafeGetObject(HInvoke * invoke)2824 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2825 VisitJdkUnsafeGetReference(invoke);
2826 }
2827
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2828 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2829 VisitJdkUnsafeGetReferenceVolatile(invoke);
2830 }
2831
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2832 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2833 VisitJdkUnsafeGetReferenceVolatile(invoke);
2834 }
2835
VisitUnsafeGetByte(HInvoke * invoke)2836 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2837 VisitJdkUnsafeGetByte(invoke);
2838 }
2839
VisitUnsafeGetByte(HInvoke * invoke)2840 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2841 VisitJdkUnsafeGetByte(invoke);
2842 }
2843
VisitJdkUnsafeGet(HInvoke * invoke)2844 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2845 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2846 }
2847
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2848 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2849 CreateUnsafeGetAbsoluteLocations(invoke);
2850 }
2851
VisitJdkUnsafeGet(HInvoke * invoke)2852 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2853 GenUnsafeGet(
2854 invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2855 }
2856
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2857 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2858 GenUnsafeGetAbsolute(
2859 invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2860 }
2861
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2862 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2863 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2864 }
2865
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2866 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2867 GenUnsafeGet(
2868 invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2869 }
2870
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2871 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2872 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2873 }
2874
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2875 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2876 GenUnsafeGet(
2877 invoke, codegen_, DataType::Type::kInt32, std::memory_order_acquire, /*atomic=*/ true);
2878 }
2879
VisitJdkUnsafeGetLong(HInvoke * invoke)2880 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2881 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2882 }
2883
VisitJdkUnsafeGetLong(HInvoke * invoke)2884 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2885 GenUnsafeGet(
2886 invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2887 }
2888
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2889 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2890 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2891 }
2892
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2893 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2894 GenUnsafeGet(
2895 invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2896 }
2897
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2898 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2899 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2900 }
2901
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2902 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2903 GenUnsafeGet(
2904 invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true);
2905 }
2906
VisitJdkUnsafeGetReference(HInvoke * invoke)2907 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2908 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2909 }
2910
VisitJdkUnsafeGetReference(HInvoke * invoke)2911 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2912 GenUnsafeGet(
2913 invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2914 }
2915
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2916 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2917 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2918 }
2919
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2920 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2921 GenUnsafeGet(
2922 invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2923 }
2924
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2925 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2926 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2927 }
2928
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2929 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2930 GenUnsafeGet(
2931 invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true);
2932 }
2933
VisitJdkUnsafeGetByte(HInvoke * invoke)2934 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2935 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
2936 }
2937
VisitJdkUnsafeGetByte(HInvoke * invoke)2938 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2939 GenUnsafeGet(
2940 invoke, codegen_, DataType::Type::kInt8, std::memory_order_relaxed, /*atomic=*/ false);
2941 }
2942
GenerateIntrinsicSetStore(CodeGeneratorARMVIXL * codegen,ArmVIXLAssembler * assembler,DataType::Type type,bool atomic,vixl32::MemOperand address,Location value,bool seq_cst_barrier,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)2943 static void GenerateIntrinsicSetStore(CodeGeneratorARMVIXL* codegen,
2944 ArmVIXLAssembler* assembler,
2945 DataType::Type type,
2946 bool atomic,
2947 vixl32::MemOperand address,
2948 Location value,
2949 bool seq_cst_barrier,
2950 Location maybe_temp,
2951 Location maybe_temp2,
2952 Location maybe_temp3) {
2953 switch (type) {
2954 case DataType::Type::kBool:
2955 case DataType::Type::kInt8:
2956 __ Strb(RegisterFrom(value), address);
2957 break;
2958 case DataType::Type::kUint16:
2959 case DataType::Type::kInt16:
2960 __ Strh(RegisterFrom(value), address);
2961 break;
2962 case DataType::Type::kReference:
2963 case DataType::Type::kInt32:
2964 __ Str(RegisterFrom(value), address);
2965 break;
2966 case DataType::Type::kInt64:
2967 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2968 vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2969 vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
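// The LDREXD only claims the exclusive monitor (its result is ignored) so that the
// following STREXD can perform the 64-bit store atomically; retry if the store fails.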
2970 vixl32::Label loop;
2971 __ Bind(&loop);
2972 __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
2973 __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
2974 __ Cmp(lo_tmp, 0);
2975 __ B(ne, &loop);
2976 } else {
2977 __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
2978 }
2979 break;
2980 case DataType::Type::kFloat32:
2981 __ Vstr(SRegisterFrom(value), address);
2982 break;
2983 case DataType::Type::kFloat64:
2984 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2985 vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2986 vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2987 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2988 vixl32::Label loop;
2989 __ Bind(&loop);
2990 __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
2991 __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
2992 __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
2993 __ Cmp(strexd_tmp, 0);
2994 __ B(ne, &loop);
2995 } else {
2996 __ Vstr(DRegisterFrom(value), address);
2997 }
2998 break;
2999 default:
3000 LOG(FATAL) << "Unexpected type " << type;
3001 UNREACHABLE();
3002 }
3003
3004 if (seq_cst_barrier) {
3005 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3006 }
3007 }
3008
GenerateIntrinsicSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register address,Location value,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)3009 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
3010 DataType::Type type,
3011 std::memory_order order,
3012 bool atomic,
3013 vixl32::Register address,
3014 Location value,
3015 Location maybe_temp,
3016 Location maybe_temp2,
3017 Location maybe_temp3) {
3018 bool seq_cst_barrier = order == std::memory_order_seq_cst;
3019 bool release_barrier = seq_cst_barrier || order == std::memory_order_release;
3020 DCHECK(release_barrier || order == std::memory_order_relaxed);
3021 DCHECK(atomic || order == std::memory_order_relaxed);
3022
3023 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3024 if (release_barrier) {
3025 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3026 }
3027 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3028 GenerateIntrinsicSetStore(codegen,
3029 assembler,
3030 type,
3031 atomic,
3032 MemOperand(address),
3033 value,
3034 seq_cst_barrier,
3035 maybe_temp,
3036 maybe_temp2,
3037 maybe_temp3);
3038 }
3039
GenerateIntrinsicSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register base,vixl32::Register offset,Location value,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)3040 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
3041 DataType::Type type,
3042 std::memory_order order,
3043 bool atomic,
3044 vixl32::Register base,
3045 vixl32::Register offset,
3046 Location value,
3047 Location maybe_temp,
3048 Location maybe_temp2,
3049 Location maybe_temp3) {
3050 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
3051 bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
3052 DCHECK(release_barrier || order == std::memory_order_relaxed);
3053 DCHECK(atomic || order == std::memory_order_relaxed);
3054
3055 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3056 if (release_barrier) {
3057 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3058 }
3059 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3060 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
3061 vixl32::Register temp = temps.Acquire();
3062 __ Mov(temp, RegisterFrom(value));
3063 assembler->PoisonHeapReference(temp);
3064 value = LocationFrom(temp);
3065 }
3066 MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
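// VSTR and STRD (and the exclusive-store sequence) cannot take a register offset, so for
// FP and 64-bit types materialize `base + offset` in a temporary register first.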
3067 if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
3068 const vixl32::Register temp_reg = temps.Acquire();
3069 __ Add(temp_reg, base, offset);
3070 address = MemOperand(temp_reg);
3071 }
3072 GenerateIntrinsicSetStore(codegen,
3073 assembler,
3074 type,
3075 atomic,
3076 address,
3077 value,
3078 seq_cst_barrier,
3079 maybe_temp,
3080 maybe_temp2,
3081 maybe_temp3);
3082 }
3083
CreateUnsafePutTempLocations(CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic,LocationSummary * locations)3084 static void CreateUnsafePutTempLocations(CodeGeneratorARMVIXL* codegen,
3085 DataType::Type type,
3086 bool atomic,
3087 LocationSummary* locations) {
3088 if (type == DataType::Type::kInt64) {
3089 // Potentially need temps for ldrexd-strexd loop.
3090 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3091 locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
3092 locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
3093 }
3094 } else if (type == DataType::Type::kReference) {
3095 // Temp for card-marking.
3096 locations->AddTemp(Location::RequiresRegister()); // Temp.
3097 }
3098 }
3099
CreateUnsafePutLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)3100 static void CreateUnsafePutLocations(HInvoke* invoke,
3101 CodeGeneratorARMVIXL* codegen,
3102 DataType::Type type,
3103 bool atomic) {
3104 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3105 LocationSummary* locations =
3106 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3107 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3108 locations->SetInAt(1, Location::RequiresRegister());
3109 locations->SetInAt(2, Location::RequiresRegister());
3110 locations->SetInAt(3, Location::RequiresRegister());
3111 CreateUnsafePutTempLocations(codegen, type, atomic, locations);
3112 }
3113
CreateUnsafePutAbsoluteLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)3114 static void CreateUnsafePutAbsoluteLocations(HInvoke* invoke,
3115 CodeGeneratorARMVIXL* codegen,
3116 DataType::Type type,
3117 bool atomic) {
3118 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3119 LocationSummary* locations =
3120 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3121 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3122 locations->SetInAt(1, Location::RequiresRegister());
3123 locations->SetInAt(2, Location::RequiresRegister());
3124 CreateUnsafePutTempLocations(codegen, type, atomic, locations);
3125 }
3126
GenUnsafePut(HInvoke * invoke,DataType::Type type,std::memory_order order,bool atomic,CodeGeneratorARMVIXL * codegen)3127 static void GenUnsafePut(HInvoke* invoke,
3128 DataType::Type type,
3129 std::memory_order order,
3130 bool atomic,
3131 CodeGeneratorARMVIXL* codegen) {
3132 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3133
3134 LocationSummary* locations = invoke->GetLocations();
3135 vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer.
3136 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
3137 Location value = locations->InAt(3);
3138 Location maybe_temp = Location::NoLocation();
3139 Location maybe_temp2 = Location::NoLocation();
3140 if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3141 maybe_temp = locations->GetTemp(0);
3142 maybe_temp2 = locations->GetTemp(1);
3143 }
3144
3145 GenerateIntrinsicSet(codegen,
3146 type,
3147 order,
3148 atomic,
3149 base,
3150 offset,
3151 value,
3152 maybe_temp,
3153 maybe_temp2,
3154 /*maybe_temp3=*/ Location::NoLocation());
3155
3156 if (type == DataType::Type::kReference) {
3157 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3158 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3159 vixl32::Register card = temps.Acquire();
3160 bool value_can_be_null = true; // TODO: Worth finding out this information?
3161 codegen->MaybeMarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
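// Marking the card for `base` lets the GC's card scanning find the newly stored reference.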
3162 }
3163 }
3164
GenUnsafePutAbsolute(HInvoke * invoke,DataType::Type type,std::memory_order order,bool atomic,CodeGeneratorARMVIXL * codegen)3165 static void GenUnsafePutAbsolute(HInvoke* invoke,
3166 DataType::Type type,
3167 std::memory_order order,
3168 bool atomic,
3169 CodeGeneratorARMVIXL* codegen) {
3170 LocationSummary* locations = invoke->GetLocations();
3171 vixl32::Register address = LowRegisterFrom(locations->InAt(1)); // Long offset, lo part only.
3172 Location value = locations->InAt(2);
3173 Location maybe_temp = Location::NoLocation();
3174 Location maybe_temp2 = Location::NoLocation();
3175 if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3176 maybe_temp = locations->GetTemp(0);
3177 maybe_temp2 = locations->GetTemp(1);
3178 }
3179
3180 GenerateIntrinsicSet(codegen,
3181 type,
3182 order,
3183 atomic,
3184 address,
3185 value,
3186 maybe_temp,
3187 maybe_temp2,
3188 /*maybe_temp3=*/ Location::NoLocation());
3189 }
3190
VisitUnsafePut(HInvoke * invoke)3191 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3192 VisitJdkUnsafePut(invoke);
3193 }
3194
VisitUnsafePutAbsolute(HInvoke * invoke)3195 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutAbsolute(HInvoke* invoke) {
3196 VisitJdkUnsafePutAbsolute(invoke);
3197 }
3198
VisitUnsafePut(HInvoke * invoke)3199 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3200 VisitJdkUnsafePut(invoke);
3201 }
3202
VisitUnsafePutAbsolute(HInvoke * invoke)3203 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutAbsolute(HInvoke* invoke) {
3204 VisitJdkUnsafePutAbsolute(invoke);
3205 }
3206
VisitUnsafePutOrdered(HInvoke * invoke)3207 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3208 VisitJdkUnsafePutOrdered(invoke);
3209 }
3210
VisitUnsafePutOrdered(HInvoke * invoke)3211 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3212 VisitJdkUnsafePutOrdered(invoke);
3213 }
3214
VisitUnsafePutVolatile(HInvoke * invoke)3215 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3216 VisitJdkUnsafePutVolatile(invoke);
3217 }
3218
VisitUnsafePutVolatile(HInvoke * invoke)3219 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3220 VisitJdkUnsafePutVolatile(invoke);
3221 }

VisitUnsafePutObject(HInvoke * invoke)3222 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3223 VisitJdkUnsafePutReference(invoke);
3224 }
3225
VisitUnsafePutObject(HInvoke * invoke)3226 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3227 VisitJdkUnsafePutReference(invoke);
3228 }
3229
VisitUnsafePutObjectOrdered(HInvoke * invoke)3230 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3231 VisitJdkUnsafePutObjectOrdered(invoke);
3232 }
3233
VisitUnsafePutObjectOrdered(HInvoke * invoke)3234 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3235 VisitJdkUnsafePutObjectOrdered(invoke);
3236 }
3237
VisitUnsafePutObjectVolatile(HInvoke * invoke)3238 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3239 VisitJdkUnsafePutReferenceVolatile(invoke);
3240 }
3241
VisitUnsafePutObjectVolatile(HInvoke * invoke)3242 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3243 VisitJdkUnsafePutReferenceVolatile(invoke);
3244 }
3245
VisitUnsafePutLong(HInvoke * invoke)3246 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3247 VisitJdkUnsafePutLong(invoke);
3248 }
3249
VisitUnsafePutLong(HInvoke * invoke)3250 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3251 VisitJdkUnsafePutLong(invoke);
3252 }
3253
VisitUnsafePutLongOrdered(HInvoke * invoke)3254 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3255 VisitJdkUnsafePutLongOrdered(invoke);
3256 }
3257
VisitUnsafePutLongOrdered(HInvoke * invoke)3258 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3259 VisitJdkUnsafePutLongOrdered(invoke);
3260 }
3261
VisitUnsafePutLongVolatile(HInvoke * invoke)3262 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3263 VisitJdkUnsafePutLongVolatile(invoke);
3264 }
3265
VisitUnsafePutLongVolatile(HInvoke * invoke)3266 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3267 VisitJdkUnsafePutLongVolatile(invoke);
3268 }
3269
VisitUnsafePutByte(HInvoke * invoke)3270 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3271 VisitJdkUnsafePutByte(invoke);
3272 }
3273
VisitUnsafePutByte(HInvoke * invoke)3274 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3275 VisitJdkUnsafePutByte(invoke);
3276 }
3277
VisitJdkUnsafePut(HInvoke * invoke)3278 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3279 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3280 }
3281
VisitJdkUnsafePutAbsolute(HInvoke * invoke)3282 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
3283 CreateUnsafePutAbsoluteLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3284 }
3285
VisitJdkUnsafePut(HInvoke * invoke)3286 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3287 GenUnsafePut(invoke,
3288 DataType::Type::kInt32,
3289 std::memory_order_relaxed,
3290 /*atomic=*/ false,
3291 codegen_);
3292 }
3293
VisitJdkUnsafePutAbsolute(HInvoke * invoke)3294 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
3295 GenUnsafePutAbsolute(invoke,
3296 DataType::Type::kInt32,
3297 std::memory_order_relaxed,
3298                        /*atomic=*/ false,
3299 codegen_);
3300 }
3301
VisitJdkUnsafePutByte(HInvoke * invoke)3302 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3303 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
3304 }
3305
VisitJdkUnsafePutByte(HInvoke * invoke)3306 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3307 GenUnsafePut(invoke,
3308 DataType::Type::kInt8,
3309 std::memory_order_relaxed,
3310 /*atomic=*/ false,
3311 codegen_);
3312 }
3313
VisitJdkUnsafePutOrdered(HInvoke * invoke)3314 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3315 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3316 }
3317
VisitJdkUnsafePutOrdered(HInvoke * invoke)3318 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3319 GenUnsafePut(invoke,
3320 DataType::Type::kInt32,
3321 std::memory_order_release,
3322 /*atomic=*/ true,
3323 codegen_);
3324 }
3325
VisitJdkUnsafePutVolatile(HInvoke * invoke)3326 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3327 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3328 }
3329
VisitJdkUnsafePutVolatile(HInvoke * invoke)3330 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3331 GenUnsafePut(invoke,
3332 DataType::Type::kInt32,
3333 std::memory_order_seq_cst,
3334 /*atomic=*/ true,
3335 codegen_);
3336 }
3337
VisitJdkUnsafePutRelease(HInvoke * invoke)3338 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3339 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3340 }
3341
VisitJdkUnsafePutRelease(HInvoke * invoke)3342 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3343 GenUnsafePut(invoke,
3344 DataType::Type::kInt32,
3345 std::memory_order_release,
3346 /*atomic=*/ true,
3347 codegen_);
3348 }
3349
VisitJdkUnsafePutReference(HInvoke * invoke)3350 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3351 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3352 }
3353
VisitJdkUnsafePutReference(HInvoke * invoke)3354 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3355 GenUnsafePut(invoke,
3356 DataType::Type::kReference,
3357 std::memory_order_relaxed,
3358 /*atomic=*/ false,
3359 codegen_);
3360 }
3361
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)3362 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3363 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3364 }
3365
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)3366 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3367 GenUnsafePut(invoke,
3368 DataType::Type::kReference,
3369 std::memory_order_release,
3370 /*atomic=*/ true,
3371 codegen_);
3372 }
3373
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)3374 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3375 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3376 }
3377
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)3378 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3379 GenUnsafePut(invoke,
3380 DataType::Type::kReference,
3381 std::memory_order_seq_cst,
3382 /*atomic=*/ true,
3383 codegen_);
3384 }
3385
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)3386 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3387 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3388 }
3389
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)3390 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3391 GenUnsafePut(invoke,
3392 DataType::Type::kReference,
3393 std::memory_order_release,
3394 /*atomic=*/ true,
3395 codegen_);
3396 }
3397
VisitJdkUnsafePutLong(HInvoke * invoke)3398 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3399 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3400 }
3401
VisitJdkUnsafePutLong(HInvoke * invoke)3402 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3403 GenUnsafePut(invoke,
3404 DataType::Type::kInt64,
3405 std::memory_order_relaxed,
3406 /*atomic=*/ false,
3407 codegen_);
3408 }
3409
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)3410 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3411 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3412 }
3413
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)3414 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3415 GenUnsafePut(invoke,
3416 DataType::Type::kInt64,
3417 std::memory_order_release,
3418 /*atomic=*/ true,
3419 codegen_);
3420 }
3421
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)3422 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3423 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3424 }
3425
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)3426 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3427 GenUnsafePut(invoke,
3428 DataType::Type::kInt64,
3429 std::memory_order_seq_cst,
3430 /*atomic=*/ true,
3431 codegen_);
3432 }
3433
VisitJdkUnsafePutLongRelease(HInvoke * invoke)3434 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3435 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3436 }
3437
VisitJdkUnsafePutLongRelease(HInvoke * invoke)3438 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3439 GenUnsafePut(invoke,
3440 DataType::Type::kInt64,
3441 std::memory_order_release,
3442 /*atomic=*/ true,
3443 codegen_);
3444 }
3445
EmitLoadExclusive(CodeGeneratorARMVIXL * codegen,DataType::Type type,vixl32::Register ptr,Location old_value)3446 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3447 DataType::Type type,
3448 vixl32::Register ptr,
3449 Location old_value) {
3450 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3451 switch (type) {
3452 case DataType::Type::kBool:
3453 case DataType::Type::kInt8:
3454 __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3455 break;
3456 case DataType::Type::kUint16:
3457 case DataType::Type::kInt16:
3458 __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3459 break;
3460 case DataType::Type::kInt32:
3461 case DataType::Type::kReference:
3462 __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3463 break;
3464 case DataType::Type::kInt64:
3465 __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3466 break;
3467 default:
3468 LOG(FATAL) << "Unexpected type: " << type;
3469 UNREACHABLE();
3470 }
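// LDREXB/LDREXH zero-extend the loaded value, so sign-extend narrow signed types below;
// references may additionally need to be unpoisoned.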
3471 switch (type) {
3472 case DataType::Type::kInt8:
3473 __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3474 break;
3475 case DataType::Type::kInt16:
3476 __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3477 break;
3478 case DataType::Type::kReference:
3479 assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3480 break;
3481 default:
3482 break;
3483 }
3484 }
3485
EmitStoreExclusive(CodeGeneratorARMVIXL * codegen,DataType::Type type,vixl32::Register ptr,vixl32::Register store_result,Location new_value)3486 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3487 DataType::Type type,
3488 vixl32::Register ptr,
3489 vixl32::Register store_result,
3490 Location new_value) {
3491 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3492 if (type == DataType::Type::kReference) {
3493 assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3494 }
3495 switch (type) {
3496 case DataType::Type::kBool:
3497 case DataType::Type::kInt8:
3498 __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3499 break;
3500 case DataType::Type::kUint16:
3501 case DataType::Type::kInt16:
3502 __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3503 break;
3504 case DataType::Type::kInt32:
3505 case DataType::Type::kReference:
3506 __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3507 break;
3508 case DataType::Type::kInt64:
3509 __ Strexd(
3510 store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3511 break;
3512 default:
3513 LOG(FATAL) << "Unexpected type: " << type;
3514 UNREACHABLE();
3515 }
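// Restore the original (unpoisoned) reference in `new_value`: the register holds an input
// that must not be clobbered, and the exclusive store may be retried by the caller.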
3516 if (type == DataType::Type::kReference) {
3517 assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3518 }
3519 }
3520
GenerateCompareAndSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,bool strong,vixl32::Label * cmp_failure,bool cmp_failure_is_far_target,vixl32::Register ptr,Location expected,Location new_value,Location old_value,vixl32::Register store_result,vixl32::Register success)3521 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3522 DataType::Type type,
3523 bool strong,
3524 vixl32::Label* cmp_failure,
3525 bool cmp_failure_is_far_target,
3526 vixl32::Register ptr,
3527 Location expected,
3528 Location new_value,
3529 Location old_value,
3530 vixl32::Register store_result,
3531 vixl32::Register success) {
3532 // For kReference, `expected` shall be a register pair when called from the read barrier
3533 // slow path, holding both the original `expected` and the unmarked old value from the
3534 // main-path CAS attempt that matched `expected` only after marking.
3535 // Otherwise the type of `expected` shall match the type of `new_value` and `old_value`.
3536 if (type == DataType::Type::kInt64) {
3537 DCHECK(expected.IsRegisterPair());
3538 DCHECK(new_value.IsRegisterPair());
3539 DCHECK(old_value.IsRegisterPair());
3540 } else {
3541 DCHECK(expected.IsRegister() ||
3542 (type == DataType::Type::kReference && expected.IsRegisterPair()));
3543 DCHECK(new_value.IsRegister());
3544 DCHECK(old_value.IsRegister());
3545 // Make sure the unmarked old value for reference CAS slow path is not clobbered by STREX.
3546 DCHECK(!expected.Contains(LocationFrom(store_result)));
3547 }
3548
3549 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3550
3551 // do {
3552 // old_value = [ptr]; // Load exclusive.
3553 // if (old_value != expected) goto cmp_failure;
3554 // store_result = failed([ptr] <- new_value); // Store exclusive.
3555 // } while (strong && store_result);
3556 //
3557 // If `success` is a valid register, there are additional instructions in the above code
3558 // to report success with value 1 and failure with value 0 in that register.
3559
3560 vixl32::Label loop_head;
3561 if (strong) {
3562 __ Bind(&loop_head);
3563 }
3564 EmitLoadExclusive(codegen, type, ptr, old_value);
3565 // We do not need to initialize the failure code for comparison failure if the
3566 // branch goes to the read barrier slow path that clobbers `success` anyway.
3567 bool init_failure_for_cmp =
3568 success.IsValid() &&
3569 !(type == DataType::Type::kReference && codegen->EmitReadBarrier() && expected.IsRegister());
3570 // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3571 // is essentially free, so prepare the failure value here if we can.
3572 bool init_failure_for_cmp_early =
3573 init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3574 if (init_failure_for_cmp_early) {
3575 __ Mov(success, 0); // Indicate failure if the comparison fails.
3576 }
3577 if (type == DataType::Type::kInt64) {
3578 __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3579 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3580 __ it(eq);
3581 __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3582 } else if (expected.IsRegisterPair()) {
3583 DCHECK_EQ(type, DataType::Type::kReference);
3584 DCHECK(!expected.Contains(old_value));
3585 // Check if the loaded value matches any of the two registers in `expected`.
3586 __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3587 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3588 __ it(ne);
3589 __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
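// The flags are `eq` here iff `old_value` matched either of the two `expected` registers.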
3590 } else {
3591 __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3592 }
3593 if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3594 __ Mov(LeaveFlags, success, 0); // Indicate failure if the comparison fails.
3595 }
3596 __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3597 EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3598 if (strong) {
3599 // Instruction scheduling: Loading a constant between STREX* and using its result
3600 // is essentially free, so prepare the success value here if needed and possible.
3601 if (success.IsValid() && !success.Is(store_result)) {
3602 __ Mov(success, 1); // Indicate success if the store succeeds.
3603 }
3604 __ Cmp(store_result, 0);
3605 if (success.IsValid() && success.Is(store_result)) {
3606 __ Mov(LeaveFlags, success, 1); // Indicate success if the store succeeds.
3607 }
3608 __ B(ne, &loop_head, /*is_far_target=*/ false);
3609 } else {
3610 // Weak CAS (VarHandle.weakCompareAndSet* variants) must always report success or failure.
3611 DCHECK(success.IsValid());
3612 // Flip the `store_result` to indicate success by 1 and failure by 0.
3613 __ Eor(success, store_result, 1);
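// STREX-family instructions write 0 on success and 1 on failure, so the XOR with 1 yields
// the expected boolean convention.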
3614 }
3615 }
3616
3617 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3618 public:
ReadBarrierCasSlowPathARMVIXL(HInvoke * invoke,bool strong,vixl32::Register base,vixl32::Register offset,vixl32::Register expected,vixl32::Register new_value,vixl32::Register old_value,vixl32::Register old_value_temp,vixl32::Register store_result,vixl32::Register success,CodeGeneratorARMVIXL * arm_codegen)3619 explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3620 bool strong,
3621 vixl32::Register base,
3622 vixl32::Register offset,
3623 vixl32::Register expected,
3624 vixl32::Register new_value,
3625 vixl32::Register old_value,
3626 vixl32::Register old_value_temp,
3627 vixl32::Register store_result,
3628 vixl32::Register success,
3629 CodeGeneratorARMVIXL* arm_codegen)
3630 : SlowPathCodeARMVIXL(invoke),
3631 strong_(strong),
3632 base_(base),
3633 offset_(offset),
3634 expected_(expected),
3635 new_value_(new_value),
3636 old_value_(old_value),
3637 old_value_temp_(old_value_temp),
3638 store_result_(store_result),
3639 success_(success),
3640 mark_old_value_slow_path_(nullptr),
3641 update_old_value_slow_path_(nullptr) {
3642 if (!kUseBakerReadBarrier) {
3643 // We need to add the slow path now, it is too late when emitting slow path code.
3644 mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3645 invoke,
3646 Location::RegisterLocation(old_value_temp.GetCode()),
3647 Location::RegisterLocation(old_value.GetCode()),
3648 Location::RegisterLocation(base.GetCode()),
3649 /*offset=*/ 0u,
3650 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3651 if (!success.IsValid()) {
3652 update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3653 invoke,
3654 Location::RegisterLocation(old_value.GetCode()),
3655 Location::RegisterLocation(old_value_temp.GetCode()),
3656 Location::RegisterLocation(base.GetCode()),
3657 /*offset=*/ 0u,
3658 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3659 }
3660 }
3661 }
3662
GetDescription() const3663 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3664
EmitNativeCode(CodeGenerator * codegen)3665 void EmitNativeCode(CodeGenerator* codegen) override {
3666 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3667 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3668 __ Bind(GetEntryLabel());
3669
3670 // Mark the `old_value_` from the main path and compare with `expected_`.
3671 if (kUseBakerReadBarrier) {
3672 DCHECK(mark_old_value_slow_path_ == nullptr);
3673 arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3674 } else {
3675 DCHECK(mark_old_value_slow_path_ != nullptr);
3676 __ B(mark_old_value_slow_path_->GetEntryLabel());
3677 __ Bind(mark_old_value_slow_path_->GetExitLabel());
3678 }
3679 __ Cmp(old_value_temp_, expected_);
3680 if (success_.IsValid()) {
3681 __ Mov(LeaveFlags, success_, 0); // Indicate failure if we take the branch out.
3682 } else {
3683 // In case of failure, update the `old_value_` with the marked reference.
3684 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3685 __ it(ne);
3686 __ mov(ne, old_value_, old_value_temp_);
3687 }
3688 __ B(ne, GetExitLabel());
3689
3690 // The old value we have read did not match `expected` (which is always a to-space
3691 // reference) but after the read barrier the marked to-space value matched, so the
3692 // old value must be a from-space reference to the same object. Do the same CAS loop
3693 // as the main path but check for both `expected` and the unmarked old value
3694 // representing the to-space and from-space references for the same object.
3695
3696 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3697 vixl32::Register tmp_ptr = temps.Acquire();
3698
3699 // Recalculate the `tmp_ptr` clobbered above.
3700 __ Add(tmp_ptr, base_, offset_);
3701
3702 vixl32::Label mark_old_value;
3703 GenerateCompareAndSet(arm_codegen,
3704 DataType::Type::kReference,
3705 strong_,
3706 /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3707 /*cmp_failure_is_far_target=*/ success_.IsValid(),
3708 tmp_ptr,
3709 /*expected=*/ LocationFrom(expected_, old_value_),
3710 /*new_value=*/ LocationFrom(new_value_),
3711 /*old_value=*/ LocationFrom(old_value_temp_),
3712 store_result_,
3713 success_);
3714 if (!success_.IsValid()) {
3715 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3716 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3717 __ Mov(old_value_, expected_);
3718 }
3719
3720 __ B(GetExitLabel());
3721
3722 if (!success_.IsValid()) {
3723 __ Bind(&mark_old_value);
3724 if (kUseBakerReadBarrier) {
3725 DCHECK(update_old_value_slow_path_ == nullptr);
3726 arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3727 } else {
3728 // Note: We could redirect the `failure` above directly to the entry label and bind
3729 // the exit label in the main path, but the main path would need to access the
3730 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3731 DCHECK(update_old_value_slow_path_ != nullptr);
3732 __ B(update_old_value_slow_path_->GetEntryLabel());
3733 __ Bind(update_old_value_slow_path_->GetExitLabel());
3734 }
3735 __ B(GetExitLabel());
3736 }
3737 }
3738
3739 private:
3740 bool strong_;
3741 vixl32::Register base_;
3742 vixl32::Register offset_;
3743 vixl32::Register expected_;
3744 vixl32::Register new_value_;
3745 vixl32::Register old_value_;
3746 vixl32::Register old_value_temp_;
3747 vixl32::Register store_result_;
3748 vixl32::Register success_;
3749 SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3750 SlowPathCodeARMVIXL* update_old_value_slow_path_;
3751 };
3752
CreateUnsafeCASLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3753 static void CreateUnsafeCASLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3754 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
3755 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3756 LocationSummary* locations =
3757 new (allocator) LocationSummary(invoke,
3758 can_call
3759 ? LocationSummary::kCallOnSlowPath
3760 : LocationSummary::kNoCall,
3761 kIntrinsified);
3762 if (can_call && kUseBakerReadBarrier) {
3763 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3764 }
3765 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3766 locations->SetInAt(1, Location::RequiresRegister());
3767 locations->SetInAt(2, Location::RequiresRegister());
3768 locations->SetInAt(3, Location::RequiresRegister());
3769 locations->SetInAt(4, Location::RequiresRegister());
3770
3771 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3772
3773 // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3774 // this is also used for card-marking, and possibly for read barrier.
3775 locations->AddTemp(Location::RequiresRegister());
3776 }
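// For reference, the resulting location summary for the Unsafe CAS intrinsics above is:
//   in 0: unused receiver, in 1: object, in 2: long offset (only the low word is used),
//   in 3: expected value, in 4: new value; out: success flag (overlapping);
//   temp 0: CAS temporary (also used for card-marking and, possibly, the read barrier).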
3777
3778 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3779 DCHECK_NE(type, DataType::Type::kInt64);
3780
3781 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3782 LocationSummary* locations = invoke->GetLocations();
3783
3784 vixl32::Register out = OutputRegister(invoke); // Boolean result.
3785 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
3786 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
3787 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
3788 vixl32::Register new_value = InputRegisterAt(invoke, 4); // New value.
3789
3790 vixl32::Register tmp = RegisterFrom(locations->GetTemp(0)); // Temporary.
3791
3792 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3793 vixl32::Register tmp_ptr = temps.Acquire();
3794
3795 if (type == DataType::Type::kReference) {
3796 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
3797 // object and scan the receiver at the next GC for nothing.
3798 bool value_can_be_null = true; // TODO: Worth finding out this information?
3799 codegen->MaybeMarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3800 }
3801
3802 vixl32::Label exit_loop_label;
3803 vixl32::Label* exit_loop = &exit_loop_label;
3804 vixl32::Label* cmp_failure = &exit_loop_label;
3805
3806 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3807 // If marking, check if the stored reference is a from-space reference to the same
3808 // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3809 ReadBarrierCasSlowPathARMVIXL* slow_path =
3810 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3811 invoke,
3812 /*strong=*/ true,
3813 base,
3814 offset,
3815 expected,
3816 new_value,
3817 /*old_value=*/ tmp,
3818 /*old_value_temp=*/ out,
3819 /*store_result=*/ out,
3820 /*success=*/ out,
3821 codegen);
3822 codegen->AddSlowPath(slow_path);
3823 exit_loop = slow_path->GetExitLabel();
3824 cmp_failure = slow_path->GetEntryLabel();
3825 }
3826
3827 // Unsafe CAS operations have std::memory_order_seq_cst semantics.
3828 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3829 __ Add(tmp_ptr, base, offset);
3830 GenerateCompareAndSet(codegen,
3831 type,
3832 /*strong=*/ true,
3833 cmp_failure,
3834 /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3835 tmp_ptr,
3836 /*expected=*/ LocationFrom(expected), // TODO: Int64
3837 /*new_value=*/ LocationFrom(new_value), // TODO: Int64
3838 /*old_value=*/ LocationFrom(tmp), // TODO: Int64
3839 /*store_result=*/ tmp,
3840 /*success=*/ out);
3841 __ Bind(exit_loop);
3842 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3843
3844 if (type == DataType::Type::kReference) {
3845 codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3846 }
3847 }
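// Illustrative sketch only (not emitted verbatim): without a read barrier, the fast path of a
// 32-bit Unsafe CAS generated above corresponds roughly to the following Thumb-2 sequence; the
// details of setting the success flag in `out` live in GenerateCompareAndSet().
//
//     dmb   ish                      // kAnyAny barrier before the update.
//     add   tmp_ptr, base, offset
//   retry:
//     ldrex tmp, [tmp_ptr]
//     cmp   tmp, expected
//     bne   exit_loop                // `cmp_failure` == `exit_loop` when not marking.
//     strex tmp, new_value, [tmp_ptr]
//     cmp   tmp, #0
//     bne   retry
//   exit_loop:
//     dmb   ish                      // kAnyAny barrier after the update.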
3848
3849 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3850 VisitJdkUnsafeCASInt(invoke);
3851 }
3852 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3853 VisitJdkUnsafeCASObject(invoke);
3854 }
3855
3856 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3857 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3858 VisitJdkUnsafeCompareAndSetInt(invoke);
3859 }
3860 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3861 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3862 VisitJdkUnsafeCompareAndSetReference(invoke);
3863 }
3864
3865 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3866 CreateUnsafeCASLocations(invoke, codegen_);
3867 }
3868 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3869 // The only supported read barrier implementation is the Baker-style read barrier (b/173104084).
3870 if (codegen_->EmitNonBakerReadBarrier()) {
3871 return;
3872 }
3873
3874 CreateUnsafeCASLocations(invoke, codegen_);
3875 }
3876
3877 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3878 VisitJdkUnsafeCASInt(invoke);
3879 }
3880 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3881 VisitJdkUnsafeCASObject(invoke);
3882 }
3883
3884 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3885 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3886 VisitJdkUnsafeCompareAndSetInt(invoke);
3887 }
3888 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3889 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3890 VisitJdkUnsafeCompareAndSetReference(invoke);
3891 }
3892
3893 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3894 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
3895 }
3896 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3897 // The only supported read barrier implementation is the Baker-style read barrier (b/173104084).
3898 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3899
3900 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
3901 }
3902
3903 enum class GetAndUpdateOp {
3904 kSet,
3905 kAdd,
3906 kAddWithByteSwap,
3907 kAnd,
3908 kOr,
3909 kXor
3910 };
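// For reference, each op denotes the following atomic update performed by GenerateGetAndUpdate()
// below (which leaves the previous value in `old_value`):
//   kSet:             *ptr = arg
//   kAdd:             *ptr += arg
//   kAddWithByteSwap: like kAdd, but the memory value is byte-swapped (used for byte array views)
//   kAnd / kOr / kXor: *ptr &= / |= / ^= arg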
3911
3912 static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
3913 GetAndUpdateOp get_and_update_op,
3914 DataType::Type load_store_type,
3915 vixl32::Register ptr,
3916 Location arg,
3917 Location old_value,
3918 vixl32::Register store_result,
3919 Location maybe_temp,
3920 Location maybe_vreg_temp) {
3921 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3922
3923 Location loaded_value;
3924 Location new_value;
3925 switch (get_and_update_op) {
3926 case GetAndUpdateOp::kSet:
3927 loaded_value = old_value;
3928 new_value = arg;
3929 break;
3930 case GetAndUpdateOp::kAddWithByteSwap:
3931 if (old_value.IsRegisterPair()) {
3932 // To avoid register overlap when reversing bytes, load into temps.
3933 DCHECK(maybe_temp.IsRegisterPair());
3934 loaded_value = maybe_temp;
3935 new_value = loaded_value; // Use the same temporaries for the new value.
3936 break;
3937 }
3938 FALLTHROUGH_INTENDED;
3939 case GetAndUpdateOp::kAdd:
3940 if (old_value.IsFpuRegisterPair()) {
3941 DCHECK(maybe_temp.IsRegisterPair());
3942 loaded_value = maybe_temp;
3943 new_value = loaded_value; // Use the same temporaries for the new value.
3944 break;
3945 }
3946 if (old_value.IsFpuRegister()) {
3947 DCHECK(maybe_temp.IsRegister());
3948 loaded_value = maybe_temp;
3949 new_value = loaded_value; // Use the same temporary for the new value.
3950 break;
3951 }
3952 FALLTHROUGH_INTENDED;
3953 case GetAndUpdateOp::kAnd:
3954 case GetAndUpdateOp::kOr:
3955 case GetAndUpdateOp::kXor:
3956 loaded_value = old_value;
3957 new_value = maybe_temp;
3958 break;
3959 }
3960
3961 vixl32::Label loop_label;
3962 __ Bind(&loop_label);
3963 EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3964 switch (get_and_update_op) {
3965 case GetAndUpdateOp::kSet:
3966 break;
3967 case GetAndUpdateOp::kAddWithByteSwap:
3968 if (arg.IsFpuRegisterPair()) {
3969 GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3970 vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3971 __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3972 __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum); // Swap low/high.
3973 } else if (arg.IsFpuRegister()) {
3974 GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3975 vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
3976 __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3977 __ Vmov(RegisterFrom(new_value), sum);
3978 } else if (load_store_type == DataType::Type::kInt64) {
3979 GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3980 // Swap low/high registers for the addition results.
3981 __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3982 __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3983 } else {
3984 GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3985 __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3986 }
3987 if (load_store_type == DataType::Type::kInt64) {
3988 // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3989 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3990 } else {
3991 GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3992 }
3993 break;
3994 case GetAndUpdateOp::kAdd:
3995 if (arg.IsFpuRegisterPair()) {
3996 vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3997 vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3998 __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3999 __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
4000 __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
4001 } else if (arg.IsFpuRegister()) {
4002 vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
4003 vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
4004 __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
4005 __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
4006 __ Vmov(RegisterFrom(new_value), sum);
4007 } else if (load_store_type == DataType::Type::kInt64) {
4008 __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4009 __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4010 } else {
4011 __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4012 }
4013 break;
4014 case GetAndUpdateOp::kAnd:
4015 if (load_store_type == DataType::Type::kInt64) {
4016 __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4017 __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4018 } else {
4019 __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4020 }
4021 break;
4022 case GetAndUpdateOp::kOr:
4023 if (load_store_type == DataType::Type::kInt64) {
4024 __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4025 __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4026 } else {
4027 __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4028 }
4029 break;
4030 case GetAndUpdateOp::kXor:
4031 if (load_store_type == DataType::Type::kInt64) {
4032 __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
4033 __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
4034 } else {
4035 __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
4036 }
4037 break;
4038 }
4039 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
4040 __ Cmp(store_result, 0);
4041 __ B(ne, &loop_label);
4042 }
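// Illustrative sketch only: regardless of the concrete operation, the code emitted above always
// has the standard load/store-exclusive retry shape, e.g. for a 32-bit GetAndUpdateOp::kAdd:
//
//   loop:
//     ldrex old_value, [ptr]
//     add   new_value, old_value, arg
//     strex store_result, new_value, [ptr]
//     cmp   store_result, #0
//     bne   loop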
4043
4044 static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke,
4045 CodeGeneratorARMVIXL* codegen,
4046 DataType::Type type,
4047 GetAndUpdateOp get_and_update_op) {
4048 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
4049 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4050 LocationSummary* locations =
4051 new (allocator) LocationSummary(invoke,
4052 can_call
4053 ? LocationSummary::kCallOnSlowPath
4054 : LocationSummary::kNoCall,
4055 kIntrinsified);
4056 if (can_call && kUseBakerReadBarrier) {
4057 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4058 }
4059 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
4060 locations->SetInAt(1, Location::RequiresRegister());
4061 locations->SetInAt(2, Location::RequiresRegister());
4062 locations->SetInAt(3, Location::RequiresRegister());
4063
4064 // Request another temporary register for methods that don't return a value.
4065 size_t num_temps = 1u; // We always need `tmp_ptr`.
4066 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4067 if (is_void) {
4068 num_temps++;
4069 } else {
4070 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4071 }
4072
4073 if (get_and_update_op == GetAndUpdateOp::kAdd) {
4074 // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`.
4075 num_temps += (type == DataType::Type::kInt64) ? 2u : 1u;
4076 if (type == DataType::Type::kInt64) {
4077 // There are enough available registers but the register allocator can fail to allocate
4078 // them correctly because it can block register pairs by single-register inputs and temps.
4079 // To work around this limitation, use the same fixed register pair for both the output
4080 // and the offset, which is no longer needed after the address calculation.
4081 // (Alternatively, we could set up distinct fixed locations for `offset`, `arg` and `out`.)
4082 locations->SetInAt(2, LocationFrom(r0, r1));
4083 locations->UpdateOut(LocationFrom(r0, r1));
4084 }
4085 }
4086 locations->AddRegisterTemps(num_temps);
4087 }
4088
4089 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
4090 CodeGeneratorARMVIXL* codegen,
4091 DataType::Type type,
4092 GetAndUpdateOp get_and_update_op) {
4093 // Currently this is only used for GetAndUpdateOp::kSet and GetAndUpdateOp::kAdd. It might
4094 // be fine for other ops, but double-check the code below before relying on that.
4095 DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
4096
4097 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4098 LocationSummary* locations = invoke->GetLocations();
4099
4100 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4101
4102 // We use a temporary for void methods, as we don't return the value.
4103 Location out_or_temp =
4104 is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
4105 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
4106 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
4107 Location arg = locations->InAt(3); // New value or addend.
4108 vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
4109 Location maybe_temp = Location::NoLocation();
4110 if (get_and_update_op == GetAndUpdateOp::kAdd) {
4111 maybe_temp = (type == DataType::Type::kInt64)
4112 ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
4113 : locations->GetTemp(1);
4114 }
4115
4116 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4117 vixl32::Register temp = temps.Acquire();
4118
4119 if (type == DataType::Type::kReference) {
4120 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4121 // Mark card for object as a new value shall be stored.
4122 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
4123 vixl32::Register card = tmp_ptr; // Use the `tmp_ptr` also as the `card` temporary.
4124 codegen->MaybeMarkGCCard(temp, card, base, /*value=*/ RegisterFrom(arg), new_value_can_be_null);
4125 }
4126
4127 // Note: UnsafeGetAndUpdate operations are sequentially consistent, requiring
4128 // a barrier before and after the raw load/store-exclusive operation.
4129
4130 __ Add(tmp_ptr, base, Operand(offset));
4131 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4132 GenerateGetAndUpdate(codegen,
4133 get_and_update_op,
4134 type,
4135 tmp_ptr,
4136 arg,
4137 /*old_value=*/ out_or_temp,
4138 /*store_result=*/ temp,
4139 maybe_temp,
4140 /*maybe_vreg_temp=*/ Location::NoLocation());
4141 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4142
4143 if (!is_void && type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
4144 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4145 if (kUseBakerReadBarrier) {
4146 codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out_or_temp),
4147 RegisterFrom(out_or_temp));
4148 } else {
4149 codegen->GenerateReadBarrierSlow(invoke,
4150 out_or_temp,
4151 out_or_temp,
4152 Location::RegisterLocation(base.GetCode()),
4153 /*offset=*/ 0u,
4154 /*index=*/ Location::RegisterLocation(offset.GetCode()));
4155 }
4156 }
4157 }
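// Illustrative Java-level sketch of the semantics implemented above for the getAndAdd flavor.
// The reference (non-intrinsified) implementation is roughly a volatile-read/CAS retry loop
// like the following (names are illustrative, not the exact libcore code); the intrinsic
// replaces it with a single load/store-exclusive loop:
//
//   public final int getAndAddInt(Object o, long offset, int delta) {
//     int v;
//     do {
//       v = getIntVolatile(o, offset);
//     } while (!compareAndSwapInt(o, offset, v, v + delta));
//     return v;
//   }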
4158
4159 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
4160 VisitJdkUnsafeGetAndAddInt(invoke);
4161 }
4162 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
4163 VisitJdkUnsafeGetAndAddLong(invoke);
4164 }
4165 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
4166 VisitJdkUnsafeGetAndSetInt(invoke);
4167 }
4168 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
4169 VisitJdkUnsafeGetAndSetLong(invoke);
4170 }
4171 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
4172 VisitJdkUnsafeGetAndSetReference(invoke);
4173 }
4174
4175 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
4176 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
4177 }
4178 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
4179 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
4180 }
4181 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
4182 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
4183 }
4184 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
4185 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
4186 }
4187 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
4188 CreateUnsafeGetAndUpdateLocations(
4189 invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
4190 }
4191
4192 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
4193 VisitJdkUnsafeGetAndAddInt(invoke);
4194 }
4195 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
4196 VisitJdkUnsafeGetAndAddLong(invoke);
4197 }
4198 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
4199 VisitJdkUnsafeGetAndSetInt(invoke);
4200 }
4201 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
4202 VisitJdkUnsafeGetAndSetLong(invoke);
4203 }
4204 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
4205 VisitJdkUnsafeGetAndSetReference(invoke);
4206 }
4207
4208 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
4209 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
4210 }
4211 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
4212 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
4213 }
4214 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
4215 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
4216 }
4217 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
4218 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
4219 }
4220 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
4221 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
4222 }
4223
4224 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
4225 public:
4226 VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
4227 : IntrinsicSlowPathARMVIXL(invoke),
4228 order_(order),
4229 atomic_(false),
4230 return_success_(false),
4231 strong_(false),
4232 get_and_update_op_(GetAndUpdateOp::kAdd) {
4233 }
4234
4235 vixl32::Label* GetByteArrayViewCheckLabel() {
4236 return &byte_array_view_check_label_;
4237 }
4238
4239 vixl32::Label* GetNativeByteOrderLabel() {
4240 return &native_byte_order_label_;
4241 }
4242
4243 void SetAtomic(bool atomic) {
4244 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
4245 GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
4246 atomic_ = atomic;
4247 }
4248
4249 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4250 if (return_success) {
4251 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4252 } else {
4253 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4254 }
4255 return_success_ = return_success;
4256 strong_ = strong;
4257 }
4258
4259 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4260 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4261 get_and_update_op_ = get_and_update_op;
4262 }
4263
4264 void EmitNativeCode(CodeGenerator* codegen_in) override {
4265 if (GetByteArrayViewCheckLabel()->IsReferenced()) {
4266 EmitByteArrayViewCode(codegen_in);
4267 }
4268 IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
4269 }
4270
4271 private:
4272 HInvoke* GetInvoke() const {
4273 return GetInstruction()->AsInvoke();
4274 }
4275
4276 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4277 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4278 }
4279
4280 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4281
4282 vixl32::Label byte_array_view_check_label_;
4283 vixl32::Label native_byte_order_label_;
4284 // Shared parameter for all VarHandle intrinsics.
4285 std::memory_order order_;
4286 // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
4287 bool atomic_;
4288 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4289 bool return_success_;
4290 bool strong_;
4291 // Extra argument for GenerateVarHandleGetAndUpdate().
4292 GetAndUpdateOp get_and_update_op_;
4293 };
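// Note on the two extra labels above: failed checks normally jump to the regular slow path
// entry, but the array component type check may instead jump to `byte_array_view_check_label_`
// when the coordinate could still be a byte array view (handled by EmitByteArrayViewCode()).
// `native_byte_order_label_` is bound in the main path right after the checks and target
// computation, giving the byte-array-view code a place to resume when no byte swap is needed.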
4294
4295 // Generate subtype check without read barriers.
4296 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
4297 SlowPathCodeARMVIXL* slow_path,
4298 vixl32::Register object,
4299 vixl32::Register type,
4300 bool object_can_be_null = true) {
4301 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4302
4303 const MemberOffset class_offset = mirror::Object::ClassOffset();
4304 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4305
4306 vixl32::Label success;
4307 if (object_can_be_null) {
4308 __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
4309 }
4310
4311 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4312 vixl32::Register temp = temps.Acquire();
4313
4314 __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
4315 assembler->MaybeUnpoisonHeapReference(temp);
4316 vixl32::Label loop;
4317 __ Bind(&loop);
4318 __ Cmp(type, temp);
4319 __ B(eq, &success, /*is_far_target=*/ false);
4320 __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
4321 assembler->MaybeUnpoisonHeapReference(temp);
4322 __ Cmp(temp, 0);
4323 __ B(eq, slow_path->GetEntryLabel());
4324 __ B(&loop);
4325 __ Bind(&success);
4326 }
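// Illustrative Java-like pseudo-code for the check above. Because no read barrier is used, the
// loaded classes may be from-space copies, so a false negative is possible and is resolved by
// the slow path:
//
//   if (object == null) return;                      // Only when `object_can_be_null`.
//   Class<?> klass = object.getClass();
//   while (klass != type) {
//     klass = klass.getSuperclass();
//     if (klass == null) goto slow_path;             // Not a subtype (or a false negative).
//   }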
4327
4328 // Check access mode and the primitive type from VarHandle.varType.
4329 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4330 // check without read barrier, so it can have false negatives which we handle in the slow path.
4331 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4332 CodeGeneratorARMVIXL* codegen,
4333 SlowPathCodeARMVIXL* slow_path,
4334 DataType::Type type) {
4335 mirror::VarHandle::AccessMode access_mode =
4336 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4337 Primitive::Type primitive_type = DataTypeToPrimitive(type);
4338
4339 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4340 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4341
4342 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4343 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4344 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4345
4346 // Use the temporary register reserved for offset. It is not used yet at this point.
4347 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4348 vixl32::Register var_type_no_rb =
4349 RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
4350
4351 // Check that the operation is permitted and that the primitive type of varhandle.varType
4352 // matches the expected type. We do not need a read barrier when loading the reference only
4353 // to load a constant primitive field through it. Use LDRD to load the two fields together.
4354 {
4355 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4356 vixl32::Register temp2 = temps.Acquire();
4357 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4358 __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
4359 assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
4360 __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
4361 __ B(eq, slow_path->GetEntryLabel());
4362 __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4363 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4364 __ B(ne, slow_path->GetEntryLabel());
4365 }
4366
4367 if (type == DataType::Type::kReference) {
4368 // Check reference arguments against the varType.
4369 // False negatives due to varType being an interface or array type
4370 // or due to the missing read barrier are handled by the slow path.
4371 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4372 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4373 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4374 HInstruction* arg = invoke->InputAt(arg_index);
4375 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4376 if (!arg->IsNullConstant()) {
4377 vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
4378 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4379 }
4380 }
4381 }
4382 }
4383
4384 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4385 CodeGeneratorARMVIXL* codegen,
4386 SlowPathCodeARMVIXL* slow_path) {
4387 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4388 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4389
4390 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4391
4392 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4393 vixl32::Register temp = temps.Acquire();
4394
4395 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4396 // Do not emit read barrier (or unpoison the reference) for comparing to null.
4397 __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4398 __ Cmp(temp, 0);
4399 __ B(ne, slow_path->GetEntryLabel());
4400 }
4401
4402 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4403 CodeGeneratorARMVIXL* codegen,
4404 SlowPathCodeARMVIXL* slow_path) {
4405 VarHandleOptimizations optimizations(invoke);
4406 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4407 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4408 vixl32::Register object = InputRegisterAt(invoke, 1);
4409
4410 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4411 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4412
4413 // Null-check the object.
4414 if (!optimizations.GetSkipObjectNullCheck()) {
4415 __ Cmp(object, 0);
4416 __ B(eq, slow_path->GetEntryLabel());
4417 }
4418
4419 if (!optimizations.GetUseKnownImageVarHandle()) {
4420 // Use the first temporary register, whether it's for the declaring class or the offset.
4421 // It is not used yet at this point.
4422 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4423
4424 // Check that the VarHandle references an instance field by checking that
4425 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4426 // type compatibility check with the source object's type, which will fail for null.
4427 {
4428 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4429 vixl32::Register temp2 = temps.Acquire();
4430 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4431 __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4432 assembler->MaybeUnpoisonHeapReference(temp);
4433 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4434 __ Cmp(temp2, 0);
4435 __ B(ne, slow_path->GetEntryLabel());
4436 }
4437
4438 // Check that the object has the correct type.
4439 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4440 GenerateSubTypeObjectCheckNoReadBarrier(
4441 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4442 }
4443 }
4444
4445 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4446 CodeGeneratorARMVIXL* codegen,
4447 VarHandleSlowPathARMVIXL* slow_path) {
4448 VarHandleOptimizations optimizations(invoke);
4449 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4450 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4451 vixl32::Register object = InputRegisterAt(invoke, 1);
4452 vixl32::Register index = InputRegisterAt(invoke, 2);
4453 DataType::Type value_type =
4454 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4455 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4456
4457 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4458 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4459 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4460 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4461 const MemberOffset class_offset = mirror::Object::ClassOffset();
4462 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4463
4464 // Null-check the object.
4465 if (!optimizations.GetSkipObjectNullCheck()) {
4466 __ Cmp(object, 0);
4467 __ B(eq, slow_path->GetEntryLabel());
4468 }
4469
4470 // Use the offset temporary register. It is not used yet at this point.
4471 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4472
4473 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4474 vixl32::Register temp2 = temps.Acquire();
4475
4476 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4477 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4478 // coordinateType0 shall not be null but we do not explicitly verify that.
4479 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4480 __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4481 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4482 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4483 __ Cmp(temp2, 0);
4484 __ B(eq, slow_path->GetEntryLabel());
4485
4486 // Check object class against componentType0.
4487 //
4488 // This is an exact check and we defer other cases to the runtime. This includes
4489 // conversion to array of superclass references, which is valid but subsequently
4490 // requires all update operations to check that the value can indeed be stored.
4491 // We do not want to perform such extra checks in the intrinsified code.
4492 //
4493 // We do this check without read barrier, so there can be false negatives which we
4494 // defer to the slow path. There shall be no false negatives for array classes in the
4495 // boot image (including Object[] and primitive arrays) because they are non-movable.
4496 __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
4497 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4498 __ Cmp(temp, temp2);
4499 __ B(ne, slow_path->GetEntryLabel());
4500
4501 // Check that the coordinateType0 is an array type. We do not need a read barrier
4502 // for loading constant reference fields (or chains of them) for comparison with null,
4503 // nor for finally loading a constant primitive field (primitive type) below.
4504 __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
4505 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4506 __ Cmp(temp2, 0);
4507 __ B(eq, slow_path->GetEntryLabel());
4508
4509 // Check that the array component type matches the primitive type.
4510 // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
4511 // we shall check for a byte array view in the slow path.
4512 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4513 // so we cannot emit that if we're JITting without boot image.
4514 bool boot_image_available =
4515 codegen->GetCompilerOptions().IsBootImage() ||
4516 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4517 bool can_be_view =
4518 ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4519 boot_image_available;
4520 vixl32::Label* slow_path_label =
4521 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4522 __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4523 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4524 __ B(ne, slow_path_label);
4525
4526 // Check for array index out of bounds.
4527 __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4528 __ Cmp(index, temp);
4529 __ B(hs, slow_path->GetEntryLabel());
4530 }
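// Summary of the checks above for the array / byte array view case; any failure branches to the
// slow path (or, for a possible byte array view, to the byte-array-view check label):
//   1. object != null,
//   2. coordinateType1 != null (the VarHandle takes an index coordinate),
//   3. object->klass_ == coordinateType0 (exact match, checked without a read barrier),
//   4. coordinateType0 is an array class whose component primitive type matches `value_type`,
//   5. index < object->length_ (the unsigned `hs` comparison also rejects negative indices).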
4531
4532 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4533 CodeGeneratorARMVIXL* codegen,
4534 VarHandleSlowPathARMVIXL* slow_path) {
4535 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4536 if (expected_coordinates_count == 0u) {
4537 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4538 } else if (expected_coordinates_count == 1u) {
4539 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4540 } else {
4541 DCHECK_EQ(expected_coordinates_count, 2u);
4542 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4543 }
4544 }
4545
4546 static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
4547 CodeGeneratorARMVIXL* codegen,
4548 std::memory_order order,
4549 DataType::Type type) {
4550 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4551 VarHandleOptimizations optimizations(invoke);
4552 if (optimizations.GetUseKnownImageVarHandle()) {
4553 DCHECK_NE(expected_coordinates_count, 2u);
4554 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4555 return nullptr;
4556 }
4557 }
4558
4559 VarHandleSlowPathARMVIXL* slow_path =
4560 new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4561 codegen->AddSlowPath(slow_path);
4562
4563 if (!optimizations.GetUseKnownImageVarHandle()) {
4564 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4565 }
4566 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4567
4568 return slow_path;
4569 }
4570
4571 struct VarHandleTarget {
4572 vixl32::Register object; // The object holding the value to operate on.
4573 vixl32::Register offset; // The offset of the value to operate on.
4574 };
4575
4576 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4577 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4578 LocationSummary* locations = invoke->GetLocations();
4579
4580 VarHandleTarget target;
4581 // The temporary allocated for loading the offset.
4582 target.offset = RegisterFrom(locations->GetTemp(0u));
4583 // The reference to the object that holds the value to operate on.
4584 target.object = (expected_coordinates_count == 0u)
4585 ? RegisterFrom(locations->GetTemp(1u))
4586 : InputRegisterAt(invoke, 1);
4587 return target;
4588 }
4589
4590 static void GenerateVarHandleTarget(HInvoke* invoke,
4591 const VarHandleTarget& target,
4592 CodeGeneratorARMVIXL* codegen) {
4593 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4594 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4595 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4596
4597 if (expected_coordinates_count <= 1u) {
4598 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4599 ScopedObjectAccess soa(Thread::Current());
4600 ArtField* target_field = GetBootImageVarHandleField(invoke);
4601 if (expected_coordinates_count == 0u) {
4602 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4603 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4604 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4605 codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4606 } else {
4607 codegen->LoadTypeForBootImageIntrinsic(
4608 target.object,
4609 TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4610 }
4611 }
4612 __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4613 } else {
4614 // For static fields, we need to fill the `target.object` with the declaring class,
4615 // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
4616 // we do not need the declaring class, so the `ArtField*` is no longer needed once the
4617 // `target.offset` has been loaded; therefore we use `target.offset` to hold the `ArtField*`.
4618 vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4619
4620 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4621 const MemberOffset offset_offset = ArtField::OffsetOffset();
4622
4623 // Load the ArtField*, the offset and, if needed, declaring class.
4624 __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value()));
4625 __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value()));
4626 if (expected_coordinates_count == 0u) {
4627 codegen->GenerateGcRootFieldLoad(invoke,
4628 LocationFrom(target.object),
4629 field,
4630 ArtField::DeclaringClassOffset().Int32Value(),
4631 codegen->GetCompilerReadBarrierOption());
4632 }
4633 }
4634 } else {
4635 DCHECK_EQ(expected_coordinates_count, 2u);
4636 DataType::Type value_type =
4637 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4638 uint32_t size_shift = DataType::SizeShift(value_type);
4639 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4640
4641 vixl32::Register index = InputRegisterAt(invoke, 2);
4642 vixl32::Register shifted_index = index;
4643 if (size_shift != 0u) {
4644 shifted_index = target.offset;
4645 __ Lsl(shifted_index, index, size_shift);
4646 }
4647 __ Add(target.offset, shifted_index, data_offset.Int32Value());
4648 }
4649 }
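// For reference, after GenerateVarHandleTarget() the value to operate on is always addressed as
// `target.object + target.offset`:
//   - static field:   target.object = declaring class (temp),  target.offset = field offset,
//   - instance field: target.object = object coordinate,       target.offset = field offset,
//   - array / view:   target.object = array coordinate,        target.offset = data offset + (index << size shift).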
4650
4651 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
4652 CodeGeneratorARMVIXL* codegen) {
4653 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4654 DataType::Type return_type = invoke->GetType();
4655
4656 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4657 LocationSummary* locations =
4658 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4659 locations->SetInAt(0, Location::RequiresRegister());
4660 // Require coordinates in registers. These are the object holding the value
4661 // to operate on (except for static fields) and index (for arrays and views).
4662 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4663 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4664 }
4665 if (return_type != DataType::Type::kVoid) {
4666 if (DataType::IsFloatingPointType(return_type)) {
4667 locations->SetOut(Location::RequiresFpuRegister());
4668 } else {
4669 locations->SetOut(Location::RequiresRegister());
4670 }
4671 }
4672 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4673 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4674 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4675 HInstruction* arg = invoke->InputAt(arg_index);
4676 if (DataType::IsFloatingPointType(arg->GetType())) {
4677 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4678 } else {
4679 locations->SetInAt(arg_index, Location::RequiresRegister());
4680 }
4681 }
4682
4683 // Add a temporary for offset.
4684 if (codegen->EmitNonBakerReadBarrier() &&
4685 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4686 // To preserve the offset value across the non-Baker read barrier slow path
4687 // for loading the declaring class, use a fixed callee-save register.
4688 constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4689 locations->AddTemp(Location::RegisterLocation(first_callee_save));
4690 } else {
4691 locations->AddTemp(Location::RequiresRegister());
4692 }
4693 if (expected_coordinates_count == 0u) {
4694 // Add a temporary to hold the declaring class.
4695 locations->AddTemp(Location::RequiresRegister());
4696 }
4697
4698 return locations;
4699 }
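// Resulting temporary layout from CreateVarHandleCommonLocations():
//   temp 0: the field offset (it also holds the `ArtField*` while that is being loaded),
//   temp 1: the declaring class, present only for static fields (zero coordinates).
// Callers add any further temporaries they need after these.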
4700
4701 static void CreateVarHandleGetLocations(HInvoke* invoke,
4702 CodeGeneratorARMVIXL* codegen,
4703 bool atomic) {
4704 VarHandleOptimizations optimizations(invoke);
4705 if (optimizations.GetDoNotIntrinsify()) {
4706 return;
4707 }
4708
4709 if (codegen->EmitNonBakerReadBarrier() &&
4710 invoke->GetType() == DataType::Type::kReference &&
4711 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4712 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4713 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4714 // the passed reference and reloads it from the field. This gets the memory visibility
4715 // wrong for Acquire/Volatile operations. b/173104084
4716 return;
4717 }
4718
4719 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4720
4721 DataType::Type type = invoke->GetType();
4722 if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4723 // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4724 // declaring class (if present) and offset temporary.
4725 DCHECK_EQ(locations->GetTempCount(),
4726 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4727 locations->AddRegisterTemps(3u - locations->GetTempCount());
4728 }
4729 }
4730
4731 static void GenerateVarHandleGet(HInvoke* invoke,
4732 CodeGeneratorARMVIXL* codegen,
4733 std::memory_order order,
4734 bool atomic,
4735 bool byte_swap = false) {
4736 DataType::Type type = invoke->GetType();
4737 DCHECK_NE(type, DataType::Type::kVoid);
4738
4739 LocationSummary* locations = invoke->GetLocations();
4740 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4741 Location out = locations->Out();
4742
4743 VarHandleTarget target = GetVarHandleTarget(invoke);
4744 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4745 if (!byte_swap) {
4746 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4747 GenerateVarHandleTarget(invoke, target, codegen);
4748 if (slow_path != nullptr) {
4749 slow_path->SetAtomic(atomic);
4750 __ Bind(slow_path->GetNativeByteOrderLabel());
4751 }
4752 }
4753
4754 Location maybe_temp = Location::NoLocation();
4755 Location maybe_temp2 = Location::NoLocation();
4756 Location maybe_temp3 = Location::NoLocation();
4757 if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
4758 // Reuse the offset temporary.
4759 maybe_temp = LocationFrom(target.offset);
4760 } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4761 // Reuse the offset temporary and declaring class (if present).
4762 // The address shall be constructed in the scratch register before they are clobbered.
4763 maybe_temp = LocationFrom(target.offset);
4764 DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4765 if (type == DataType::Type::kFloat64) {
4766 maybe_temp2 = locations->GetTemp(1);
4767 maybe_temp3 = locations->GetTemp(2);
4768 }
4769 }
4770
4771 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4772 Location loaded_value = out;
4773 DataType::Type load_type = type;
4774 if (byte_swap) {
4775 if (type == DataType::Type::kFloat64) {
4776 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4777 // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4778 loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4779 maybe_temp2 = Location::NoLocation();
4780 maybe_temp3 = Location::NoLocation();
4781 } else {
4782 // Use the offset temporary and the scratch register.
4783 loaded_value = LocationFrom(target.offset, temps.Acquire());
4784 }
4785 load_type = DataType::Type::kInt64;
4786 } else if (type == DataType::Type::kFloat32) {
4787 // Reuse the offset temporary.
4788 loaded_value = LocationFrom(target.offset);
4789 load_type = DataType::Type::kInt32;
4790 } else if (type == DataType::Type::kInt64) {
4791 // Swap the high and low registers and reverse the bytes in each after the load.
4792 loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4793 }
4794 }
4795
4796 GenerateIntrinsicGet(invoke,
4797 codegen,
4798 load_type,
4799 order,
4800 atomic,
4801 target.object,
4802 target.offset,
4803 loaded_value,
4804 maybe_temp,
4805 maybe_temp2,
4806 maybe_temp3);
4807 if (byte_swap) {
4808 if (type == DataType::Type::kInt64) {
4809 GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4810 } else {
4811 GenerateReverseBytes(assembler, type, loaded_value, out);
4812 }
4813 }
4814
4815 if (slow_path != nullptr) {
4816 DCHECK(!byte_swap);
4817 __ Bind(slow_path->GetExitLabel());
4818 }
4819 }
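// Note: GenerateVarHandleGet() is also reached with `byte_swap == true` from the byte array
// view slow path (EmitByteArrayViewCode()); in that case the VarHandle checks and the target
// computation have already been performed, so the code above skips them and emits only the
// byte-reversed load.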
4820
4821 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4822 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
4823 }
4824
4825 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4826 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4827 }
4828
4829 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4830 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4831 }
4832
4833 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4834 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4835 }
4836
4837 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4838 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4839 }
4840
4841 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4842 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
4843 }
4844
4845 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4846 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4847 }
4848
4849 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4850 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4851 }
4852
4853 static void CreateVarHandleSetLocations(HInvoke* invoke,
4854 CodeGeneratorARMVIXL* codegen,
4855 bool atomic) {
4856 VarHandleOptimizations optimizations(invoke);
4857 if (optimizations.GetDoNotIntrinsify()) {
4858 return;
4859 }
4860
4861 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4862
4863 // Get the type from the shorty as the invokes may not return a value.
4864 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4865 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4866 if (DataType::Is64BitType(value_type)) {
4867 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4868 DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4869 HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4870 bool has_reverse_bytes_slow_path =
4871 (expected_coordinates_count == 2u) &&
4872 !IsZeroBitPattern(arg);
4873 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4874 // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4875 // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4876 // We can reuse the offset temporary and declaring class (if present).
4877 size_t temps_needed = has_reverse_bytes_slow_path
4878 ? 4u
4879 : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4880 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4881 } else if (has_reverse_bytes_slow_path) {
4882 // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4883 // We can reuse the offset temporary.
4884 DCHECK_EQ(locations->GetTempCount(), 1u);
4885 locations->AddTemp(Location::RequiresRegister());
4886 }
4887 }
4888 }
4889
4890 static void GenerateVarHandleSet(HInvoke* invoke,
4891 CodeGeneratorARMVIXL* codegen,
4892 std::memory_order order,
4893 bool atomic,
4894 bool byte_swap = false) {
4895 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4896 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4897
4898 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4899 LocationSummary* locations = invoke->GetLocations();
4900 Location value = locations->InAt(value_index);
4901
4902 VarHandleTarget target = GetVarHandleTarget(invoke);
4903 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4904 if (!byte_swap) {
4905 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4906 GenerateVarHandleTarget(invoke, target, codegen);
4907 if (slow_path != nullptr) {
4908 slow_path->SetAtomic(atomic);
4909 __ Bind(slow_path->GetNativeByteOrderLabel());
4910 }
4911 }
4912
4913 Location maybe_temp = Location::NoLocation();
4914 Location maybe_temp2 = Location::NoLocation();
4915 Location maybe_temp3 = Location::NoLocation();
4916 if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4917 // Reuse the offset temporary and declaring class (if present).
4918 // The address shall be constructed in the scratch register before they are clobbered.
4919 maybe_temp = locations->GetTemp(0);
4920 maybe_temp2 = locations->GetTemp(1);
4921 if (value_type == DataType::Type::kFloat64) {
4922 maybe_temp3 = locations->GetTemp(2);
4923 }
4924 }
4925
4926 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4927 if (byte_swap) {
4928 if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4929 // Calculate the address in scratch register, so that we can use the offset temporary.
4930 vixl32::Register base = temps.Acquire();
4931 __ Add(base, target.object, target.offset);
4932 target.object = base;
4933 target.offset = vixl32::Register();
4934 }
4935 Location original_value = value;
4936 if (DataType::Is64BitType(value_type)) {
4937 size_t temp_start = 0u;
4938 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4939 // Clear `maybe_temp3` which was initialized above for Float64.
4940 DCHECK_IMPLIES(value_type == DataType::Type::kFloat64,
4941 maybe_temp3.Equals(locations->GetTemp(2)));
4942 maybe_temp3 = Location::NoLocation();
4943 temp_start = 2u;
4944 }
4945 value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4946 RegisterFrom(locations->GetTemp(temp_start + 1u)));
4947 if (value_type == DataType::Type::kFloat64) {
4948 __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4949 GenerateReverseBytesInPlaceForEachWord(assembler, value);
4950 value_type = DataType::Type::kInt64;
4951 } else {
4952 GenerateReverseBytes(assembler, value_type, original_value, value);
4953 }
4954 } else if (value_type == DataType::Type::kFloat32) {
4955 value = locations->GetTemp(0); // Use the offset temporary which was freed above.
4956 __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4957 GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4958 value_type = DataType::Type::kInt32;
4959 } else {
4960 value = LocationFrom(temps.Acquire());
4961 GenerateReverseBytes(assembler, value_type, original_value, value);
4962 }
4963 }
4964
4965 GenerateIntrinsicSet(codegen,
4966 value_type,
4967 order,
4968 atomic,
4969 target.object,
4970 target.offset,
4971 value,
4972 maybe_temp,
4973 maybe_temp2,
4974 maybe_temp3);
4975
4976 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4977 // Reuse the offset temporary for MarkGCCard.
4978 vixl32::Register temp = target.offset;
4979 vixl32::Register card = temps.Acquire();
4980 vixl32::Register value_reg = RegisterFrom(value);
4981 codegen->MaybeMarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
4982 }
4983
4984 if (slow_path != nullptr) {
4985 DCHECK(!byte_swap);
4986 __ Bind(slow_path->GetExitLabel());
4987 }
4988 }
4989
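// The four set access modes below differ only in atomicity and memory order: set() uses a
// plain store (atomic=false), setOpaque() an atomic relaxed store, setRelease() an atomic
// release store, and setVolatile() an atomic sequentially consistent store.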
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
  CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
  GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
  CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
  GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
  CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
  GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
  CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
  // ARM store-release instructions are implicitly sequentially consistent.
  GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
}

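// Register allocation for the compareAndSet / compareAndExchange family. On top of the
// common VarHandle locations this reserves callee-save temporaries for non-Baker read
// barriers, extra core temporaries for floating point values (which are handled in core
// registers) and temporaries for the byte array view slow path.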
static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
                                                            CodeGeneratorARMVIXL* codegen,
                                                            bool return_success) {
  VarHandleOptimizations optimizations(invoke);
  if (optimizations.GetDoNotIntrinsify()) {
    return;
  }

  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
  DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
  if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
    // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
    // the passed reference and reloads it from the field. This breaks the read barriers
    // in slow path in different ways. The marked old value may not actually be a to-space
    // reference to the same object as `old_value`, breaking slow path assumptions. And
    // for CompareAndExchange, marking the old value after comparison failure may actually
    // return the reference to `expected`, erroneously indicating success even though we
    // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
    return;
  }

  LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);

  if (codegen->EmitNonBakerReadBarrier()) {
    // We need callee-save registers for both the class object and offset instead of
    // the temporaries reserved in CreateVarHandleCommonLocations().
    static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
    constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
    constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
    if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
      DCHECK_EQ(locations->GetTempCount(), 2u);
      DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
      DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
      locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
    } else {
      DCHECK_EQ(locations->GetTempCount(), 1u);
      DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
      locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
    }
  }

  if (DataType::IsFloatingPointType(value_type)) {
    // We can reuse the declaring class (if present) and offset temporary.
    DCHECK_EQ(locations->GetTempCount(),
              (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
    size_t temps_needed = (value_type == DataType::Type::kFloat64)
        ? (return_success ? 5u : 7u)
        : (return_success ? 3u : 4u);
    locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
  } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
    // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
    DCHECK_EQ(locations->GetTempCount(), 1u);
    if (value_type == DataType::Type::kInt64) {
      // We would ideally add 4 temps for Int64 but that would simply run out of registers,
      // so we instead need to reverse bytes in actual arguments and undo it at the end.
    } else {
      locations->AddRegisterTemps(2u);
    }
  }
  if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
    // Add a temporary for store result, also used for the `old_value_temp` in slow path.
    locations->AddTemp(Location::RequiresRegister());
  }
}

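// Common code generation for the compareAndSet, weakCompareAndSet* and compareAndExchange*
// access modes. `return_success` selects the boolean-returning forms and `strong` selects a
// strong CAS loop; the atomic update itself is emitted by GenerateCompareAndSet() on a
// pointer computed as object + offset. As noted below, float/double values are compared
// bitwise in core registers.
//
// Illustrative Java-level uses (the VarHandle `VH` and its arguments are hypothetical, not
// taken from this code):
//
//   boolean ok = VH.compareAndSet(obj, expected, newValue);              // success flag, strong
//   int witness = (int) VH.compareAndExchange(obj, expected, newValue);  // old value, strong
//   boolean maybe = VH.weakCompareAndSetPlain(obj, expected, newValue);  // success flag, weak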
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
                                                     CodeGeneratorARMVIXL* codegen,
                                                     std::memory_order order,
                                                     bool return_success,
                                                     bool strong,
                                                     bool byte_swap = false) {
  DCHECK(return_success || strong);

  uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
  uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
  DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location expected = locations->InAt(expected_index);
  Location new_value = locations->InAt(new_value_index);
  Location out = locations->Out();

  VarHandleTarget target = GetVarHandleTarget(invoke);
  VarHandleSlowPathARMVIXL* slow_path = nullptr;
  if (!byte_swap) {
    slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
    GenerateVarHandleTarget(invoke, target, codegen);
    if (slow_path != nullptr) {
      slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
      __ Bind(slow_path->GetNativeByteOrderLabel());
    }
  }

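  // The memory order is implemented with explicit barriers around the exclusive load/store
  // loop: seq_cst emits a full (kAnyAny) barrier both before and after, release emits a
  // kAnyStore barrier before, and acquire emits a kLoadAny barrier after the loop.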
  bool seq_cst_barrier = (order == std::memory_order_seq_cst);
  bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
  bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
  DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);

  if (release_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
  }

  // Calculate the pointer to the value.
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register tmp_ptr = temps.Acquire();
  __ Add(tmp_ptr, target.object, target.offset);

  // Move floating point values to temporaries and prepare output registers.
  // Note that float/double CAS uses bitwise comparison, rather than the operator==.
  // Reuse the declaring class (if present) and offset temporary for non-reference types,
  // the address has already been constructed in the scratch register. We are more careful
  // for references due to read and write barrier, see below.
  Location old_value;
  vixl32::Register store_result;
  vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
  DataType::Type cas_type = value_type;
  if (value_type == DataType::Type::kFloat64) {
    vixl32::DRegister expected_vreg = DRegisterFrom(expected);
    vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
    expected =
        LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
    new_value =
        LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
    store_result = RegisterFrom(locations->GetTemp(4));
    old_value = return_success
        ? LocationFrom(success, store_result)
        : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
    if (byte_swap) {
      __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
      __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
      GenerateReverseBytesInPlaceForEachWord(assembler, expected);
      GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
    } else {
      __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
      __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
    }
    cas_type = DataType::Type::kInt64;
  } else if (value_type == DataType::Type::kFloat32) {
    vixl32::SRegister expected_vreg = SRegisterFrom(expected);
    vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
    expected = locations->GetTemp(0);
    new_value = locations->GetTemp(1);
    store_result = RegisterFrom(locations->GetTemp(2));
    old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
    __ Vmov(RegisterFrom(expected), expected_vreg);
    __ Vmov(RegisterFrom(new_value), new_value_vreg);
    if (byte_swap) {
      GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
      GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
    }
    cas_type = DataType::Type::kInt32;
  } else if (value_type == DataType::Type::kInt64) {
    store_result = RegisterFrom(locations->GetTemp(0));
    old_value = return_success
        ? LocationFrom(success, store_result)
        // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
        : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
    if (byte_swap) {
      // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
      GenerateReverseBytesInPlaceForEachWord(assembler, expected);
      expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
      GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
      new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
    }
  } else {
    // Use the last temp. For references with read barriers, this is an extra temporary
    // allocated to avoid overwriting the temporaries for declaring class (if present)
    // and offset as they are needed in the slow path. Otherwise, this is the offset
    // temporary which also works for references without read barriers that need the
    // object register preserved for the write barrier.
    store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
    old_value = return_success ? LocationFrom(store_result) : out;
    if (byte_swap) {
      DCHECK_EQ(locations->GetTempCount(), 3u);
      Location original_expected = expected;
      Location original_new_value = new_value;
      expected = locations->GetTemp(0);
      new_value = locations->GetTemp(1);
      GenerateReverseBytes(assembler, value_type, original_expected, expected);
      GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
    }
  }

  vixl32::Label exit_loop_label;
  vixl32::Label* exit_loop = &exit_loop_label;
  vixl32::Label* cmp_failure = &exit_loop_label;

  if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
    // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
    // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
    vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
    // The slow path store result must not clobber `old_value`.
    vixl32::Register slow_path_store_result = old_value_temp;
    ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
        new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
            invoke,
            strong,
            target.object,
            target.offset,
            RegisterFrom(expected),
            RegisterFrom(new_value),
            RegisterFrom(old_value),
            old_value_temp,
            slow_path_store_result,
            success,
            codegen);
    codegen->AddSlowPath(rb_slow_path);
    exit_loop = rb_slow_path->GetExitLabel();
    cmp_failure = rb_slow_path->GetEntryLabel();
  }

  GenerateCompareAndSet(codegen,
                        cas_type,
                        strong,
                        cmp_failure,
                        /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
                        tmp_ptr,
                        expected,
                        new_value,
                        old_value,
                        store_result,
                        success);
  __ Bind(exit_loop);

  if (acquire_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
  }

  if (byte_swap && value_type == DataType::Type::kInt64) {
    // Undo byte swapping in `expected` and `new_value`. We do not have the
    // information whether the value in these registers shall be needed later.
    GenerateReverseBytesInPlaceForEachWord(assembler, expected);
    GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
  }
  if (!return_success) {
    if (byte_swap) {
      if (value_type == DataType::Type::kInt64) {
        GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
      } else {
        GenerateReverseBytes(assembler, value_type, old_value, out);
      }
    } else if (value_type == DataType::Type::kFloat64) {
      __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
    } else if (value_type == DataType::Type::kFloat32) {
      __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
    // Reuse the offset temporary and scratch register for MarkGCCard.
    vixl32::Register temp = target.offset;
    vixl32::Register card = tmp_ptr;
    // Mark card for object assuming new value is stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MaybeMarkGCCard(
        temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
  }

  if (slow_path != nullptr) {
    DCHECK(!byte_swap);
    __ Bind(slow_path->GetExitLabel());
  }
}

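// The compareAndExchange* and (weak)compareAndSet* access modes below differ only in the
// memory order, in whether they return the old value or a success flag, and in whether a
// weak exclusive-store loop (which may fail spuriously) is allowed.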
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
}

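// Register allocation for the getAndSet / getAndAdd / getAndBitwise* family. The number of
// extra temporaries depends on the update operation, on whether the value is a floating
// point or 64-bit type, on the read barrier configuration, and on whether the byte array
// view slow path may need a byte-reversed copy of the argument.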
static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
                                                 CodeGeneratorARMVIXL* codegen,
                                                 GetAndUpdateOp get_and_update_op) {
  VarHandleOptimizations optimizations(invoke);
  if (optimizations.GetDoNotIntrinsify()) {
    return;
  }

  // Get the type from the shorty as the invokes may not return a value.
  uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
  if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
    // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
    // the passed reference and reloads it from the field, thus seeing the new value
    // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
    return;
  }

  LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);

  // We can reuse the declaring class (if present) and offset temporary, except for
  // non-Baker read barriers that need them for the slow path.
  DCHECK_EQ(locations->GetTempCount(),
            (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);

  if (get_and_update_op == GetAndUpdateOp::kSet) {
    if (DataType::IsFloatingPointType(value_type)) {
      // Add temps needed to do the GenerateGetAndUpdate() with core registers.
      size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
      locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
    } else if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
      // We need to preserve the declaring class (if present) and offset for read barrier
      // slow paths, so we must use a separate temporary for the exclusive store result.
      locations->AddTemp(Location::RequiresRegister());
    } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
      // Add temps for the byte-reversed `arg` in the byte array view slow path.
      DCHECK_EQ(locations->GetTempCount(), 1u);
      locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
    }
  } else {
    // We need temporaries for the new value and exclusive store result.
    size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
    if (get_and_update_op != GetAndUpdateOp::kAdd &&
        GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
      // Add temps for the byte-reversed `arg` in the byte array view slow path.
      if (value_type == DataType::Type::kInt64) {
        // We would ideally add 2 temps for Int64 but that would simply run out of registers,
        // so we instead need to reverse bytes in the actual argument and undo it at the end.
      } else {
        temps_needed += 1u;
      }
    }
    locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
    if (DataType::IsFloatingPointType(value_type)) {
      // Note: This shall allocate a D register. There is no way to request an S register.
      locations->AddTemp(Location::RequiresFpuRegister());
    }
  }

  // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
  DataType::Type return_type = invoke->GetType();
  const bool is_void = return_type == DataType::Type::kVoid;
  DCHECK_IMPLIES(!is_void, return_type == value_type);
  if (is_void) {
    if (DataType::IsFloatingPointType(value_type)) {
      // Note: This shall allocate a D register. There is no way to request an S register.
      locations->AddTemp(Location::RequiresFpuRegister());
    } else if (DataType::Is64BitType(value_type)) {
      // We need two temporaries for non-FPU 64-bit types.
      locations->AddTemp(Location::RequiresRegister());
      locations->AddTemp(Location::RequiresRegister());
    } else {
      locations->AddTemp(Location::RequiresRegister());
    }
  }
}

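// Common code generation for the getAndSet, getAndAdd and getAndBitwise* access modes. The
// previous value is produced in `out` (or in "mimic out" temporaries for void-returning
// calls) and the update is emitted by GenerateGetAndUpdate() in an exclusive load/store
// loop on a pointer computed as object + offset.
//
// Illustrative Java-level uses (the VarHandle `VH` and its arguments are hypothetical, not
// taken from this code):
//
//   int previous = (int) VH.getAndAdd(obj, 1);          // GetAndUpdateOp::kAdd, seq_cst
//   int oldBits = (int) VH.getAndBitwiseOr(obj, 0x4);   // GetAndUpdateOp::kOr, seq_cst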
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
                                          CodeGeneratorARMVIXL* codegen,
                                          GetAndUpdateOp get_and_update_op,
                                          std::memory_order order,
                                          bool byte_swap = false) {
  // Get the type from the shorty as the invokes may not return a value.
  uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location arg = locations->InAt(arg_index);
  DataType::Type return_type = invoke->GetType();
  const bool is_void = return_type == DataType::Type::kVoid;
  DCHECK_IMPLIES(!is_void, return_type == value_type);

  size_t temps_that_mimic_out;
  Location result;
  const size_t temp_count = locations->GetTempCount();
  if (is_void) {
    if (value_type == DataType::Type::kFloat32) {
      // Note: Since we allocated a D register, use the low part.
      DCHECK(locations->GetTemp(temp_count - 1u).IsFpuRegisterPair());
      temps_that_mimic_out = 1u;
      result = locations->GetTemp(temp_count - 1u).ToLow();
    } else if (!DataType::IsFloatingPointType(value_type) && DataType::Is64BitType(value_type)) {
      temps_that_mimic_out = 2u;
      result = LocationFrom(RegisterFrom(locations->GetTemp(temp_count - 2u)),
                            RegisterFrom(locations->GetTemp(temp_count - 1u)));
    } else {
      temps_that_mimic_out = 1u;
      result = locations->GetTemp(temp_count - 1u);
    }
  } else {
    temps_that_mimic_out = 0u;
    result = locations->Out();
  }

  VarHandleTarget target = GetVarHandleTarget(invoke);
  VarHandleSlowPathARMVIXL* slow_path = nullptr;
  if (!byte_swap) {
    slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
    GenerateVarHandleTarget(invoke, target, codegen);
    if (slow_path != nullptr) {
      slow_path->SetGetAndUpdateOp(get_and_update_op);
      __ Bind(slow_path->GetNativeByteOrderLabel());
    }
  }

  bool seq_cst_barrier = (order == std::memory_order_seq_cst);
  bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
  bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
  DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);

  if (release_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
  }

  // Use the scratch register for the pointer to the target location.
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register tmp_ptr = temps.Acquire();
  __ Add(tmp_ptr, target.object, target.offset);

  // Use the offset temporary for the exclusive store result.
  vixl32::Register store_result = target.offset;

  // The load/store type is never floating point.
  DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
      ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
      : value_type;

  // Prepare register for old value and temporaries if any.
  Location old_value = result;
  Location maybe_temp = Location::NoLocation();
  Location maybe_vreg_temp = Location::NoLocation();
  if (get_and_update_op == GetAndUpdateOp::kSet) {
    // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
    // rather than moving between core and FP registers in the loop.
    if (value_type == DataType::Type::kFloat64) {
      vixl32::DRegister arg_vreg = DRegisterFrom(arg);
      // `store_result` and the four here, plus maybe an extra one for the temp that mimics the
      // "out" register.
      DCHECK_EQ(temp_count, 5u + temps_that_mimic_out);
      old_value =
          LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
      arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
      if (byte_swap) {
        __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
      } else {
        __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
      }
    } else if (value_type == DataType::Type::kFloat32) {
      vixl32::SRegister arg_vreg = SRegisterFrom(arg);
      // `store_result` and the two here, plus maybe an extra one for the temp that mimics the
      // "out" register.
      DCHECK_EQ(temp_count, 3u + temps_that_mimic_out);
      old_value = locations->GetTemp(1);
      arg = locations->GetTemp(2);
      __ Vmov(RegisterFrom(arg), arg_vreg);
      if (byte_swap) {
        GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
      }
    } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
      if (kUseBakerReadBarrier) {
        // Load the old value initially to a temporary register.
        // We shall move it to `out` later with a read barrier.
        old_value = LocationFrom(store_result);
        store_result = RegisterFrom(result);  // Use `result` for the exclusive store result.
      } else {
        // The store_result is a separate temporary.
        DCHECK(!store_result.Is(target.object));
        DCHECK(!store_result.Is(target.offset));
      }
    } else if (byte_swap) {
      Location original_arg = arg;
      arg = locations->GetTemp(1);
      if (value_type == DataType::Type::kInt64) {
        arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
        // Swap the high/low regs and reverse the bytes in each after the load.
        old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result));
      }
      GenerateReverseBytes(assembler, value_type, original_arg, arg);
    }
  } else {
    maybe_temp = DataType::Is64BitType(value_type)
        ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
        : locations->GetTemp(1);
    DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
    if (DataType::IsFloatingPointType(value_type)) {
      maybe_vreg_temp = locations->GetTemp(temp_count - 1u - temps_that_mimic_out);
      DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
    }
    if (byte_swap) {
      if (get_and_update_op == GetAndUpdateOp::kAdd) {
        // We need to do the byte swapping in the CAS loop for GetAndAdd.
        get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
      } else if (value_type == DataType::Type::kInt64) {
        // Swap the high/low regs and reverse the bytes in each after the load.
        old_value = LocationFrom(HighRegisterFrom(result), LowRegisterFrom(result));
        // Due to lack of registers, reverse bytes in `arg` and undo that later.
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
        arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
      } else {
        DCHECK(!DataType::IsFloatingPointType(value_type));
        Location original_arg = arg;
        arg = locations->GetTemp(2);
        DCHECK(!arg.Contains(LocationFrom(store_result)));
        GenerateReverseBytes(assembler, value_type, original_arg, arg);
      }
    }
  }

  GenerateGetAndUpdate(codegen,
                       get_and_update_op,
                       load_store_type,
                       tmp_ptr,
                       arg,
                       old_value,
                       store_result,
                       maybe_temp,
                       maybe_vreg_temp);

  if (acquire_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
  }

  if (!is_void) {
    if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
      if (value_type == DataType::Type::kInt64) {
        GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
        if (get_and_update_op != GetAndUpdateOp::kSet) {
          // Undo byte swapping in `arg`. We do not have the information
          // whether the value in these registers shall be needed later.
          GenerateReverseBytesInPlaceForEachWord(assembler, arg);
        }
      } else {
        GenerateReverseBytes(assembler, value_type, old_value, result);
      }
    } else if (get_and_update_op == GetAndUpdateOp::kSet &&
               DataType::IsFloatingPointType(value_type)) {
      if (value_type == DataType::Type::kFloat64) {
        __ Vmov(DRegisterFrom(result), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
      } else {
        __ Vmov(SRegisterFrom(result), RegisterFrom(old_value));
      }
    } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
      if (kUseBakerReadBarrier) {
        codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(result),
                                                           RegisterFrom(old_value));
      } else {
        codegen->GenerateReadBarrierSlow(
            invoke,
            Location::RegisterLocation(RegisterFrom(result).GetCode()),
            Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
            Location::RegisterLocation(target.object.GetCode()),
            /*offset=*/ 0u,
            /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
      }
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
    // Reuse the offset temporary and scratch register for MarkGCCard.
    vixl32::Register temp = target.offset;
    vixl32::Register card = tmp_ptr;
    // Mark card for object assuming new value is stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MaybeMarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
  }

  if (slow_path != nullptr) {
    DCHECK(!byte_swap);
    __ Bind(slow_path->GetExitLabel());
  }
}

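// The visitors below map the getAndSet, getAndAdd and getAndBitwise{And,Or,Xor} access
// modes (and their Acquire/Release variants) to the corresponding GetAndUpdateOp and
// std::memory_order; the plain forms use sequentially consistent ordering.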
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}

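// Slow path used when the VarHandle is a byte array view (see
// MethodHandles.byteArrayViewVarHandle() in the Java API). It verifies that the VarHandle
// really is a ByteArrayViewVarHandle, checks bounds and alignment, and then either returns
// to the native-byte-order fast path or emits the access with an explicit byte swap.
//
// Illustrative Java-level origin of such a VarHandle (hypothetical, not from this code):
//
//   VarHandle vh =
//       MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.BIG_ENDIAN);
//   int v = (int) vh.get(bytes, index);  // may require a byte swap on a little-endian CPU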
void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
  DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
  CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  HInvoke* invoke = GetInvoke();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);
  vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
  vixl32::Register varhandle = InputRegisterAt(invoke, 0);
  vixl32::Register object = InputRegisterAt(invoke, 1);
  vixl32::Register index = InputRegisterAt(invoke, 2);

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  __ Bind(GetByteArrayViewCheckLabel());

  VarHandleTarget target = GetVarHandleTarget(invoke);
  {
    // Use the offset temporary register. It is not used yet at this point.
    vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register temp2 = temps.Acquire();

    // The main path checked that the coordinateType0 is an array class that matches
    // the class of the actual coordinate argument but it does not match the value type.
    // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
    __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
    codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
    __ Cmp(temp, temp2);
    __ B(ne, GetEntryLabel());

    // Check for array index out of bounds.
    __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
    if (!temp.IsLow()) {
      // Avoid using the 32-bit `cmp temp, #imm` in IT block by loading `size` into `temp2`.
      __ Mov(temp2, size_operand);
    }
    __ Subs(temp, temp, index);
    {
      // Use ExactAssemblyScope here because we are using IT.
      ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                  2 * k16BitT32InstructionSizeInBytes);
      __ it(hs);
      if (temp.IsLow()) {
        __ cmp(hs, temp, size_operand);
      } else {
        __ cmp(hs, temp, temp2);
      }
    }
    __ B(lo, GetEntryLabel());

    // Construct the target.
    __ Add(target.offset, index, data_offset.Int32Value());  // Note: `temp` cannot be used below.

    // Alignment check. For unaligned access, go to the runtime.
    DCHECK(IsPowerOfTwo(size));
    __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
    __ B(ne, GetEntryLabel());

    // Byte order check. For native byte order return to the main path.
    if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
      HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
      if (IsZeroBitPattern(arg)) {
        // There is no reason to differentiate between native byte order and byte-swap
        // for setting a zero bit pattern. Just return to the main path.
        __ B(GetNativeByteOrderLabel());
        return;
      }
    }
    __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
    __ Cmp(temp2, 0);
    __ B(ne, GetNativeByteOrderLabel());
  }

  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(
          invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
      break;
  }
  __ B(GetExitLabel());
}

#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
#undef MARK_UNIMPLEMENTED

UNREACHABLE_INTRINSICS(ARMVIXL)

#undef __

}  // namespace arm
}  // namespace art