/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_riscv64.h"

#include "code_generator_riscv64.h"
#include "intrinsic_objects.h"
#include "intrinsics_utils.h"
#include "optimizing/locations.h"
#include "well_known_classes.h"

namespace art HIDDEN {
namespace riscv64 {

using IntrinsicSlowPathRISCV64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorRISCV64,
                                                   SlowPathCodeRISCV64,
                                                   Riscv64Assembler>;

#define __ assembler->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 {
 public:
  ReadBarrierSystemArrayCopySlowPathRISCV64(HInstruction* instruction, Location tmp)
      : SlowPathCodeRISCV64(instruction), tmp_(tmp) {}

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    DCHECK(codegen_in->EmitBakerReadBarrier());
    CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
    Riscv64Assembler* assembler = codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    XRegister src_curr_addr = locations->GetTemp(0).AsRegister<XRegister>();
    XRegister dst_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
    XRegister src_stop_addr = locations->GetTemp(2).AsRegister<XRegister>();
    XRegister tmp_reg = tmp_.AsRegister<XRegister>();

    __ Bind(GetEntryLabel());
    // The source range and destination pointer were initialized before entering the slow-path.
    Riscv64Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Loadwu(tmp_reg, src_curr_addr, 0);
    codegen->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathRISCV64::EmitNativeCode for more
    // explanations.)
    int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(tmp_);
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->MaybePoisonHeapReference(tmp_reg);
    __ Storew(tmp_reg, dst_curr_addr, 0);
    __ Addi(src_curr_addr, src_curr_addr, element_size);
    __ Addi(dst_curr_addr, dst_curr_addr, element_size);
    __ Bne(src_curr_addr, src_stop_addr, &slow_copy_loop);
    __ J(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierSystemArrayCopySlowPathRISCV64";
  }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathRISCV64);
};

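// Runs the locations-building dispatch for `invoke` and reports whether it was
// recognized and set up as an intrinsic (i.e. whether an intrinsified
// LocationSummary was created).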
bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() {
  return codegen_->GetAssembler();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));

  LocationSummary* const locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;

  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));

  LocationSummary* const locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;

  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
}

static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));

  LocationSummary* const locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);

  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator,
                                  HInvoke* invoke,
                                  Location::OutputOverlap overlaps = Location::kOutputOverlap) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), overlaps);
}

void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = GetAssembler();
  __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = GetAssembler();
  __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = GetAssembler();
  __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = GetAssembler();
  __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = GetAssembler();
  XRegister out = locations->Out().AsRegister<XRegister>();
  __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>());
  __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
  __ Snez(out, out);
}

void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = GetAssembler();
  XRegister out = locations->Out().AsRegister<XRegister>();
  __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>());
  __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
  __ Snez(out, out);
}

static void CreateIntToIntNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

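// Helper for the Memory.peek* intrinsics: passes the output register and the
// address register (input 0) to the given load emitter.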
template <typename EmitOp>
void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) {
  LocationSummary* locations = invoke->GetLocations();
  emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); });
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); });
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); });
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); });
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

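// Helper for the Memory.poke* intrinsics: passes the value register (input 1)
// and the address register (input 0) to the given store emitter.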
template <typename EmitOp>
void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) {
  LocationSummary* locations = invoke->GetLocations();
  emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); });
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); });
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); });
}

void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); });
}

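// Byte-reverses `rs1` into `rd` for the given type. Only the full 64-bit REV8 is
// available, so narrower types are reversed in the whole register and then shifted
// down: SRLI zero-extends the result for kUint16 while SRAI keeps kInt16/kInt32
// results properly sign-extended. Floating-point results are moved back via FMV.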
static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
                                 Location rd,
                                 XRegister rs1,
                                 DataType::Type type) {
  Riscv64Assembler* assembler = codegen->GetAssembler();
  switch (type) {
    case DataType::Type::kUint16:
      // There is no 16-bit reverse bytes instruction.
      __ Rev8(rd.AsRegister<XRegister>(), rs1);
      __ Srli(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
      break;
    case DataType::Type::kInt16:
      // There is no 16-bit reverse bytes instruction.
      __ Rev8(rd.AsRegister<XRegister>(), rs1);
      __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
      break;
    case DataType::Type::kInt32:
      // There is no 32-bit reverse bytes instruction.
      __ Rev8(rd.AsRegister<XRegister>(), rs1);
      __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 32);
      break;
    case DataType::Type::kInt64:
      __ Rev8(rd.AsRegister<XRegister>(), rs1);
      break;
    case DataType::Type::kFloat32:
      // There is no 32-bit reverse bytes instruction.
      __ Rev8(rs1, rs1);  // Note: Clobbers `rs1`.
      __ Srai(rs1, rs1, 32);
      __ FMvWX(rd.AsFpuRegister<FRegister>(), rs1);
      break;
    case DataType::Type::kFloat64:
      __ Rev8(rs1, rs1);  // Note: Clobbers `rs1`.
      __ FMvDX(rd.AsFpuRegister<FRegister>(), rs1);
      break;
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }
}

static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
                                 HInvoke* invoke,
                                 DataType::Type type) {
  DCHECK_EQ(type, invoke->GetType());
  LocationSummary* locations = invoke->GetLocations();
  GenerateReverseBytes(codegen, locations->Out(), locations->InAt(0).AsRegister<XRegister>(), type);
}

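// Bit-reverses the input for Integer/Long.reverse() with the classic divide-and-conquer
// approach: swap adjacent bits, then 2-bit pairs, then nibbles, and finally byte-reverse
// the whole value. SH1ADD/SH2ADD fold the required left shift into the add that
// recombines the two masked halves. For kInt64 the 32-bit masks are widened to 64 bits
// by `maybe_extend_mask`.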
static void GenerateReverse(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
  DCHECK_EQ(type, invoke->GetType());
  Riscv64Assembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  XRegister in = locations->InAt(0).AsRegister<XRegister>();
  XRegister out = locations->Out().AsRegister<XRegister>();
  ScratchRegisterScope srs(assembler);
  XRegister temp1 = srs.AllocateXRegister();
  XRegister temp2 = srs.AllocateXRegister();

  auto maybe_extend_mask = [type, assembler](XRegister mask, XRegister temp) {
    if (type == DataType::Type::kInt64) {
      __ Slli(temp, mask, 32);
      __ Add(mask, mask, temp);
    }
  };

  // Swap bits in bit pairs.
  __ Li(temp1, 0x55555555);
  maybe_extend_mask(temp1, temp2);
  __ Srli(temp2, in, 1);
  __ And(out, in, temp1);
  __ And(temp2, temp2, temp1);
  __ Sh1Add(out, out, temp2);

  // Swap bit pairs in 4-bit groups.
  __ Li(temp1, 0x33333333);
  maybe_extend_mask(temp1, temp2);
  __ Srli(temp2, out, 2);
  __ And(out, out, temp1);
  __ And(temp2, temp2, temp1);
  __ Sh2Add(out, out, temp2);

  // Swap 4-bit groups in 8-bit groups.
  __ Li(temp1, 0x0f0f0f0f);
  maybe_extend_mask(temp1, temp2);
  __ Srli(temp2, out, 4);
  __ And(out, out, temp1);
  __ And(temp2, temp2, temp1);
  __ Slli(out, out, 4);
  __ Add(out, out, temp2);

  GenerateReverseBytes(codegen, Location::RegisterLocation(out), out, type);
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverse(HInvoke* invoke) {
  GenerateReverse(codegen_, invoke, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderRISCV64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongReverse(HInvoke* invoke) {
  GenerateReverse(codegen_, invoke, DataType::Type::kInt64);
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
  GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt64);
}

void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
  GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt16);
}

template <typename EmitOp>
void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) {
  LocationSummary* locations = invoke->GetLocations();
  emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); });
}

void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); });
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

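// Integer.highestOneBit(): logically shift the sign bit (0x80000000) right by the
// number of leading zeros and AND it with the input; the AND also guarantees a zero
// result for a zero input, as noted below. Long.highestOneBit() uses the same pattern
// with the 64-bit CLZ and sign bit.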
void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
    ScratchRegisterScope srs(assembler);
    XRegister tmp = srs.AllocateXRegister();
    XRegister tmp2 = srs.AllocateXRegister();
    __ Clzw(tmp, rs1);
    __ Li(tmp2, INT64_C(-0x80000000));
    __ Srlw(tmp2, tmp2, tmp);
    __ And(rd, rs1, tmp2);  // Make sure the result is zero if the input is zero.
  });
}

void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
    ScratchRegisterScope srs(assembler);
    XRegister tmp = srs.AllocateXRegister();
    XRegister tmp2 = srs.AllocateXRegister();
    __ Clz(tmp, rs1);
    __ Li(tmp2, INT64_C(-0x8000000000000000));
    __ Srl(tmp2, tmp2, tmp);
    __ And(rd, rs1, tmp2);  // Make sure the result is zero if the input is zero.
  });
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

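// Integer.lowestOneBit(): isolate the lowest set bit with the `x & -x` idiom
// (Long.lowestOneBit() below uses the 64-bit negate).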
void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
    ScratchRegisterScope srs(assembler);
    XRegister tmp = srs.AllocateXRegister();
    __ NegW(tmp, rs1);
    __ And(rd, rs1, tmp);
  });
}

void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
    ScratchRegisterScope srs(assembler);
    XRegister tmp = srs.AllocateXRegister();
    __ Neg(tmp, rs1);
    __ And(rd, rs1, tmp);
  });
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); });
}

void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); });
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); });
}

void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntNoOverlapLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); });
}

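// Integer/Long.divideUnsigned() and remainderUnsigned(): emit the unsigned DIV/REM
// instruction directly and branch to the intrinsic slow path for a zero divisor,
// letting the managed implementation handle that case.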
static void GenerateDivRemUnsigned(HInvoke* invoke, bool is_div, CodeGeneratorRISCV64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Riscv64Assembler* assembler = codegen->GetAssembler();
  DataType::Type type = invoke->GetType();
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
  XRegister divisor = locations->InAt(1).AsRegister<XRegister>();
  XRegister out = locations->Out().AsRegister<XRegister>();

  // Check if divisor is zero, bail to managed implementation to handle.
  SlowPathCodeRISCV64* slow_path =
      new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
  codegen->AddSlowPath(slow_path);
  __ Beqz(divisor, slow_path->GetEntryLabel());

  if (is_div) {
    if (type == DataType::Type::kInt32) {
      __ Divuw(out, dividend, divisor);
    } else {
      __ Divu(out, dividend, divisor);
    }
  } else {
    if (type == DataType::Type::kInt32) {
      __ Remuw(out, dividend, divisor);
    } else {
      __ Remu(out, dividend, divisor);
    }
  }

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
  GenerateDivRemUnsigned(invoke, /*is_div=*/true, codegen_);
}

void IntrinsicLocationsBuilderRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
  GenerateDivRemUnsigned(invoke, /*is_div=*/true, codegen_);
}

void IntrinsicLocationsBuilderRISCV64::VisitIntegerRemainderUnsigned(HInvoke* invoke) {
  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitIntegerRemainderUnsigned(HInvoke* invoke) {
  GenerateDivRemUnsigned(invoke, /*is_div=*/false, codegen_);
}

void IntrinsicLocationsBuilderRISCV64::VisitLongRemainderUnsigned(HInvoke* invoke) {
  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorRISCV64::VisitLongRemainderUnsigned(HInvoke* invoke) {
  GenerateDivRemUnsigned(invoke, /*is_div=*/false, codegen_);
}

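// Expands to the locations builder and code generator for the <Boxed>.valueOf()
// intrinsic of each type in BOXED_TYPES, delegating the actual code generation to
// HandleValueOf() below.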
#define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
  void IntrinsicLocationsBuilderRISCV64::Visit##name##ValueOf(HInvoke* invoke) {         \
    InvokeRuntimeCallingConvention calling_convention;                                   \
    IntrinsicVisitor::ComputeValueOfLocations(                                           \
        invoke,                                                                          \
        codegen_,                                                                        \
        low,                                                                             \
        (high) - (low) + 1,                                                              \
        calling_convention.GetReturnLocation(DataType::Type::kReference),                \
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
  }                                                                                      \
  void IntrinsicCodeGeneratorRISCV64::Visit##name##ValueOf(HInvoke* invoke) {            \
    IntrinsicVisitor::ValueOfInfo info =                                                 \
        IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
                                             codegen_->GetCompilerOptions(),             \
                                             WellKnownClasses::java_lang_##name##_value, \
                                             low,                                        \
                                             (high) - (low) + 1,                         \
                                             start_index);                               \
    HandleValueOf(invoke, info, type);                                                   \
  }
BOXED_TYPES(VISIT_INTRINSIC)
#undef VISIT_INTRINSIC

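// Common code for the <Boxed>.valueOf() intrinsics. For a constant argument inside the
// cache range, the cached boot image object is embedded directly; for a constant outside
// the range, a new object is allocated and its `value` field is initialized. For a
// non-constant argument, the range check is done at runtime and the code either loads
// the cached object from the boot image array or falls through to the allocation path.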
void IntrinsicCodeGeneratorRISCV64::HandleValueOf(HInvoke* invoke,
                                                  const IntrinsicVisitor::ValueOfInfo& info,
                                                  DataType::Type type) {
  Riscv64Assembler* assembler = codegen_->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  XRegister out = locations->Out().AsRegister<XRegister>();
  ScratchRegisterScope srs(assembler);
  XRegister temp = srs.AllocateXRegister();
  auto allocate_instance = [&]() {
    DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
    codegen_->LoadIntrinsicDeclaringClass(out, invoke);
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
  };
  if (invoke->InputAt(0)->IsIntConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (static_cast<uint32_t>(value - info.low) < info.length) {
      // Just embed the object in the code.
      DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
      codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
    } else {
      DCHECK(locations->CanCall());
      // Allocate and initialize a new object.
      // TODO: If we JIT, we could allocate the object now, and store it in the
      // JIT object table.
      allocate_instance();
      __ Li(temp, value);
      codegen_->GetInstructionVisitor()->Store(
          Location::RegisterLocation(temp), out, info.value_offset, type);
      // Class pointer and `value` final field stores require a barrier before publication.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }
  } else {
    DCHECK(locations->CanCall());
    XRegister in = locations->InAt(0).AsRegister<XRegister>();
    Riscv64Label allocate, done;
    // Check bounds of our cache.
    __ AddConst32(out, in, -info.low);
    __ Li(temp, info.length);
    __ Bgeu(out, temp, &allocate);
    // If the value is within the bounds, load the object directly from the array.
    codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
    __ Sh2Add(temp, out, temp);
    __ Loadwu(out, temp, 0);
    codegen_->MaybeUnpoisonHeapReference(out);
    __ J(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new object.
    allocate_instance();
    codegen_->GetInstructionVisitor()->Store(
        Location::RegisterLocation(in), out, info.value_offset, type);
    // Class pointer and `value` final field stores require a barrier before publication.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
  IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);

  if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
    invoke->GetLocations()->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicCodeGeneratorRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location obj = locations->InAt(0);
  Location out = locations->Out();

  SlowPathCodeRISCV64* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
  codegen_->AddSlowPath(slow_path);

  if (codegen_->EmitReadBarrier()) {
    // Check self->GetWeakRefAccessEnabled().
    ScratchRegisterScope srs(assembler);
    XRegister temp = srs.AllocateXRegister();
    __ Loadwu(temp, TR, Thread::WeakRefAccessEnabledOffset<kRiscv64PointerSize>().Int32Value());
    static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
    __ Bnez(temp, slow_path->GetEntryLabel());
  }

  {
    // Load the java.lang.ref.Reference class.
    ScratchRegisterScope srs(assembler);
    XRegister temp = srs.AllocateXRegister();
    codegen_->LoadIntrinsicDeclaringClass(temp, invoke);

    // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
    MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
    DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
    DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
              IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
    __ Loadhu(temp, temp, disable_intrinsic_offset.Int32Value());
    __ Bnez(temp, slow_path->GetEntryLabel());
  }

  // Load the value from the field.
  uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
  if (codegen_->EmitBakerReadBarrier()) {
    codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                    out,
                                                    obj.AsRegister<XRegister>(),
                                                    referent_offset,
                                                    /*temp=*/locations->GetTemp(0),
                                                    /*needs_null_check=*/false);
  } else {
    codegen_->GetInstructionVisitor()->Load(
        out, obj.AsRegister<XRegister>(), referent_offset, DataType::Type::kReference);
    codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
  }
  // Emit memory barrier for load-acquire.
  codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
  IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  XRegister obj = locations->InAt(0).AsRegister<XRegister>();
  XRegister other = locations->InAt(1).AsRegister<XRegister>();
  XRegister out = locations->Out().AsRegister<XRegister>();

  uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  codegen_->GetInstructionVisitor()->Load(
      Location::RegisterLocation(out), obj, referent_offset, DataType::Type::kReference);
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  codegen_->MaybeUnpoisonHeapReference(out);

  // Emit memory barrier for load-acquire.
  codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  if (codegen_->EmitReadBarrier()) {
    DCHECK(kUseBakerReadBarrier);

    Riscv64Label calculate_result;

    // If equal to `other`, the loaded reference is final (it cannot be a from-space reference).
    __ Beq(out, other, &calculate_result);

    // If the GC is not marking, the loaded reference is final.
    ScratchRegisterScope srs(assembler);
    XRegister tmp = srs.AllocateXRegister();
    __ Loadwu(tmp, TR, Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value());
    __ Beqz(tmp, &calculate_result);

    // Check if the loaded reference is null.
    __ Beqz(out, &calculate_result);

    // For correct memory visibility, we need a barrier before loading the lock word to
    // synchronize with the publishing of `other` by the CC GC. However, as long as the
    // load-acquire above is implemented as a plain load followed by a barrier (rather
    // than an atomic load-acquire instruction which synchronizes only with other
    // instructions on the same memory location), that barrier is sufficient.

    // Load the lockword and check if it is a forwarding address.
    static_assert(LockWord::kStateShift == 30u);
    static_assert(LockWord::kStateForwardingAddress == 3u);
    // Load the lock word sign-extended. Comparing it to the sign-extended forwarding
    // address bits as unsigned is the same as comparing both zero-extended.
    __ Loadw(tmp, out, monitor_offset);
    // Materialize sign-extended forwarding address bits. This is a single LUI instruction.
    XRegister tmp2 = srs.AllocateXRegister();
    __ Li(tmp2, INT64_C(-1) & ~static_cast<int64_t>((1 << LockWord::kStateShift) - 1));
    // If we do not have a forwarding address, the loaded reference cannot be the same as `other`,
    // so we proceed to calculate the result with `out != other`.
    __ Bltu(tmp, tmp2, &calculate_result);

    // Extract the forwarding address for comparison with `other`.
    // Note that the high 32 bits shall not be used for the result calculation.
    __ Slliw(out, tmp, LockWord::kForwardingAddressShift);

    __ Bind(&calculate_result);
  }

  // Calculate the result `out == other`.
  __ Subw(out, out, other);
  __ Seqz(out, out);
}

static void GenerateVisitStringIndexOf(HInvoke* invoke,
                                       Riscv64Assembler* assembler,
                                       CodeGeneratorRISCV64* codegen,
                                       bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
  SlowPathCodeRISCV64* slow_path = nullptr;
  HInstruction* code_point = invoke->InputAt(1);
  if (code_point->IsIntConstant()) {
    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
      codegen->AddSlowPath(slow_path);
      __ J(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else if (code_point->GetType() != DataType::Type::kUint16) {
    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
    codegen->AddSlowPath(slow_path);
    ScratchRegisterScope srs(assembler);
    XRegister tmp = srs.AllocateXRegister();
    __ Srliw(tmp, locations->InAt(1).AsRegister<XRegister>(), 16);
    __ Bnez(tmp, slow_path->GetEntryLabel());
  }

  if (start_at_zero) {
    // Start-index = 0.
    XRegister tmp_reg = locations->GetTemp(0).AsRegister<XRegister>();
    __ Li(tmp_reg, 0);
  }

  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();

  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOf(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
  // best to align the inputs accordingly.
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));

  // Need to send start_index=0.
  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
}

void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}

void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
  // best to align the inputs accordingly.
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
}

void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}

void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}

void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  XRegister byte_array = locations->InAt(0).AsRegister<XRegister>();

  SlowPathCodeRISCV64* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ Beqz(byte_array, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}

void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromChars(HInvoke* invoke) {
  // No need to emit code checking whether `locations->InAt(2)` is a null
  // pointer, as callers of the native method
  //
  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
  //
  // all include a null check on `data` before calling that method.
  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
}

void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}

void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromString(HInvoke* invoke) {
  Riscv64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  XRegister string_to_copy = locations->InAt(0).AsRegister<XRegister>();

  SlowPathCodeRISCV64* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ Beqz(string_to_copy, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
  __ Bind(slow_path->GetExitLabel());
}

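// Emits a store with the requested memory ordering: seq_cst stores go through
// StoreSeqCst(), release stores are preceded by an any-store barrier, and relaxed
// stores are emitted as plain stores.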
static void GenerateSet(CodeGeneratorRISCV64* codegen,
                        std::memory_order order,
                        Location value,
                        XRegister rs1,
                        int32_t offset,
                        DataType::Type type) {
  if (order == std::memory_order_seq_cst) {
    codegen->GetInstructionVisitor()->StoreSeqCst(value, rs1, offset, type);
  } else {
    if (order == std::memory_order_release) {
      codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
    } else {
      DCHECK(order == std::memory_order_relaxed);
    }
    codegen->GetInstructionVisitor()->Store(value, rs1, offset, type);
  }
}

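// Maps a std::memory_order to the acquire/release bits for an LR/SC pair:
// acquire -> LR.aq, release -> SC.rl, seq_cst -> LR.aqrl + SC.rl, relaxed -> none.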
std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) {
  AqRl load_aqrl = AqRl::kNone;
  AqRl store_aqrl = AqRl::kNone;
  if (order == std::memory_order_acquire) {
    load_aqrl = AqRl::kAcquire;
  } else if (order == std::memory_order_release) {
    store_aqrl = AqRl::kRelease;
  } else if (order == std::memory_order_seq_cst) {
    load_aqrl = AqRl::kAqRl;
    store_aqrl = AqRl::kRelease;
  } else {
    DCHECK(order == std::memory_order_relaxed);
  }
  return {load_aqrl, store_aqrl};
}

AqRl GetAmoAqRl(std::memory_order order) {
  AqRl amo_aqrl = AqRl::kNone;
  if (order == std::memory_order_acquire) {
    amo_aqrl = AqRl::kAcquire;
  } else if (order == std::memory_order_release) {
    amo_aqrl = AqRl::kRelease;
  } else {
    DCHECK(order == std::memory_order_seq_cst);
    amo_aqrl = AqRl::kAqRl;
  }
  return amo_aqrl;
}

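// Emits a load-reserved (LR) of the appropriate width; references are loaded with
// LR.W and then zero-extended (see the TODO below about avoiding the extra
// instructions this currently takes).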
EmitLoadReserved(Riscv64Assembler * assembler,DataType::Type type,XRegister ptr,XRegister old_value,AqRl aqrl)1075 static void EmitLoadReserved(Riscv64Assembler* assembler,
1076 DataType::Type type,
1077 XRegister ptr,
1078 XRegister old_value,
1079 AqRl aqrl) {
1080 switch (type) {
1081 case DataType::Type::kInt32:
1082 __ LrW(old_value, ptr, aqrl);
1083 break;
1084 case DataType::Type::kReference:
1085 __ LrW(old_value, ptr, aqrl);
1086 // TODO(riscv64): The `ZextW()` macro currently emits `SLLI+SRLI` which are from the
1087 // base "I" instruction set. When the assembler is updated to use a single-instruction
1088 // `ZextW()` macro, either the ADD.UW, or the C.ZEXT.W (16-bit encoding), we need to
1089 // rewrite this to avoid these non-"I" instructions. We could, for example, sign-extend
1090 // the reference and do the CAS as `Int32`.
1091 __ ZextW(old_value, old_value);
1092 break;
1093 case DataType::Type::kInt64:
1094 __ LrD(old_value, ptr, aqrl);
1095 break;
1096 default:
1097 LOG(FATAL) << "Unexpected type: " << type;
1098 UNREACHABLE();
1099 }
1100 }
1101
VisitStringEquals(HInvoke * invoke)1102 void IntrinsicLocationsBuilderRISCV64::VisitStringEquals(HInvoke* invoke) {
1103 LocationSummary* locations =
1104 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1105 locations->SetInAt(0, Location::RequiresRegister());
1106 locations->SetInAt(1, Location::RequiresRegister());
1107 locations->AddTemp(Location::RequiresRegister());
1108 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1109 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1110 // Then we shall need an extra temporary register instead of the output register.
1111 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1112 }
1113
VisitStringEquals(HInvoke * invoke)1114 void IntrinsicCodeGeneratorRISCV64::VisitStringEquals(HInvoke* invoke) {
1115 Riscv64Assembler* assembler = GetAssembler();
1116 LocationSummary* locations = invoke->GetLocations();
1117
1118 // Get offsets of count, value, and class fields within a string object.
1119 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1120 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1121 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1122
1123 XRegister str = locations->InAt(0).AsRegister<XRegister>();
1124 XRegister arg = locations->InAt(1).AsRegister<XRegister>();
1125 XRegister out = locations->Out().AsRegister<XRegister>();
1126
1127 ScratchRegisterScope srs(assembler);
1128 XRegister temp = srs.AllocateXRegister();
1129 XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
1130
1131 Riscv64Label loop;
1132 Riscv64Label end;
1133 Riscv64Label return_true;
1134 Riscv64Label return_false;
1135
1136 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1137
1138 StringEqualsOptimizations optimizations(invoke);
1139 if (!optimizations.GetArgumentNotNull()) {
1140 // Check if input is null, return false if it is.
1141 __ Beqz(arg, &return_false);
1142 }
1143
1144 // Reference equality check, return true if same reference.
1145 __ Beq(str, arg, &return_true);
1146
1147 if (!optimizations.GetArgumentIsString()) {
1148 // Instanceof check for the argument by comparing class fields.
1149 // All string objects must have the same type since String cannot be subclassed.
1150 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1151 // If the argument is a string object, its class field must be equal to receiver's class field.
1152 //
1153 // As the String class is expected to be non-movable, we can read the class
1154 // field from String.equals' arguments without read barriers.
1155 AssertNonMovableStringClass();
1156 // /* HeapReference<Class> */ temp = str->klass_
1157 __ Loadwu(temp, str, class_offset);
1158 // /* HeapReference<Class> */ temp1 = arg->klass_
1159 __ Loadwu(temp1, arg, class_offset);
1160 // Also, because we use the previously loaded class references only in the
1161 // following comparison, we don't need to unpoison them.
1162 __ Bne(temp, temp1, &return_false);
1163 }
1164
1165 // Load `count` fields of this and argument strings.
1166 __ Loadwu(temp, str, count_offset);
1167 __ Loadwu(temp1, arg, count_offset);
1168 // Check if `count` fields are equal, return false if they're not.
1169 // Also compares the compression style, if differs return false.
1170 __ Bne(temp, temp1, &return_false);
1171
1172 // Assertions that must hold in order to compare strings 8 bytes at a time.
1173 // Ok to do this because strings are zero-padded to kObjectAlignment.
1174 DCHECK_ALIGNED(value_offset, 8);
1175 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1176
1177 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1178 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1179 "Expecting 0=compressed, 1=uncompressed");
1180 __ Beqz(temp, &return_true);
1181
1182 if (mirror::kUseStringCompression) {
1183 // For string compression, calculate the number of bytes to compare (not chars).
1184 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1185 __ Andi(temp1, temp, 1); // Extract compression flag.
1186 __ Srliw(temp, temp, 1u); // Extract length.
1187 __ Sllw(temp, temp, temp1); // Calculate number of bytes to compare.
1188 }
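// Illustrative example (not part of the original comments): with string compression, the
// `count` field packs the compression flag in bit 0 and the length in the upper bits.
// For `count == 0x0000000B` the flag is 1 (uncompressed) and the length is 5, so the code
// above computes 5 << 1 = 10 bytes to compare; for a compressed string of length 5
// (`count == 0x0000000A`), it computes 5 << 0 = 5 bytes.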
1189
1190 // Store the offset of the string value field in preparation for the comparison loop.
1191 __ Li(temp1, value_offset);
1192
1193 XRegister temp2 = srs.AllocateXRegister();
1194 // Loop to compare strings 8 bytes at a time starting at the front of the string.
1195 __ Bind(&loop);
1196 __ Add(out, str, temp1);
1197 __ Ld(out, out, 0);
1198 __ Add(temp2, arg, temp1);
1199 __ Ld(temp2, temp2, 0);
1200 __ Addi(temp1, temp1, sizeof(uint64_t));
1201 __ Bne(out, temp2, &return_false);
1202 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1203 __ Addi(temp, temp, mirror::kUseStringCompression ? -8 : -4);
1204 __ Bgt(temp, Zero, &loop);
1205
1206 // Return true and exit the function.
1207 // If the loop did not branch to `return_false`, the strings are equal.
1208 __ Bind(&return_true);
1209 __ Li(out, 1);
1210 __ J(&end);
1211
1212 // Return false and exit the function.
1213 __ Bind(&return_false);
1214 __ Li(out, 0);
1215 __ Bind(&end);
1216 }
1217
EmitStoreConditional(Riscv64Assembler * assembler,DataType::Type type,XRegister ptr,XRegister store_result,XRegister to_store,AqRl aqrl)1218 static void EmitStoreConditional(Riscv64Assembler* assembler,
1219 DataType::Type type,
1220 XRegister ptr,
1221 XRegister store_result,
1222 XRegister to_store,
1223 AqRl aqrl) {
1224 switch (type) {
1225 case DataType::Type::kInt32:
1226 case DataType::Type::kReference:
1227 __ ScW(store_result, to_store, ptr, aqrl);
1228 break;
1229 case DataType::Type::kInt64:
1230 __ ScD(store_result, to_store, ptr, aqrl);
1231 break;
1232 default:
1233 LOG(FATAL) << "Unexpected type: " << type;
1234 UNREACHABLE();
1235 }
1236 }
1237
GenerateCompareAndSet(Riscv64Assembler * assembler,DataType::Type type,std::memory_order order,bool strong,Riscv64Label * cmp_failure,XRegister ptr,XRegister new_value,XRegister old_value,XRegister mask,XRegister masked,XRegister store_result,XRegister expected,XRegister expected2=kNoXRegister)1238 static void GenerateCompareAndSet(Riscv64Assembler* assembler,
1239 DataType::Type type,
1240 std::memory_order order,
1241 bool strong,
1242 Riscv64Label* cmp_failure,
1243 XRegister ptr,
1244 XRegister new_value,
1245 XRegister old_value,
1246 XRegister mask,
1247 XRegister masked,
1248 XRegister store_result,
1249 XRegister expected,
1250 XRegister expected2 = kNoXRegister) {
1251 DCHECK(!DataType::IsFloatingPointType(type));
1252 DCHECK_GE(DataType::Size(type), 4u);
1253
1254 // The `expected2` register is valid only for the reference slow path. It holds the unmarked
1255 // old value from the main path's CAS attempt, where the marked old value matched `expected`.
1256 DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference);
1257
1258 auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
1259
1260 // repeat: {
1261 // old_value = [ptr]; // Load exclusive.
1262 // cmp_value = old_value & mask; // Extract relevant bits if applicable.
1263 // if (cmp_value != expected && cmp_value != expected2) goto cmp_failure;
1264 // store_result = failed([ptr] <- new_value); // Store exclusive.
1265 // }
1266 // if (strong) {
1267 // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds.
1268 // } else {
1269 // store_result = store_result ^ 1; // Report success as 1, failure as 0.
1270 // }
1271 //
1272 // (If `mask` is not valid, `expected` is compared with `old_value` instead of `cmp_value`.)
1273 // (If `expected2` is not valid, the `cmp_value == expected2` part is not emitted.)
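// Illustrative caller setup (an assumption, not taken from a real call site): to CAS a 16-bit
// field stored in bits 16..31 of an aligned 32-bit word, a caller could pass
//   mask      = 0xFFFF0000,
//   expected  = expected16 << 16,
//   new_value = new16 << 16,
// so that `masked = old_value & mask` is compared against `expected` and the store writes
// `(old_value ^ masked) | new_value`, i.e. the old word with only the masked bits replaced.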
1274
1275 // Note: We use "bare" local branches to ensure that they are not expanded and that the
1276 // scratch register `TMP` is not clobbered when they are taken. Taking the branch to
1277 // `cmp_failure` can theoretically clobber `TMP` (if the target is outside the 1 MiB range).
1278 Riscv64Label loop;
1279 if (strong) {
1280 __ Bind(&loop);
1281 }
1282 EmitLoadReserved(assembler, type, ptr, old_value, load_aqrl);
1283 XRegister to_store = new_value;
1284 {
1285 ScopedLrScExtensionsRestriction slser(assembler);
1286 if (mask != kNoXRegister) {
1287 DCHECK_EQ(expected2, kNoXRegister);
1288 DCHECK_NE(masked, kNoXRegister);
1289 __ And(masked, old_value, mask);
1290 __ Bne(masked, expected, cmp_failure);
1291 // The `old_value` does not need to be preserved as the caller shall use `masked`
1292 // to return the old value if needed.
1293 to_store = old_value;
1294 // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here
1295 // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.)
1296 __ Xor(to_store, old_value, masked);
1297 __ Or(to_store, to_store, new_value);
1298 } else if (expected2 != kNoXRegister) {
1299 Riscv64Label match2;
1300 __ Beq(old_value, expected2, &match2, /*is_bare=*/ true);
1301 __ Bne(old_value, expected, cmp_failure);
1302 __ Bind(&match2);
1303 } else {
1304 __ Bne(old_value, expected, cmp_failure);
1305 }
1306 }
1307 EmitStoreConditional(assembler, type, ptr, store_result, to_store, store_aqrl);
1308 if (strong) {
1309 __ Bnez(store_result, &loop, /*is_bare=*/ true);
1310 } else {
1311 // Flip the `store_result` register to indicate success by 1 and failure by 0.
1312 __ Xori(store_result, store_result, 1);
1313 }
1314 }
1315
1316 class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
1317 public:
ReadBarrierCasSlowPathRISCV64(HInvoke * invoke,std::memory_order order,bool strong,XRegister base,XRegister offset,XRegister expected,XRegister new_value,XRegister old_value,XRegister old_value_temp,XRegister store_result,bool update_old_value,CodeGeneratorRISCV64 * riscv64_codegen)1318 ReadBarrierCasSlowPathRISCV64(HInvoke* invoke,
1319 std::memory_order order,
1320 bool strong,
1321 XRegister base,
1322 XRegister offset,
1323 XRegister expected,
1324 XRegister new_value,
1325 XRegister old_value,
1326 XRegister old_value_temp,
1327 XRegister store_result,
1328 bool update_old_value,
1329 CodeGeneratorRISCV64* riscv64_codegen)
1330 : SlowPathCodeRISCV64(invoke),
1331 order_(order),
1332 strong_(strong),
1333 base_(base),
1334 offset_(offset),
1335 expected_(expected),
1336 new_value_(new_value),
1337 old_value_(old_value),
1338 old_value_temp_(old_value_temp),
1339 store_result_(store_result),
1340 update_old_value_(update_old_value),
1341 mark_old_value_slow_path_(nullptr),
1342 update_old_value_slow_path_(nullptr) {
1343 // We need to add the slow paths now; it is too late to add them when emitting slow path code.
1344 Location old_value_loc = Location::RegisterLocation(old_value);
1345 Location old_value_temp_loc = Location::RegisterLocation(old_value_temp);
1346 if (kUseBakerReadBarrier) {
1347 mark_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
1348 invoke, old_value_temp_loc, kBakerReadBarrierTemp);
1349 if (update_old_value_) {
1350 update_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
1351 invoke, old_value_loc, kBakerReadBarrierTemp);
1352 }
1353 } else {
1354 Location base_loc = Location::RegisterLocation(base);
1355 Location index = Location::RegisterLocation(offset);
1356 mark_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
1357 invoke, old_value_temp_loc, old_value_loc, base_loc, /*offset=*/ 0u, index);
1358 if (update_old_value_) {
1359 update_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
1360 invoke, old_value_loc, old_value_temp_loc, base_loc, /*offset=*/ 0u, index);
1361 }
1362 }
1363 }
1364
GetDescription() const1365 const char* GetDescription() const override { return "ReadBarrierCasSlowPathRISCV64"; }
1366
1367 // We return to a different label on success for a strong CAS that does not return old value.
GetSuccessExitLabel()1368 Riscv64Label* GetSuccessExitLabel() {
1369 return &success_exit_label_;
1370 }
1371
EmitNativeCode(CodeGenerator * codegen)1372 void EmitNativeCode(CodeGenerator* codegen) override {
1373 CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
1374 Riscv64Assembler* assembler = riscv64_codegen->GetAssembler();
1375 __ Bind(GetEntryLabel());
1376
1377 // Mark the `old_value_` from the main path and compare with `expected_`.
1378 DCHECK(mark_old_value_slow_path_ != nullptr);
1379 if (kUseBakerReadBarrier) {
1380 __ Mv(old_value_temp_, old_value_);
1381 riscv64_codegen->EmitBakerReadBarierMarkingCheck(mark_old_value_slow_path_,
1382 Location::RegisterLocation(old_value_temp_),
1383 kBakerReadBarrierTemp);
1384 } else {
1385 __ J(mark_old_value_slow_path_->GetEntryLabel());
1386 __ Bind(mark_old_value_slow_path_->GetExitLabel());
1387 }
1388 Riscv64Label move_marked_old_value;
1389 __ Bne(old_value_temp_, expected_, update_old_value_ ? &move_marked_old_value : GetExitLabel());
1390
1391 // The `old_value` we have read did not match `expected` (which is always a to-space
1392 // reference) but after the read barrier the marked to-space value matched, so the
1393 // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1394 // as the main path but check for both `expected` and the unmarked old value
1395 // representing the to-space and from-space references for the same object.
1396
1397 ScratchRegisterScope srs(assembler);
1398 XRegister tmp_ptr = srs.AllocateXRegister();
1399 XRegister store_result =
1400 store_result_ != kNoXRegister ? store_result_ : srs.AllocateXRegister();
1401
1402 // Recalculate the `tmp_ptr` from the main path; it may have been clobbered by the read barrier
1403 // above or by an expanded conditional branch (which clobbers `TMP` if the target is beyond 1 MiB).
1404 __ Add(tmp_ptr, base_, offset_);
1405
1406 Riscv64Label mark_old_value;
1407 GenerateCompareAndSet(riscv64_codegen->GetAssembler(),
1408 DataType::Type::kReference,
1409 order_,
1410 strong_,
1411 /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1412 tmp_ptr,
1413 new_value_,
1414 /*old_value=*/ old_value_temp_,
1415 /*mask=*/ kNoXRegister,
1416 /*masked=*/ kNoXRegister,
1417 store_result,
1418 expected_,
1419 /*expected2=*/ old_value_);
1420 if (update_old_value_) {
1421 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1422 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1423 __ Mv(old_value_, expected_);
1424 }
1425 if (!update_old_value_ && strong_) {
1426 // Load success value to the result register.
1427 // We must jump to the instruction that loads the success value in the main path.
1428 // Note that an SC failure in the CAS loop sets the `store_result` to 1, so the main
1429 // path must not use the `store_result` as an indication of success.
1430 __ J(GetSuccessExitLabel());
1431 } else {
1432 __ J(GetExitLabel());
1433 }
1434
1435 if (update_old_value_) {
1436 // TODO(riscv64): If we initially saw a from-space reference and then saw
1437 // a different reference, can the latter be also a from-space reference?
1438 // (Shouldn't every reference write store a to-space reference?)
1439 DCHECK(update_old_value_slow_path_ != nullptr);
1440 __ Bind(&mark_old_value);
1441 if (kUseBakerReadBarrier) {
1442 __ Mv(old_value_, old_value_temp_);
1443 riscv64_codegen->EmitBakerReadBarierMarkingCheck(update_old_value_slow_path_,
1444 Location::RegisterLocation(old_value_),
1445 kBakerReadBarrierTemp);
1446 } else {
1447 // Note: We could redirect the `failure` above directly to the entry label and bind
1448 // the exit label in the main path, but the main path would need to access the
1449 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1450 __ J(update_old_value_slow_path_->GetEntryLabel());
1451 __ Bind(update_old_value_slow_path_->GetExitLabel());
1452 }
1453 __ J(GetExitLabel());
1454
1455 __ Bind(&move_marked_old_value);
1456 __ Mv(old_value_, old_value_temp_);
1457 __ J(GetExitLabel());
1458 }
1459 }
1460
1461 private:
1462 // Use RA as temp. It is clobbered in the slow path anyway.
1463 static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
1464
1465 std::memory_order order_;
1466 bool strong_;
1467 XRegister base_;
1468 XRegister offset_;
1469 XRegister expected_;
1470 XRegister new_value_;
1471 XRegister old_value_;
1472 XRegister old_value_temp_;
1473 XRegister store_result_;
1474 bool update_old_value_;
1475 SlowPathCodeRISCV64* mark_old_value_slow_path_;
1476 SlowPathCodeRISCV64* update_old_value_slow_path_;
1477 Riscv64Label success_exit_label_;
1478 };
1479
EmitBlt32(Riscv64Assembler * assembler,XRegister rs1,Location rs2,Riscv64Label * label,XRegister temp)1480 static void EmitBlt32(Riscv64Assembler* assembler,
1481 XRegister rs1,
1482 Location rs2,
1483 Riscv64Label* label,
1484 XRegister temp) {
1485 if (rs2.IsConstant()) {
1486 __ Li(temp, rs2.GetConstant()->AsIntConstant()->GetValue());
1487 __ Blt(rs1, temp, label);
1488 } else {
1489 __ Blt(rs1, rs2.AsRegister<XRegister>(), label);
1490 }
1491 }
1492
CheckSystemArrayCopyPosition(Riscv64Assembler * assembler,XRegister array,Location pos,Location length,SlowPathCodeRISCV64 * slow_path,XRegister temp1,XRegister temp2,bool length_is_array_length,bool position_sign_checked)1493 static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler,
1494 XRegister array,
1495 Location pos,
1496 Location length,
1497 SlowPathCodeRISCV64* slow_path,
1498 XRegister temp1,
1499 XRegister temp2,
1500 bool length_is_array_length,
1501 bool position_sign_checked) {
1502 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
1503 if (pos.IsConstant()) {
1504 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1505 DCHECK_GE(pos_const, 0); // Checked in location builder.
1506 if (pos_const == 0) {
1507 if (!length_is_array_length) {
1508 // Check that length(array) >= length.
1509 __ Loadw(temp1, array, length_offset);
1510 EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1511 }
1512 } else {
1513 // Calculate length(array) - pos.
1514 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1515 // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
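// Worked example (illustrative): for an array of length 10 and `pos_const == 3`, `temp1`
// becomes 7; a requested copy length of 8 then fails the `7 >= 8` check below and takes the
// slow path, while a length of 7 or less passes.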
1516 __ Loadw(temp1, array, length_offset);
1517 __ AddConst32(temp1, temp1, -pos_const);
1518
1519 // Check that (length(array) - pos) >= length.
1520 EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1521 }
1522 } else if (length_is_array_length) {
1523 // The only way the copy can succeed is if pos is zero.
1524 __ Bnez(pos.AsRegister<XRegister>(), slow_path->GetEntryLabel());
1525 } else {
1526 // Check that pos >= 0.
1527 XRegister pos_reg = pos.AsRegister<XRegister>();
1528 if (!position_sign_checked) {
1529 __ Bltz(pos_reg, slow_path->GetEntryLabel());
1530 }
1531
1532 // Calculate length(array) - pos.
1533 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1534 // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1535 __ Loadw(temp1, array, length_offset);
1536 __ Sub(temp1, temp1, pos_reg);
1537
1538 // Check that (length(array) - pos) >= length.
1539 EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1540 }
1541 }
1542
GenArrayAddress(CodeGeneratorRISCV64 * codegen,XRegister dest,XRegister base,Location pos,DataType::Type type,int32_t data_offset)1543 static void GenArrayAddress(CodeGeneratorRISCV64* codegen,
1544 XRegister dest,
1545 XRegister base,
1546 Location pos,
1547 DataType::Type type,
1548 int32_t data_offset) {
1549 Riscv64Assembler* assembler = codegen->GetAssembler();
1550 if (pos.IsConstant()) {
1551 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1552 __ AddConst64(dest, base, DataType::Size(type) * constant + data_offset);
1553 } else {
1554 codegen->GetInstructionVisitor()->ShNAdd(dest, pos.AsRegister<XRegister>(), base, type);
1555 if (data_offset != 0) {
1556 __ AddConst64(dest, dest, data_offset);
1557 }
1558 }
1559 }
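// For `GenArrayAddress()` above, the computed address is `base + data_offset + pos * size(type)`.
// Illustrative example with hypothetical values: for 4-byte elements, a 12-byte data offset and
// `pos == 3`, the result is `base + 24`; the non-constant case uses a shift-and-add (ShNAdd)
// followed by adding the data offset.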
1560
1561 // Compute base source address, base destination address, and end
1562 // source address for System.arraycopy* intrinsics in `src_base`,
1563 // `dst_base` and `src_end` respectively.
GenSystemArrayCopyAddresses(CodeGeneratorRISCV64 * codegen,DataType::Type type,XRegister src,Location src_pos,XRegister dst,Location dst_pos,Location copy_length,XRegister src_base,XRegister dst_base,XRegister src_end)1564 static void GenSystemArrayCopyAddresses(CodeGeneratorRISCV64* codegen,
1565 DataType::Type type,
1566 XRegister src,
1567 Location src_pos,
1568 XRegister dst,
1569 Location dst_pos,
1570 Location copy_length,
1571 XRegister src_base,
1572 XRegister dst_base,
1573 XRegister src_end) {
1574 // This routine is used by the SystemArrayCopyX intrinsics.
1575 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kInt8 ||
1576 type == DataType::Type::kUint16 || type == DataType::Type::kInt32)
1577 << "Unexpected element type: " << type;
1578 const int32_t element_size = DataType::Size(type);
1579 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1580
1581 GenArrayAddress(codegen, src_base, src, src_pos, type, data_offset);
1582 GenArrayAddress(codegen, dst_base, dst, dst_pos, type, data_offset);
1583 GenArrayAddress(codegen, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
1584 }
1585
LocationForSystemArrayCopyInput(HInstruction * input)1586 static Location LocationForSystemArrayCopyInput(HInstruction* input) {
1587 HIntConstant* const_input = input->AsIntConstantOrNull();
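// RISC-V I-type immediates are 12-bit signed values (-2048 to 2047), so only constants in
// that range can be folded into the address computation; larger values need a register.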
1588 if (const_input != nullptr && IsInt<12>(const_input->GetValue())) {
1589 return Location::ConstantLocation(const_input);
1590 } else {
1591 return Location::RequiresRegister();
1592 }
1593 }
1594
1595 // For longer copy lengths, we prefer to fall back to libcore's native implementation.
1596 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1597
VisitSystemArrayCopy(HInvoke * invoke)1598 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
1599 // The only read barrier implementation supporting the
1600 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1601 if (codegen_->EmitNonBakerReadBarrier()) {
1602 return;
1603 }
1604
1605 size_t num_temps = codegen_->EmitBakerReadBarrier() ? 4u : 2u;
1606 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1607 invoke, kSystemArrayCopyThreshold, num_temps);
1608 if (locations != nullptr) {
1609 // We request position and length as constants only for small integral values.
1610 locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
1611 locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
1612 locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
1613 }
1614 }
1615
VisitSystemArrayCopy(HInvoke * invoke)1616 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
1617 // The only read barrier implementation supporting the
1618 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1619 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1620
1621 Riscv64Assembler* assembler = GetAssembler();
1622 LocationSummary* locations = invoke->GetLocations();
1623
1624 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1625 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1626 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1627 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1628 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1629
1630 XRegister src = locations->InAt(0).AsRegister<XRegister>();
1631 Location src_pos = locations->InAt(1);
1632 XRegister dest = locations->InAt(2).AsRegister<XRegister>();
1633 Location dest_pos = locations->InAt(3);
1634 Location length = locations->InAt(4);
1635 XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
1636 XRegister temp2 = locations->GetTemp(1).AsRegister<XRegister>();
1637
1638 SlowPathCodeRISCV64* intrinsic_slow_path =
1639 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
1640 codegen_->AddSlowPath(intrinsic_slow_path);
1641
1642 Riscv64Label conditions_on_positions_validated;
1643 SystemArrayCopyOptimizations optimizations(invoke);
1644
1645 // If source and destination are the same object, we go to the slow path when src_pos < dest_pos,
1646 // since that would require backward copying. This check is skipped when the positions are equal.
1647 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1648 if (src_pos.IsConstant()) {
1649 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1650 if (dest_pos.IsConstant()) {
1651 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1652 if (optimizations.GetDestinationIsSource()) {
1653 // Checked when building locations.
1654 DCHECK_GE(src_pos_constant, dest_pos_constant);
1655 } else if (src_pos_constant < dest_pos_constant) {
1656 __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel());
1657 }
1658 } else {
1659 if (!optimizations.GetDestinationIsSource()) {
1660 __ Bne(src, dest, &conditions_on_positions_validated);
1661 }
1662 __ Li(temp1, src_pos_constant);
1663 __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
1664 }
1665 } else {
1666 if (!optimizations.GetDestinationIsSource()) {
1667 __ Bne(src, dest, &conditions_on_positions_validated);
1668 }
1669 XRegister src_pos_reg = src_pos.AsRegister<XRegister>();
1670 EmitBlt32(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel(), temp2);
1671 }
1672 }
1673
1674 __ Bind(&conditions_on_positions_validated);
1675
1676 if (!optimizations.GetSourceIsNotNull()) {
1677 // Bail out if the source is null.
1678 __ Beqz(src, intrinsic_slow_path->GetEntryLabel());
1679 }
1680
1681 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1682 // Bail out if the destination is null.
1683 __ Beqz(dest, intrinsic_slow_path->GetEntryLabel());
1684 }
1685
1686 // We have already checked in the LocationsBuilder for the constant case.
1687 if (!length.IsConstant()) {
1688 // Merge the following two comparisons into one:
1689 // If the length is negative, bail out (delegate to libcore's native implementation).
1690 // If the length >= 128 then (currently) prefer native implementation.
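// A negative length reinterpreted as an unsigned value is far larger than the threshold,
// so the single unsigned BGEU below covers both conditions.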
1691 __ Li(temp1, kSystemArrayCopyThreshold);
1692 __ Bgeu(length.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
1693 }
1694 // Validity checks: source.
1695 CheckSystemArrayCopyPosition(assembler,
1696 src,
1697 src_pos,
1698 length,
1699 intrinsic_slow_path,
1700 temp1,
1701 temp2,
1702 optimizations.GetCountIsSourceLength(),
1703 /*position_sign_checked=*/ false);
1704
1705 // Validity checks: dest.
1706 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1707 CheckSystemArrayCopyPosition(assembler,
1708 dest,
1709 dest_pos,
1710 length,
1711 intrinsic_slow_path,
1712 temp1,
1713 temp2,
1714 optimizations.GetCountIsDestinationLength(),
1715 dest_position_sign_checked);
1716
1717 auto check_non_primitive_array_class = [&](XRegister klass, XRegister temp) {
1718 // No read barrier is needed for reading a chain of constant references for comparing
1719 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1720 // /* HeapReference<Class> */ temp = klass->component_type_
1721 __ Loadwu(temp, klass, component_offset);
1722 codegen_->MaybeUnpoisonHeapReference(temp);
1723 // Check that the component type is not null.
1724 __ Beqz(temp, intrinsic_slow_path->GetEntryLabel());
1725 // Check that the component type is not a primitive.
1726 // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
1727 __ Loadhu(temp, temp, primitive_offset);
1728 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1729 __ Bnez(temp, intrinsic_slow_path->GetEntryLabel());
1730 };
1731
1732 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1733 // Check whether all elements of the source array are assignable to the component
1734 // type of the destination array. We do two checks: the classes are the same,
1735 // or the destination is Object[]. If none of these checks succeed, we go to the
1736 // slow path.
1737
1738 if (codegen_->EmitBakerReadBarrier()) {
1739 XRegister temp3 = locations->GetTemp(2).AsRegister<XRegister>();
1740 // /* HeapReference<Class> */ temp1 = dest->klass_
1741 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
1742 Location::RegisterLocation(temp1),
1743 dest,
1744 class_offset,
1745 Location::RegisterLocation(temp3),
1746 /* needs_null_check= */ false);
1747 // /* HeapReference<Class> */ temp2 = src->klass_
1748 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
1749 Location::RegisterLocation(temp2),
1750 src,
1751 class_offset,
1752 Location::RegisterLocation(temp3),
1753 /* needs_null_check= */ false);
1754 } else {
1755 // /* HeapReference<Class> */ temp1 = dest->klass_
1756 __ Loadwu(temp1, dest, class_offset);
1757 codegen_->MaybeUnpoisonHeapReference(temp1);
1758 // /* HeapReference<Class> */ temp2 = src->klass_
1759 __ Loadwu(temp2, src, class_offset);
1760 codegen_->MaybeUnpoisonHeapReference(temp2);
1761 }
1762
1763 if (optimizations.GetDestinationIsTypedObjectArray()) {
1764 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1765 Riscv64Label do_copy;
1766 // For class match, we can skip the source type check regardless of the optimization flag.
1767 __ Beq(temp1, temp2, &do_copy);
1768 // No read barrier is needed for reading a chain of constant references
1769 // for comparing with null, see `ReadBarrierOption`.
1770 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1771 __ Loadwu(temp1, temp1, component_offset);
1772 codegen_->MaybeUnpoisonHeapReference(temp1);
1773 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1774 __ Loadwu(temp1, temp1, super_offset);
1775 // No need to unpoison the result, we're comparing against null.
1776 __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
1777 // Bail out if the source is not a non primitive array.
1778 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1779 check_non_primitive_array_class(temp2, temp2);
1780 }
1781 __ Bind(&do_copy);
1782 } else {
1783 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1784 // When the classes match, we can skip the array type check entirely if at least one of source
1785 // and destination is known to be a non primitive array; otherwise a single check is enough.
1786 __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
1787 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1788 !optimizations.GetSourceIsNonPrimitiveArray()) {
1789 check_non_primitive_array_class(temp2, temp2);
1790 }
1791 }
1792 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1793 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1794 // Bail out if the source is not a non primitive array.
1795 // No read barrier is needed for reading a chain of constant references for comparing
1796 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1797 // /* HeapReference<Class> */ temp2 = src->klass_
1798 __ Loadwu(temp2, src, class_offset);
1799 codegen_->MaybeUnpoisonHeapReference(temp2);
1800 check_non_primitive_array_class(temp2, temp2);
1801 }
1802
1803 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1804 // Zero constant length: no need to emit the loop code at all.
1805 } else {
1806 Riscv64Label skip_copy_and_write_barrier;
1807 if (length.IsRegister()) {
1808 // Don't enter the copy loop if the length is zero.
1809 __ Beqz(length.AsRegister<XRegister>(), &skip_copy_and_write_barrier);
1810 }
1811
1812 {
1813 // We use a block to end the scratch scope before the write barrier, thus
1814 // freeing the scratch registers so they can be used in `MarkGCCard`.
1815 ScratchRegisterScope srs(assembler);
1816 bool emit_rb = codegen_->EmitBakerReadBarrier();
1817 XRegister temp3 =
1818 emit_rb ? locations->GetTemp(2).AsRegister<XRegister>() : srs.AllocateXRegister();
1819
1820 XRegister src_curr_addr = temp1;
1821 XRegister dst_curr_addr = temp2;
1822 XRegister src_stop_addr = temp3;
1823 const DataType::Type type = DataType::Type::kReference;
1824 const int32_t element_size = DataType::Size(type);
1825
1826 XRegister tmp = kNoXRegister;
1827 SlowPathCodeRISCV64* read_barrier_slow_path = nullptr;
1828 if (emit_rb) {
1829 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1830
1831 // SystemArrayCopy implementation for Baker read barriers (see
1832 // also CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier):
1833 //
1834 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1835 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1836 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1837 // if (is_gray) {
1838 // // Slow-path copy.
1839 // do {
1840 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1841 // } while (src_ptr != end_ptr)
1842 // } else {
1843 // // Fast-path copy.
1844 // do {
1845 // *dest_ptr++ = *src_ptr++;
1846 // } while (src_ptr != end_ptr)
1847 // }
1848
1849 // /* uint32_t */ monitor = src->monitor_
1850 tmp = locations->GetTemp(3).AsRegister<XRegister>();
1851 __ Loadwu(tmp, src, monitor_offset);
1852 // /* LockWord */ lock_word = LockWord(monitor)
1853 static_assert(sizeof(LockWord) == sizeof(int32_t),
1854 "art::LockWord and int32_t have different sizes.");
1855
1856 // Shift the RB state bit to the sign bit while also clearing the low 32 bits
1857 // for the fake dependency below.
1858 static_assert(LockWord::kReadBarrierStateShift < 31);
1859 __ Slli(tmp, tmp, 63 - LockWord::kReadBarrierStateShift);
1860
1861 // Introduce a dependency on the lock_word including rb_state, to prevent load-load
1862 // reordering, and without using a memory barrier (which would be more expensive).
1863 // `src` is unchanged by this operation (since Adduw adds only the low 32 bits of `tmp`,
1864 // which are zero after the left shift), but its value now depends on `tmp`.
1865 __ AddUw(src, tmp, src);
1866
1867 // Slow path used to copy array when `src` is gray.
1868 read_barrier_slow_path = new (codegen_->GetScopedAllocator())
1869 ReadBarrierSystemArrayCopySlowPathRISCV64(invoke, Location::RegisterLocation(tmp));
1870 codegen_->AddSlowPath(read_barrier_slow_path);
1871 }
1872
1873 // Compute base source address, base destination address, and end source address for
1874 // System.arraycopy* intrinsics in `src_base`, `dst_base` and `src_end` respectively.
1875 // Note that `src_curr_addr` is computed from `src` (and `src_pos`) here, and
1876 // thus honors the artificial dependency of `src` on `tmp` for read barriers.
1877 GenSystemArrayCopyAddresses(codegen_,
1878 type,
1879 src,
1880 src_pos,
1881 dest,
1882 dest_pos,
1883 length,
1884 src_curr_addr,
1885 dst_curr_addr,
1886 src_stop_addr);
1887
1888 if (emit_rb) {
1889 // Given the numeric representation, it's enough to check the low bit of the RB state.
1890 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1891 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1892 DCHECK_NE(tmp, kNoXRegister);
1893 __ Bltz(tmp, read_barrier_slow_path->GetEntryLabel());
1894 } else {
1895 // After allocating the last scratch register, we cannot use macro load/store instructions
1896 // such as `Loadwu()` and need to use raw instructions. However, all offsets below are 0.
1897 DCHECK_EQ(tmp, kNoXRegister);
1898 tmp = srs.AllocateXRegister();
1899 }
1900
1901 // Iterate over the arrays and do a raw copy of the objects. We don't need to
1902 // poison/unpoison.
1903 Riscv64Label loop;
1904 __ Bind(&loop);
1905 __ Lwu(tmp, src_curr_addr, 0);
1906 __ Sw(tmp, dst_curr_addr, 0);
1907 __ Addi(src_curr_addr, src_curr_addr, element_size);
1908 __ Addi(dst_curr_addr, dst_curr_addr, element_size);
1909 // Bare: `TMP` shall not be clobbered.
1910 __ Bne(src_curr_addr, src_stop_addr, &loop, /*is_bare=*/ true);
1911
1912 if (emit_rb) {
1913 DCHECK(read_barrier_slow_path != nullptr);
1914 __ Bind(read_barrier_slow_path->GetExitLabel());
1915 }
1916 }
1917
1918 // We only need one card marking on the destination array.
1919 codegen_->MarkGCCard(dest);
1920
1921 __ Bind(&skip_copy_and_write_barrier);
1922 }
1923
1924 __ Bind(intrinsic_slow_path->GetExitLabel());
1925 }
1926
1927 // This value is in bytes and greater than ARRAYCOPY_SHORT_XXX_ARRAY_THRESHOLD
1928 // in libcore, so if we choose to jump to the slow path we will end up
1929 // in the native implementation.
1930 static constexpr int32_t kSystemArrayCopyPrimThreshold = 384;
1931
CreateSystemArrayCopyLocations(HInvoke * invoke,DataType::Type type)1932 static void CreateSystemArrayCopyLocations(HInvoke* invoke, DataType::Type type) {
1933 int32_t copy_threshold = kSystemArrayCopyPrimThreshold / DataType::Size(type);
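// For example, for kUint16 (2-byte chars) this yields 384 / 2 = 192 elements, and for
// kInt32 it yields 384 / 4 = 96 elements.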
1934
1935 // Check to see if we have known failures that will cause us to have to bail out
1936 // to the runtime, and just generate the runtime call directly.
1937 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
1938 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
1939
1940 // The positions must be non-negative.
1941 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
1942 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
1943 // We will have to fail anyways.
1944 return;
1945 }
1946
1947 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
1948 // native implementation.
1949 HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
1950 if (length != nullptr) {
1951 int32_t len = length->GetValue();
1952 if (len < 0 || len > copy_threshold) {
1953 // Just call as normal.
1954 return;
1955 }
1956 }
1957
1958 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
1959 LocationSummary* locations =
1960 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
1961 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
1962 locations->SetInAt(0, Location::RequiresRegister());
1963 locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
1964 locations->SetInAt(2, Location::RequiresRegister());
1965 locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
1966 locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
1967
1968 locations->AddRegisterTemps(3);
1969 }
1970
VisitSystemArrayCopyByte(HInvoke * invoke)1971 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyByte(HInvoke* invoke) {
1972 CreateSystemArrayCopyLocations(invoke, DataType::Type::kInt8);
1973 }
1974
VisitSystemArrayCopyChar(HInvoke * invoke)1975 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyChar(HInvoke* invoke) {
1976 CreateSystemArrayCopyLocations(invoke, DataType::Type::kUint16);
1977 }
1978
VisitSystemArrayCopyInt(HInvoke * invoke)1979 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyInt(HInvoke* invoke) {
1980 CreateSystemArrayCopyLocations(invoke, DataType::Type::kInt32);
1981 }
1982
GenerateUnsignedLoad(Riscv64Assembler * assembler,XRegister rd,XRegister rs1,int32_t offset,size_t type_size)1983 static void GenerateUnsignedLoad(
1984 Riscv64Assembler* assembler, XRegister rd, XRegister rs1, int32_t offset, size_t type_size) {
1985 switch (type_size) {
1986 case 1:
1987 __ Lbu(rd, rs1, offset);
1988 break;
1989 case 2:
1990 __ Lhu(rd, rs1, offset);
1991 break;
1992 case 4:
1993 __ Lwu(rd, rs1, offset);
1994 break;
1995 case 8:
1996 __ Ld(rd, rs1, offset);
1997 break;
1998 default:
1999 LOG(FATAL) << "Unexpected data type";
2000 }
2001 }
2002
GenerateStore(Riscv64Assembler * assembler,XRegister rs2,XRegister rs1,int32_t offset,size_t type_size)2003 static void GenerateStore(
2004 Riscv64Assembler* assembler, XRegister rs2, XRegister rs1, int32_t offset, size_t type_size) {
2005 switch (type_size) {
2006 case 1:
2007 __ Sb(rs2, rs1, offset);
2008 break;
2009 case 2:
2010 __ Sh(rs2, rs1, offset);
2011 break;
2012 case 4:
2013 __ Sw(rs2, rs1, offset);
2014 break;
2015 case 8:
2016 __ Sd(rs2, rs1, offset);
2017 break;
2018 default:
2019 LOG(FATAL) << "Unexpected data type";
2020 }
2021 }
2022
SystemArrayCopyPrimitive(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,DataType::Type type)2023 static void SystemArrayCopyPrimitive(HInvoke* invoke,
2024 CodeGeneratorRISCV64* codegen,
2025 DataType::Type type) {
2026 Riscv64Assembler* assembler = codegen->GetAssembler();
2027 LocationSummary* locations = invoke->GetLocations();
2028 XRegister src = locations->InAt(0).AsRegister<XRegister>();
2029 Location src_pos = locations->InAt(1);
2030 XRegister dst = locations->InAt(2).AsRegister<XRegister>();
2031 Location dst_pos = locations->InAt(3);
2032 Location length = locations->InAt(4);
2033
2034 SlowPathCodeRISCV64* slow_path =
2035 new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
2036 codegen->AddSlowPath(slow_path);
2037
2038 SystemArrayCopyOptimizations optimizations(invoke);
2039
2040 // If source and destination are the same, take the slow path. Overlapping copy regions would
2041 // have to be copied in reverse, and we cannot always determine here whether that is needed.
2042 __ Beq(src, dst, slow_path->GetEntryLabel());
2043
2044 if (!optimizations.GetSourceIsNotNull()) {
2045 // Bail out if the source is null.
2046 __ Beqz(src, slow_path->GetEntryLabel());
2047 }
2048
2049 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2050 // Bail out if the destination is null.
2051 __ Beqz(dst, slow_path->GetEntryLabel());
2052 }
2053
2054 int32_t copy_threshold = kSystemArrayCopyPrimThreshold / DataType::Size(type);
2055 XRegister tmp = locations->GetTemp(0).AsRegister<XRegister>();
2056 if (!length.IsConstant()) {
2057 // Merge the following two comparisons into one:
2058 // If the length is negative, bail out (delegate to libcore's native implementation).
2059 // If the length >= kSystemArrayCopyPrimThreshold then (currently) prefer libcore's
2060 // native implementation.
2061 __ Li(tmp, copy_threshold);
2062 __ Bgeu(length.AsRegister<XRegister>(), tmp, slow_path->GetEntryLabel());
2063 } else {
2064 // We have already checked in the LocationsBuilder for the constant case.
2065 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2066 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), copy_threshold);
2067 }
2068
2069 XRegister src_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
2070 XRegister dst_curr_addr = locations->GetTemp(2).AsRegister<XRegister>();
2071
2072 CheckSystemArrayCopyPosition(assembler,
2073 src,
2074 src_pos,
2075 length,
2076 slow_path,
2077 src_curr_addr,
2078 dst_curr_addr,
2079 /*length_is_array_length=*/ false,
2080 /*position_sign_checked=*/ false);
2081
2082 CheckSystemArrayCopyPosition(assembler,
2083 dst,
2084 dst_pos,
2085 length,
2086 slow_path,
2087 src_curr_addr,
2088 dst_curr_addr,
2089 /*length_is_array_length=*/ false,
2090 /*position_sign_checked=*/ false);
2091
2092 const int32_t element_size = DataType::Size(type);
2093 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2094
2095 GenArrayAddress(codegen, src_curr_addr, src, src_pos, type, data_offset);
2096 GenArrayAddress(codegen, dst_curr_addr, dst, dst_pos, type, data_offset);
2097
2098 // We split processing of the array in two parts: head and tail.
2099 // A first loop handles the head by copying a block of elements per
2100 // iteration (see: elements_per_block).
2101 // A second loop handles the tail by copying the remaining elements: one by one if the
2102 // copy length is not constant, or via a fully unrolled sequence if it is.
2103 //
2104 // Both loops are inverted for better performance, meaning they are
2105 // implemented as conditional do-while loops.
2106 // Here, the loop condition is first checked to determine if there are
2107 // sufficient elements to run an iteration, then we enter the do-while: an
2108 // iteration is performed followed by a conditional branch only if another
2109 // iteration is necessary. As opposed to a standard while-loop, this inversion
2110 // can save some branching (e.g. we don't branch back to the initial condition
2111 // at the end of every iteration only to potentially immediately branch
2112 // again).
2113 //
2114 // A full block's worth of elements is subtracted from the length before the head loop and
2115 // added back after it. This way, a non-negative remaining length after a head loop iteration
2116 // means that at least one full block remains, so only a single conditional check is needed
2117 // per iteration.
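// Worked example (illustrative): copying 37 chars (kUint16, 2-byte elements) gives
// elements_per_block == 8 (16 bytes). The head loop runs 4 iterations and copies 32 elements;
// the unrolled tail then copies 37 % 8 == 5 elements (10 bytes) with one 8-byte and one
// 2-byte load/store pair.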
2118 ScratchRegisterScope temps(assembler);
2119 constexpr int32_t bytes_copied_per_iteration = 16;
2120 DCHECK_EQ(bytes_copied_per_iteration % element_size, 0);
2121 int32_t elements_per_block = bytes_copied_per_iteration / element_size;
2122 Riscv64Label done;
2123
2124 XRegister length_tmp = temps.AllocateXRegister();
2125
2126 auto emit_head_loop = [&]() {
2127 ScratchRegisterScope local_temps(assembler);
2128 XRegister tmp2 = local_temps.AllocateXRegister();
2129
2130 Riscv64Label loop;
2131 __ Bind(&loop);
2132 __ Ld(tmp, src_curr_addr, 0);
2133 __ Ld(tmp2, src_curr_addr, 8);
2134 __ Sd(tmp, dst_curr_addr, 0);
2135 __ Sd(tmp2, dst_curr_addr, 8);
2136 __ Addi(length_tmp, length_tmp, -elements_per_block);
2137 __ Addi(src_curr_addr, src_curr_addr, bytes_copied_per_iteration);
2138 __ Addi(dst_curr_addr, dst_curr_addr, bytes_copied_per_iteration);
2139 __ Bgez(length_tmp, &loop);
2140 };
2141
2142 auto emit_tail_loop = [&]() {
2143 Riscv64Label loop;
2144 __ Bind(&loop);
2145 GenerateUnsignedLoad(assembler, tmp, src_curr_addr, 0, element_size);
2146 GenerateStore(assembler, tmp, dst_curr_addr, 0, element_size);
2147 __ Addi(length_tmp, length_tmp, -1);
2148 __ Addi(src_curr_addr, src_curr_addr, element_size);
2149 __ Addi(dst_curr_addr, dst_curr_addr, element_size);
2150 __ Bgtz(length_tmp, &loop);
2151 };
2152
2153 auto emit_unrolled_tail_loop = [&](int32_t tail_length) {
2154 DCHECK_LT(tail_length, elements_per_block);
2155
2156 int32_t length_in_bytes = tail_length * element_size;
2157 size_t offset = 0;
2158 for (size_t operation_size = 8; operation_size > 0; operation_size >>= 1) {
2159 if ((length_in_bytes & operation_size) != 0) {
2160 GenerateUnsignedLoad(assembler, tmp, src_curr_addr, offset, operation_size);
2161 GenerateStore(assembler, tmp, dst_curr_addr, offset, operation_size);
2162 offset += operation_size;
2163 }
2164 }
2165 };
2166
2167 if (length.IsConstant()) {
2168 const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
2169 if (constant_length >= elements_per_block) {
2170 __ Li(length_tmp, constant_length - elements_per_block);
2171 emit_head_loop();
2172 }
2173 emit_unrolled_tail_loop(constant_length % elements_per_block);
2174 } else {
2175 Riscv64Label tail_loop;
2176 XRegister length_reg = length.AsRegister<XRegister>();
2177 __ Addi(length_tmp, length_reg, -elements_per_block);
2178 __ Bltz(length_tmp, &tail_loop);
2179
2180 emit_head_loop();
2181
2182 __ Bind(&tail_loop);
2183 __ Addi(length_tmp, length_tmp, elements_per_block);
2184 __ Beqz(length_tmp, &done);
2185
2186 emit_tail_loop();
2187 }
2188
2189 __ Bind(&done);
2190 __ Bind(slow_path->GetExitLabel());
2191 }
2192
VisitSystemArrayCopyByte(HInvoke * invoke)2193 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyByte(HInvoke* invoke) {
2194 SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kInt8);
2195 }
2196
VisitSystemArrayCopyChar(HInvoke * invoke)2197 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2198 SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kUint16);
2199 }
2200
VisitSystemArrayCopyInt(HInvoke * invoke)2201 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyInt(HInvoke* invoke) {
2202 SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kInt32);
2203 }
2204
2205 enum class GetAndUpdateOp {
2206 kSet,
2207 kAdd,
2208 kAnd,
2209 kOr,
2210 kXor
2211 };
2212
2213 // Generate a GetAndUpdate operation.
2214 //
2215 // Only 32-bit and 64-bit atomics are currently supported; therefore, smaller types need
2216 // special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
2217 // and extract the needed bits from `old_value`. For bitwise operations, no extra
2218 // handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
2219 // also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
2220 // Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
2221 // the bits to replace for `GetAndUpdateOp::kAdd`.
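// Illustrative example (hypothetical values): to set a 16-bit field stored in bits 16..31 of an
// aligned 32-bit word, a caller would pass `arg = new16 << 16` and, for `kSet`,
// `mask = 0x0000FFFF` (the bits to keep from the old value); for `kAdd` on the same field it
// would instead pass `mask = 0xFFFF0000` (the bits to replace with the sum).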
GenerateGetAndUpdate(CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op,DataType::Type type,std::memory_order order,XRegister ptr,XRegister arg,XRegister old_value,XRegister mask,XRegister temp)2222 static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen,
2223 GetAndUpdateOp get_and_update_op,
2224 DataType::Type type,
2225 std::memory_order order,
2226 XRegister ptr,
2227 XRegister arg,
2228 XRegister old_value,
2229 XRegister mask,
2230 XRegister temp) {
2231 DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister);
2232 DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32);
2233 DCHECK_IMPLIES(
2234 mask != kNoXRegister,
2235 (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd));
2236 Riscv64Assembler* assembler = codegen->GetAssembler();
2237 AqRl amo_aqrl = GetAmoAqRl(order);
2238 switch (get_and_update_op) {
2239 case GetAndUpdateOp::kSet:
2240 if (type == DataType::Type::kInt64) {
2241 __ AmoSwapD(old_value, arg, ptr, amo_aqrl);
2242 } else if (mask == kNoXRegister) {
2243 DCHECK_EQ(type, DataType::Type::kInt32);
2244 __ AmoSwapW(old_value, arg, ptr, amo_aqrl);
2245 } else {
2246 DCHECK_EQ(type, DataType::Type::kInt32);
2247 DCHECK_NE(temp, kNoXRegister);
2248 auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
2249 Riscv64Label retry;
2250 __ Bind(&retry);
2251 __ LrW(old_value, ptr, load_aqrl);
2252 {
2253 ScopedLrScExtensionsRestriction slser(assembler);
2254 __ And(temp, old_value, mask);
2255 __ Or(temp, temp, arg);
2256 }
2257 __ ScW(temp, temp, ptr, store_aqrl);
2258 __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
2259 }
2260 break;
2261 case GetAndUpdateOp::kAdd:
2262 if (type == DataType::Type::kInt64) {
2263 __ AmoAddD(old_value, arg, ptr, amo_aqrl);
2264 } else if (mask == kNoXRegister) {
2265 DCHECK_EQ(type, DataType::Type::kInt32);
2266 __ AmoAddW(old_value, arg, ptr, amo_aqrl);
2267 } else {
2268 DCHECK_EQ(type, DataType::Type::kInt32);
2269 DCHECK_NE(temp, kNoXRegister);
2270 auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
2271 Riscv64Label retry;
2272 __ Bind(&retry);
2273 __ LrW(old_value, ptr, load_aqrl);
2274 {
2275 ScopedLrScExtensionsRestriction slser(assembler);
2276 __ Add(temp, old_value, arg);
2277 // We use `(A ^ B) ^ A == B` and with the masking `((A ^ B) & mask) ^ A`, the result
2278 // contains bits from `B` for bits specified in `mask` and bits from `A` elsewhere.
2279 // Note: These instructions form a serial dependency chain, so this is not necessarily the
2280 // fastest approach, but the alternative `(A & ~mask) | (B & mask)` would need an extra
2281 // register for `~mask` because ANDN is not in the base "I" instruction set as required for
2282 // an LR/SC sequence.
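// In other words, ((A ^ B) & mask) ^ A == (B & mask) | (A & ~mask): bits selected by `mask`
// come from B (the sum), all other bits come from A (the old value).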
2283 __ Xor(temp, temp, old_value);
2284 __ And(temp, temp, mask);
2285 __ Xor(temp, temp, old_value);
2286 }
2287 __ ScW(temp, temp, ptr, store_aqrl);
2288 __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
2289 }
2290 break;
2291 case GetAndUpdateOp::kAnd:
2292 if (type == DataType::Type::kInt64) {
2293 __ AmoAndD(old_value, arg, ptr, amo_aqrl);
2294 } else {
2295 DCHECK_EQ(type, DataType::Type::kInt32);
2296 __ AmoAndW(old_value, arg, ptr, amo_aqrl);
2297 }
2298 break;
2299 case GetAndUpdateOp::kOr:
2300 if (type == DataType::Type::kInt64) {
2301 __ AmoOrD(old_value, arg, ptr, amo_aqrl);
2302 } else {
2303 DCHECK_EQ(type, DataType::Type::kInt32);
2304 __ AmoOrW(old_value, arg, ptr, amo_aqrl);
2305 }
2306 break;
2307 case GetAndUpdateOp::kXor:
2308 if (type == DataType::Type::kInt64) {
2309 __ AmoXorD(old_value, arg, ptr, amo_aqrl);
2310 } else {
2311 DCHECK_EQ(type, DataType::Type::kInt32);
2312 __ AmoXorW(old_value, arg, ptr, amo_aqrl);
2313 }
2314 break;
2315 }
2316 }
2317
CreateUnsafeGetLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)2318 static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
2319 HInvoke* invoke,
2320 CodeGeneratorRISCV64* codegen) {
2321 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2322 LocationSummary* locations = new (allocator) LocationSummary(
2323 invoke,
2324 can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
2325 kIntrinsified);
2326 if (can_call && kUseBakerReadBarrier) {
2327 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2328 }
2329 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2330 locations->SetInAt(1, Location::RequiresRegister());
2331 locations->SetInAt(2, Location::RequiresRegister());
2332 locations->SetOut(Location::RequiresRegister(),
2333 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2334 }
2335
CreateUnsafeGetAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)2336 static void CreateUnsafeGetAbsoluteLocations(ArenaAllocator* allocator,
2337 HInvoke* invoke) {
2338 LocationSummary* locations =
2339 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2340 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2341 locations->SetInAt(1, Location::RequiresRegister());
2342 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2343 }
2344
GenUnsafeGet(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2345 static void GenUnsafeGet(HInvoke* invoke,
2346 CodeGeneratorRISCV64* codegen,
2347 std::memory_order order,
2348 DataType::Type type) {
2349 DCHECK((type == DataType::Type::kInt8) ||
2350 (type == DataType::Type::kInt32) ||
2351 (type == DataType::Type::kInt64) ||
2352 (type == DataType::Type::kReference));
2353 LocationSummary* locations = invoke->GetLocations();
2354 Location object_loc = locations->InAt(1);
2355 XRegister object = object_loc.AsRegister<XRegister>(); // Object pointer.
2356 Location offset_loc = locations->InAt(2);
2357 XRegister offset = offset_loc.AsRegister<XRegister>(); // Long offset.
2358 Location out_loc = locations->Out();
2359 XRegister out = out_loc.AsRegister<XRegister>();
2360
2361 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2362 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2363 DCHECK(acquire_barrier || order == std::memory_order_relaxed);
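// The barriers emitted below implement the following mapping:
//   relaxed: load
//   acquire: load; fence(load -> any)
//   seq_cst: fence(any -> any); load; fence(load -> any)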
2364
2365 if (seq_cst_barrier) {
2366 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2367 }
2368
2369 if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
2370 // JdkUnsafeGetReference/JdkUnsafeGetReferenceVolatile with Baker's read barrier case.
2371 // TODO(riscv64): Revisit when we add checking if the holder is black.
2372 Location temp = Location::NoLocation();
2373 codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
2374 out_loc,
2375 object,
2376 /*offset=*/ 0,
2377 /*index=*/ offset_loc,
2378 temp,
2379 /*needs_null_check=*/ false);
2380 } else {
2381 // Other cases.
2382 Riscv64Assembler* assembler = codegen->GetAssembler();
2383 __ Add(out, object, offset);
2384 codegen->GetInstructionVisitor()->Load(out_loc, out, /*offset=*/ 0, type);
2385
2386 if (type == DataType::Type::kReference) {
2387 codegen->MaybeGenerateReadBarrierSlow(
2388 invoke, out_loc, out_loc, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
2389 }
2390 }
2391
2392 if (acquire_barrier) {
2393 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2394 }
2395 }
2396
GenUnsafeGetAbsolute(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2397 static void GenUnsafeGetAbsolute(HInvoke* invoke,
2398 CodeGeneratorRISCV64* codegen,
2399 std::memory_order order,
2400 DataType::Type type) {
2401 DCHECK((type == DataType::Type::kInt8) ||
2402 (type == DataType::Type::kInt32) ||
2403 (type == DataType::Type::kInt64));
2404 LocationSummary* locations = invoke->GetLocations();
2405 Location address_loc = locations->InAt(1);
2406 XRegister address = address_loc.AsRegister<XRegister>();
2407 Location out_loc = locations->Out();
2408
2409 bool seq_cst_barrier = order == std::memory_order_seq_cst;
2410 bool acquire_barrier = seq_cst_barrier || order == std::memory_order_acquire;
2411 DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2412
2413 if (seq_cst_barrier) {
2414 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2415 }
2416
2417 codegen->GetInstructionVisitor()->Load(out_loc, address, /*offset=*/ 0, type);
2418
2419 if (acquire_barrier) {
2420 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2421 }
2422 }
2423
VisitUnsafeGet(HInvoke * invoke)2424 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGet(HInvoke* invoke) {
2425 VisitJdkUnsafeGet(invoke);
2426 }
2427
VisitUnsafeGetAbsolute(HInvoke * invoke)2428 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2429 VisitJdkUnsafeGetAbsolute(invoke);
2430 }
2431
VisitUnsafeGet(HInvoke * invoke)2432 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGet(HInvoke* invoke) {
2433 VisitJdkUnsafeGet(invoke);
2434 }
2435
VisitUnsafeGetAbsolute(HInvoke * invoke)2436 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2437 VisitJdkUnsafeGetAbsolute(invoke);
2438 }
2439
VisitUnsafeGetVolatile(HInvoke * invoke)2440 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2441 VisitJdkUnsafeGetVolatile(invoke);
2442 }
2443
VisitUnsafeGetVolatile(HInvoke * invoke)2444 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2445 VisitJdkUnsafeGetVolatile(invoke);
2446 }
2447
VisitUnsafeGetObject(HInvoke * invoke)2448 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
2449 VisitJdkUnsafeGetReference(invoke);
2450 }
2451
VisitUnsafeGetObject(HInvoke * invoke)2452 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
2453 VisitJdkUnsafeGetReference(invoke);
2454 }
2455
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2456 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2457 VisitJdkUnsafeGetReferenceVolatile(invoke);
2458 }
2459
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2460 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2461 VisitJdkUnsafeGetReferenceVolatile(invoke);
2462 }
2463
VisitUnsafeGetLong(HInvoke * invoke)2464 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
2465 VisitJdkUnsafeGetLong(invoke);
2466 }
2467
VisitUnsafeGetLong(HInvoke * invoke)2468 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
2469 VisitJdkUnsafeGetLong(invoke);
2470 }
2471
VisitUnsafeGetLongVolatile(HInvoke * invoke)2472 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2473 VisitJdkUnsafeGetLongVolatile(invoke);
2474 }
2475
VisitUnsafeGetLongVolatile(HInvoke * invoke)2476 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2477 VisitJdkUnsafeGetLongVolatile(invoke);
2478 }
2479
VisitUnsafeGetByte(HInvoke * invoke)2480 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
2481 VisitJdkUnsafeGetByte(invoke);
2482 }
2483
VisitUnsafeGetByte(HInvoke * invoke)2484 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
2485 VisitJdkUnsafeGetByte(invoke);
2486 }
2487
VisitJdkUnsafeGet(HInvoke * invoke)2488 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
2489 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2490 }
2491
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2492 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2493 CreateUnsafeGetAbsoluteLocations(allocator_, invoke);
2494 }
2495
VisitJdkUnsafeGet(HInvoke * invoke)2496 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
2497 GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2498 }
2499
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2500 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2501 GenUnsafeGetAbsolute(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2502 }
2503
2504 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2505 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2506 }
2507
2508 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2509 GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt32);
2510 }
2511
2512 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2513 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2514 }
2515
2516 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2517 GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
2518 }
2519
2520 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2521 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2522 }
2523
2524 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2525 GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
2526 }
2527
2528 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2529 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2530 }
2531
2532 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2533 GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kReference);
2534 }
2535
2536 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2537 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2538 }
2539
2540 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2541 GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
2542 }
2543
2544 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2545 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2546 }
2547
2548 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2549 GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
2550 }
2551
2552 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2553 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2554 }
2555
2556 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2557 GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt64);
2558 }
2559
2560 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2561 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2562 }
2563
2564 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2565 GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
2566 }
2567
2568 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2569 CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2570 }
2571
2572 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2573 GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
2574 }
2575
2576 static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2577 LocationSummary* locations =
2578 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2579 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2580 locations->SetInAt(1, Location::RequiresRegister());
2581 locations->SetInAt(2, Location::RequiresRegister());
2582 locations->SetInAt(3, Location::RequiresRegister());
2583 if (kPoisonHeapReferences && invoke->InputAt(3)->GetType() == DataType::Type::kReference) {
2584 locations->AddTemp(Location::RequiresRegister());
2585 }
2586 }
2587
2588 static void CreateUnsafePutAbsoluteLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2589 LocationSummary* locations =
2590 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2591 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2592 locations->SetInAt(1, Location::RequiresRegister());
2593 locations->SetInAt(2, Location::RequiresRegister());
2594 }
2595
2596 static void GenUnsafePut(HInvoke* invoke,
2597 CodeGeneratorRISCV64* codegen,
2598 std::memory_order order,
2599 DataType::Type type) {
2600 Riscv64Assembler* assembler = codegen->GetAssembler();
2601 LocationSummary* locations = invoke->GetLocations();
2602 XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
2603 XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
2604 Location value = locations->InAt(3);
2605
2606 {
2607 // We use a block to end the scratch scope before the write barrier, thus
2608 // freeing the temporary registers so they can be used in `MarkGCCard()`.
2609 ScratchRegisterScope srs(assembler);
2610 // Heap poisoning needs two scratch registers in `Store()`.
2611 XRegister address = (kPoisonHeapReferences && type == DataType::Type::kReference)
2612 ? locations->GetTemp(0).AsRegister<XRegister>()
2613 : srs.AllocateXRegister();
2614 __ Add(address, base, offset);
2615 GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
2616 }
2617
2618 if (type == DataType::Type::kReference) {
2619 bool value_can_be_null = true; // TODO: Worth finding out this information?
2620 codegen->MaybeMarkGCCard(base, value.AsRegister<XRegister>(), value_can_be_null);
2621 }
2622 }
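// Illustrative note (not part of the emitted code): a Java-level call such as
//   jdk.internal.misc.Unsafe.putReferenceVolatile(obj, offset, value)
// reaches GenUnsafePut() via VisitJdkUnsafePutReferenceVolatile() below with
// `order == std::memory_order_seq_cst` and `type == DataType::Type::kReference`, so the store
// above is emitted with the required ordering and followed by GC card marking for the reference.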
2623
2624 static void GenUnsafePutAbsolute(HInvoke* invoke,
2625 CodeGeneratorRISCV64* codegen,
2626 std::memory_order order,
2627 DataType::Type type) {
2628 LocationSummary* locations = invoke->GetLocations();
2629 XRegister address = locations->InAt(1).AsRegister<XRegister>();
2630 Location value = locations->InAt(2);
2631
2632 GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
2633 }
2634
2635 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePut(HInvoke* invoke) {
2636 VisitJdkUnsafePut(invoke);
2637 }
2638
2639 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2640 VisitJdkUnsafePutAbsolute(invoke);
2641 }
2642
2643 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePut(HInvoke* invoke) {
2644 VisitJdkUnsafePut(invoke);
2645 }
2646
2647 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2648 VisitJdkUnsafePutAbsolute(invoke);
2649 }
2650
2651 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) {
2652 VisitJdkUnsafePutOrdered(invoke);
2653 }
2654
2655 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) {
2656 VisitJdkUnsafePutOrdered(invoke);
2657 }
2658
2659 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
2660 VisitJdkUnsafePutVolatile(invoke);
2661 }
2662
2663 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
2664 VisitJdkUnsafePutVolatile(invoke);
2665 }
2666
2667 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
2668 VisitJdkUnsafePutReference(invoke);
2669 }
2670
2671 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
2672 VisitJdkUnsafePutReference(invoke);
2673 }
2674
2675 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2676 VisitJdkUnsafePutObjectOrdered(invoke);
2677 }
2678
2679 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2680 VisitJdkUnsafePutObjectOrdered(invoke);
2681 }
2682
2683 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2684 VisitJdkUnsafePutReferenceVolatile(invoke);
2685 }
2686
2687 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2688 VisitJdkUnsafePutReferenceVolatile(invoke);
2689 }
2690
2691 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
2692 VisitJdkUnsafePutLong(invoke);
2693 }
2694
2695 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
2696 VisitJdkUnsafePutLong(invoke);
2697 }
2698
2699 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2700 VisitJdkUnsafePutLongOrdered(invoke);
2701 }
2702
2703 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2704 VisitJdkUnsafePutLongOrdered(invoke);
2705 }
2706
2707 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2708 VisitJdkUnsafePutLongVolatile(invoke);
2709 }
2710
2711 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2712 VisitJdkUnsafePutLongVolatile(invoke);
2713 }
2714
2715 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
2716 VisitJdkUnsafePutByte(invoke);
2717 }
2718
2719 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
2720 VisitJdkUnsafePutByte(invoke);
2721 }
2722
2723 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
2724 CreateUnsafePutLocations(allocator_, invoke);
2725 }
2726
2727 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2728 CreateUnsafePutAbsoluteLocations(allocator_, invoke);
2729 }
2730
2731 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
2732 GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2733 }
2734
2735 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2736 GenUnsafePutAbsolute(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2737 }
2738
2739 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2740 CreateUnsafePutLocations(allocator_, invoke);
2741 }
2742
2743 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2744 GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
2745 }
2746
2747 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2748 CreateUnsafePutLocations(allocator_, invoke);
2749 }
2750
2751 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2752 GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
2753 }
2754
2755 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2756 CreateUnsafePutLocations(allocator_, invoke);
2757 }
2758
2759 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2760 GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
2761 }
2762
2763 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2764 CreateUnsafePutLocations(allocator_, invoke);
2765 }
2766
2767 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2768 GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
2769 }
2770
2771 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2772 CreateUnsafePutLocations(allocator_, invoke);
2773 }
2774
2775 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2776 GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
2777 }
2778
2779 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2780 CreateUnsafePutLocations(allocator_, invoke);
2781 }
2782
2783 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2784 GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
2785 }
2786
2787 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2788 CreateUnsafePutLocations(allocator_, invoke);
2789 }
2790
2791 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2792 GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
2793 }
2794
2795 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2796 CreateUnsafePutLocations(allocator_, invoke);
2797 }
2798
2799 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2800 GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
2801 }
2802
2803 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2804 CreateUnsafePutLocations(allocator_, invoke);
2805 }
2806
2807 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2808 GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
2809 }
2810
2811 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2812 CreateUnsafePutLocations(allocator_, invoke);
2813 }
2814
2815 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2816 GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
2817 }
2818
2819 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2820 CreateUnsafePutLocations(allocator_, invoke);
2821 }
2822
2823 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2824 GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
2825 }
2826
2827 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2828 CreateUnsafePutLocations(allocator_, invoke);
2829 }
2830
2831 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2832 GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
2833 }
2834
2835 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2836 HInvoke* invoke,
2837 CodeGeneratorRISCV64* codegen) {
2838 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
2839 LocationSummary* locations = new (allocator) LocationSummary(
2840 invoke,
2841 can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
2842 kIntrinsified);
2843 if (can_call && kUseBakerReadBarrier) {
2844 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2845 }
2846 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2847 locations->SetInAt(1, Location::RequiresRegister());
2848 locations->SetInAt(2, Location::RequiresRegister());
2849 locations->SetInAt(3, Location::RequiresRegister());
2850 locations->SetInAt(4, Location::RequiresRegister());
2851
2852 locations->SetOut(Location::RequiresRegister());
2853 }
2854
2855 static void GenUnsafeCas(HInvoke* invoke, CodeGeneratorRISCV64* codegen, DataType::Type type) {
2856 Riscv64Assembler* assembler = codegen->GetAssembler();
2857 LocationSummary* locations = invoke->GetLocations();
2858 XRegister out = locations->Out().AsRegister<XRegister>(); // Boolean result.
2859 XRegister object = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
2860 XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
2861 XRegister expected = locations->InAt(3).AsRegister<XRegister>(); // Expected.
2862 XRegister new_value = locations->InAt(4).AsRegister<XRegister>(); // New value.
2863
2864 // This must come before allocating the scratch registers below, as MaybeMarkGCCard() also uses scratch registers.
2865 if (type == DataType::Type::kReference) {
2866 // Mark card for object assuming new value is stored.
2867 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
2868 codegen->MaybeMarkGCCard(object, new_value, new_value_can_be_null);
2869 }
2870
2871 ScratchRegisterScope srs(assembler);
2872 XRegister tmp_ptr = srs.AllocateXRegister(); // Pointer to actual memory.
2873 XRegister old_value; // Value in memory.
2874
2875 Riscv64Label exit_loop_label;
2876 Riscv64Label* exit_loop = &exit_loop_label;
2877 Riscv64Label* cmp_failure = &exit_loop_label;
2878
2879 ReadBarrierCasSlowPathRISCV64* slow_path = nullptr;
2880 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
2881 // We need to store the `old_value` in a non-scratch register to make sure
2882 // the read barrier in the slow path does not clobber it.
2883 old_value = locations->GetTemp(0).AsRegister<XRegister>(); // The old value from main path.
2884 // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
2885 // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
2886 // as we do have marking entrypoints on riscv64 even for scratch registers.
2887 XRegister old_value_temp = srs.AllocateXRegister();
2888 slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
2889 invoke,
2890 std::memory_order_seq_cst,
2891 /*strong=*/ true,
2892 object,
2893 offset,
2894 expected,
2895 new_value,
2896 old_value,
2897 old_value_temp,
2898 /*store_result=*/ old_value_temp, // Let the SC result clobber the reloaded old_value.
2899 /*update_old_value=*/ false,
2900 codegen);
2901 codegen->AddSlowPath(slow_path);
2902 exit_loop = slow_path->GetExitLabel();
2903 cmp_failure = slow_path->GetEntryLabel();
2904 } else {
2905 old_value = srs.AllocateXRegister();
2906 }
2907
2908 __ Add(tmp_ptr, object, offset);
2909
2910 // Pre-populate the result register with failure.
2911 __ Li(out, 0);
2912
2913 GenerateCompareAndSet(assembler,
2914 type,
2915 std::memory_order_seq_cst,
2916 /*strong=*/ true,
2917 cmp_failure,
2918 tmp_ptr,
2919 new_value,
2920 old_value,
2921 /*mask=*/ kNoXRegister,
2922 /*masked=*/ kNoXRegister,
2923 /*store_result=*/ old_value, // Let the SC result clobber the `old_value`.
2924 expected);
2925
2926 DCHECK_EQ(slow_path != nullptr, type == DataType::Type::kReference && codegen->EmitReadBarrier());
2927 if (slow_path != nullptr) {
2928 __ Bind(slow_path->GetSuccessExitLabel());
2929 }
2930
2931 // Indicate success if we successfully execute the SC.
2932 __ Li(out, 1);
2933
2934 __ Bind(exit_loop);
2935 }
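// Illustrative sketch (simplified; the exact registers and ordering bits are chosen by
// GenerateCompareAndSet()): for a 64-bit seq_cst strong CAS the call above expands to an LR/SC
// retry loop roughly of the form
//   retry:
//     lr.d.aqrl  old_value, (tmp_ptr)
//     bne        old_value, expected, cmp_failure
//     sc.d.rl    store_result, new_value, (tmp_ptr)
//     bnez       store_result, retry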
2936
2937 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
2938 VisitJdkUnsafeCASInt(invoke);
2939 }
2940
2941 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
2942 VisitJdkUnsafeCASInt(invoke);
2943 }
2944
2945 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
2946 VisitJdkUnsafeCASLong(invoke);
2947 }
2948
2949 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
2950 VisitJdkUnsafeCASLong(invoke);
2951 }
2952
2953 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
2954 VisitJdkUnsafeCASObject(invoke);
2955 }
2956
2957 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
2958 VisitJdkUnsafeCASObject(invoke);
2959 }
2960
2961 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2962 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2963 VisitJdkUnsafeCompareAndSetInt(invoke);
2964 }
2965
2966 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2967 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2968 VisitJdkUnsafeCompareAndSetInt(invoke);
2969 }
2970
2971 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2972 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2973 VisitJdkUnsafeCompareAndSetLong(invoke);
2974 }
2975
2976 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2977 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2978 VisitJdkUnsafeCompareAndSetLong(invoke);
2979 }
2980
2981 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2982 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2983 VisitJdkUnsafeCompareAndSetReference(invoke);
2984 }
2985
2986 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2987 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2988 VisitJdkUnsafeCompareAndSetReference(invoke);
2989 }
2990
2991 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2992 CreateUnsafeCASLocations(allocator_, invoke, codegen_);
2993 }
2994
2995 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2996 GenUnsafeCas(invoke, codegen_, DataType::Type::kInt32);
2997 }
2998
2999 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
3000 CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3001 }
3002
3003 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
3004 GenUnsafeCas(invoke, codegen_, DataType::Type::kInt64);
3005 }
3006
3007 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3008 // The only supported read barrier implementation is the Baker-style read barriers.
3009 if (codegen_->EmitNonBakerReadBarrier()) {
3010 return;
3011 }
3012
3013 // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
3014 if (kPoisonHeapReferences) {
3015 return;
3016 }
3017
3018 CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3019 if (codegen_->EmitReadBarrier()) {
3020 DCHECK(kUseBakerReadBarrier);
3021 // We need one non-scratch temporary register for read barrier.
3022 LocationSummary* locations = invoke->GetLocations();
3023 locations->AddTemp(Location::RequiresRegister());
3024 }
3025 }
3026
3027 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3028 GenUnsafeCas(invoke, codegen_, DataType::Type::kReference);
3029 }
3030
3031 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
3032 HInvoke* invoke,
3033 CodeGeneratorRISCV64* codegen) {
3034 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
3035 LocationSummary* locations = new (allocator) LocationSummary(
3036 invoke,
3037 can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
3038 kIntrinsified);
3039 if (can_call && kUseBakerReadBarrier) {
3040 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3041 }
3042 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3043 locations->SetInAt(1, Location::RequiresRegister());
3044 locations->SetInAt(2, Location::RequiresRegister());
3045 locations->SetInAt(3, Location::RequiresRegister());
3046
3047 // Request another temporary register for methods that don't return a value.
3048 DataType::Type return_type = invoke->GetType();
3049 const bool is_void = return_type == DataType::Type::kVoid;
3050 if (is_void) {
3051 locations->AddTemp(Location::RequiresRegister());
3052 } else {
3053 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3054 }
3055 }
3056
3057 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
3058 DataType::Type type,
3059 CodeGeneratorRISCV64* codegen,
3060 GetAndUpdateOp get_and_update_op) {
3061 // Currently used only for GetAndUpdateOp::kAdd and GetAndUpdateOp::kSet. Other ops might work
3062 // as well, but double-check before using.
3063 DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
3064
3065 Riscv64Assembler* assembler = codegen->GetAssembler();
3066 LocationSummary* locations = invoke->GetLocations();
3067 DataType::Type return_type = invoke->GetType();
3068 const bool is_void = return_type == DataType::Type::kVoid;
3069 // We use a temporary for void methods, as we don't return the value.
3070 Location out_or_temp_loc =
3071 is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
3072 XRegister out_or_temp = out_or_temp_loc.AsRegister<XRegister>(); // Result.
3073 XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
3074 XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
3075 XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend.
3076
3077 // This must come before allocating the scratch registers below, as MaybeMarkGCCard() also uses scratch registers.
3078 if (type == DataType::Type::kReference) {
3079 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3080 // Mark card for object as a new value shall be stored.
3081 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
3082 codegen->MaybeMarkGCCard(base, /*value=*/arg, new_value_can_be_null);
3083 }
3084
3085 ScratchRegisterScope srs(assembler);
3086 XRegister tmp_ptr = srs.AllocateXRegister(); // Pointer to actual memory.
3087 __ Add(tmp_ptr, base, offset);
3088 GenerateGetAndUpdate(codegen,
3089 get_and_update_op,
3090 (type == DataType::Type::kReference) ? DataType::Type::kInt32 : type,
3091 std::memory_order_seq_cst,
3092 tmp_ptr,
3093 arg,
3094 /*old_value=*/ out_or_temp,
3095 /*mask=*/ kNoXRegister,
3096 /*temp=*/ kNoXRegister);
3097
3098 if (!is_void && type == DataType::Type::kReference) {
3099 __ ZextW(out_or_temp, out_or_temp);
3100 if (codegen->EmitReadBarrier()) {
3101 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3102 if (kUseBakerReadBarrier) {
3103 // Use RA as temp. It is clobbered in the slow path anyway.
3104 static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
3105 SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
3106 invoke, out_or_temp_loc, kBakerReadBarrierTemp);
3107 codegen->EmitBakerReadBarierMarkingCheck(
3108 rb_slow_path, out_or_temp_loc, kBakerReadBarrierTemp);
3109 } else {
3110 codegen->GenerateReadBarrierSlow(invoke,
3111 out_or_temp_loc,
3112 out_or_temp_loc,
3113 Location::RegisterLocation(base),
3114 /*offset=*/ 0u,
3115 /*index=*/ Location::RegisterLocation(offset));
3116 }
3117 }
3118 }
3119 }
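// Illustrative note (the exact instruction selection is up to GenerateGetAndUpdate()): with no
// mask and `std::memory_order_seq_cst`, the update above can map directly to RISC-V AMO
// instructions, e.g. roughly `amoadd.w.aqrl out, arg, (tmp_ptr)` for GetAndUpdateOp::kAdd on
// Int32 and `amoswap.d.aqrl out, arg, (tmp_ptr)` for GetAndUpdateOp::kSet on Int64.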
3120
3121 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3122 VisitJdkUnsafeGetAndAddInt(invoke);
3123 }
3124
3125 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3126 VisitJdkUnsafeGetAndAddInt(invoke);
3127 }
3128
3129 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3130 VisitJdkUnsafeGetAndAddLong(invoke);
3131 }
3132
3133 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3134 VisitJdkUnsafeGetAndAddLong(invoke);
3135 }
3136
3137 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3138 VisitJdkUnsafeGetAndSetInt(invoke);
3139 }
3140
3141 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3142 VisitJdkUnsafeGetAndSetInt(invoke);
3143 }
3144
3145 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3146 VisitJdkUnsafeGetAndSetLong(invoke);
3147 }
3148
3149 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3150 VisitJdkUnsafeGetAndSetLong(invoke);
3151 }
3152
3153 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3154 VisitJdkUnsafeGetAndSetReference(invoke);
3155 }
3156
3157 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3158 VisitJdkUnsafeGetAndSetReference(invoke);
3159 }
3160
3161 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3162 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3163 }
3164
3165 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3166 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
3167 }
3168
3169 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3170 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3171 }
3172
3173 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3174 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
3175 }
3176
3177 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3178 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3179 }
3180
3181 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3182 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
3183 }
3184
3185 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3186 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3187 }
3188
3189 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3190 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
3191 }
3192
3193 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3194 // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
3195 if (kPoisonHeapReferences) {
3196 return;
3197 }
3198
3199 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3200 }
3201
3202 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3203 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
3204 }
3205
3206 void IntrinsicLocationsBuilderRISCV64::VisitStringCompareTo(HInvoke* invoke) {
3207 LocationSummary* locations =
3208 new (allocator_) LocationSummary(invoke,
3209 invoke->InputAt(1)->CanBeNull()
3210 ? LocationSummary::kCallOnSlowPath
3211 : LocationSummary::kNoCall,
3212 kIntrinsified);
3213 locations->SetInAt(0, Location::RequiresRegister());
3214 locations->SetInAt(1, Location::RequiresRegister());
3215 locations->AddRegisterTemps(3);
3216 // Need an extra temporary register for the String compression feature.
3217 if (mirror::kUseStringCompression) {
3218 locations->AddTemp(Location::RequiresRegister());
3219 }
3220 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3221 }
3222
3223 void IntrinsicCodeGeneratorRISCV64::VisitStringCompareTo(HInvoke* invoke) {
3224 Riscv64Assembler* assembler = GetAssembler();
3225 DCHECK(assembler->IsExtensionEnabled(Riscv64Extension::kZbb));
3226 LocationSummary* locations = invoke->GetLocations();
3227
3228 XRegister str = locations->InAt(0).AsRegister<XRegister>();
3229 XRegister arg = locations->InAt(1).AsRegister<XRegister>();
3230 XRegister out = locations->Out().AsRegister<XRegister>();
3231
3232 XRegister temp0 = locations->GetTemp(0).AsRegister<XRegister>();
3233 XRegister temp1 = locations->GetTemp(1).AsRegister<XRegister>();
3234 XRegister temp2 = locations->GetTemp(2).AsRegister<XRegister>();
3235 XRegister temp3 = kNoXRegister;
3236 if (mirror::kUseStringCompression) {
3237 temp3 = locations->GetTemp(3).AsRegister<XRegister>();
3238 }
3239
3240 Riscv64Label loop;
3241 Riscv64Label find_char_diff;
3242 Riscv64Label end;
3243 Riscv64Label different_compression;
3244
3245 // Get offsets of count and value fields within a string object.
3246 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
3247 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
3248
3249 // Note that the null check must have been done earlier.
3250 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
3251
3252 // Take slow path and throw if input can be and is null.
3253 SlowPathCodeRISCV64* slow_path = nullptr;
3254 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
3255 if (can_slow_path) {
3256 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
3257 codegen_->AddSlowPath(slow_path);
3258 __ Beqz(arg, slow_path->GetEntryLabel());
3259 }
3260
3261 // Reference equality check, return 0 if same reference.
3262 __ Sub(out, str, arg);
3263 __ Beqz(out, &end);
3264
3265 if (mirror::kUseStringCompression) {
3266 // Load `count` fields of this and argument strings.
3267 __ Loadwu(temp3, str, count_offset);
3268 __ Loadwu(temp2, arg, count_offset);
3269 // Clean out compression flag from lengths.
3270 __ Srliw(temp0, temp3, 1u);
3271 __ Srliw(temp1, temp2, 1u);
3272 } else {
3273 // Load lengths of this and argument strings.
3274 __ Loadwu(temp0, str, count_offset);
3275 __ Loadwu(temp1, arg, count_offset);
3276 }
3277 // out = length diff.
3278 __ Subw(out, temp0, temp1);
3279
3280 // Find the length of the shorter string
3281 __ Minu(temp0, temp0, temp1);
3282 // Shorter string is empty?
3283 __ Beqz(temp0, &end);
3284
3285 if (mirror::kUseStringCompression) {
3286 // Extract both compression flags
3287 __ Andi(temp3, temp3, 1);
3288 __ Andi(temp2, temp2, 1);
3289 __ Bne(temp2, temp3, &different_compression);
3290 }
3291 // Store offset of string value in preparation for comparison loop.
3292 __ Li(temp1, value_offset);
3293 if (mirror::kUseStringCompression) {
3294 // For string compression, calculate the number of bytes to compare (not chars).
3295 __ Sll(temp0, temp0, temp3);
3296 }
3297
3298 // Assertions that must hold in order to compare strings 8 bytes at a time.
3299 DCHECK_ALIGNED(value_offset, 8);
3300 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
3301
3302 constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
3303 static_assert(char_size == 2u, "Char expected to be 2 bytes wide");
3304
3305 ScratchRegisterScope scratch_scope(assembler);
3306 XRegister temp4 = scratch_scope.AllocateXRegister();
3307
3308 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
3309 __ Bind(&loop);
3310 __ Add(temp4, str, temp1);
3311 __ Ld(temp4, temp4, 0);
3312 __ Add(temp2, arg, temp1);
3313 __ Ld(temp2, temp2, 0);
3314 __ Bne(temp4, temp2, &find_char_diff);
3315 __ Addi(temp1, temp1, char_size * 4);
3316 // With string compression, we have compared 8 bytes, otherwise 4 chars.
3317 __ Addi(temp0, temp0, (mirror::kUseStringCompression) ? -8 : -4);
3318 __ Bgtz(temp0, &loop);
3319 __ J(&end);
3320
3321 // Find the single character difference.
3322 __ Bind(&find_char_diff);
3323 // Get the bit position of the first character that differs.
3324 __ Xor(temp1, temp2, temp4);
3325 __ Ctz(temp1, temp1);
3326
3327 // If the number of chars remaining <= the index where the difference occurs (0-3), then
3328 // the difference occurs outside the remaining string data, so just return length diff (out).
3329 __ Srliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
3330 __ Ble(temp0, temp1, &end);
3331
3332 // Extract the characters and calculate the difference.
3333 __ Slliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
3334 if (mirror::kUseStringCompression) {
3335 __ Slliw(temp3, temp3, 3u);
3336 __ Andn(temp1, temp1, temp3);
3337 }
3338 __ Srl(temp2, temp2, temp1);
3339 __ Srl(temp4, temp4, temp1);
3340 if (mirror::kUseStringCompression) {
3341 __ Li(temp0, -256); // ~0xff
3342 __ Sllw(temp0, temp0, temp3); // temp3 = 0 or 8, temp0 := ~0xff or ~0xffff
3343 __ Andn(temp4, temp4, temp0); // Extract 8 or 16 bits.
3344 __ Andn(temp2, temp2, temp0); // Extract 8 or 16 bits.
3345 } else {
3346 __ ZextH(temp4, temp4);
3347 __ ZextH(temp2, temp2);
3348 }
3349
3350 __ Subw(out, temp4, temp2);
3351
3352 if (mirror::kUseStringCompression) {
3353 __ J(&end);
3354 __ Bind(&different_compression);
3355
3356 // Comparison for different compression style.
3357 constexpr size_t c_char_size = DataType::Size(DataType::Type::kInt8);
3358 static_assert(c_char_size == 1u, "Compressed char expected to be 1 byte wide");
3359
3360 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
3361 __ Xor(temp4, str, arg);
3362 __ Addi(temp3, temp3, -1); // -1 if str is compressed, 0 otherwise
3363 __ And(temp2, temp4, temp3); // str^arg if str is compressed, 0 otherwise
3364 __ Xor(temp1, temp2, arg); // str if str is compressed, arg otherwise
3365 __ Xor(temp2, temp2, str); // arg if str is compressed, str otherwise
3366
3367 // We want to free up the temp3, currently holding `str` compression flag, for comparison.
3368 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
3369 // as unsigned. This will allow `addi temp0, temp0, -2; bgtz different_compression_loop`
3370 // to serve as the loop condition.
3371 __ Sh1Add(temp0, temp0, temp3);
3372
3373 // Adjust temp1 and temp2 from string pointers to data pointers.
3374 __ Addi(temp1, temp1, value_offset);
3375 __ Addi(temp2, temp2, value_offset);
3376
3377 Riscv64Label different_compression_loop;
3378 Riscv64Label different_compression_diff;
3379
3380 __ Bind(&different_compression_loop);
3381 __ Lbu(temp4, temp1, 0);
3382 __ Addiw(temp1, temp1, c_char_size);
3383 __ Lhu(temp3, temp2, 0);
3384 __ Addi(temp2, temp2, char_size);
3385 __ Sub(temp4, temp4, temp3);
3386 __ Bnez(temp4, &different_compression_diff);
3387 __ Addi(temp0, temp0, -2);
3388 __ Bgtz(temp0, &different_compression_loop);
3389 __ J(&end);
3390
3391 // Calculate the difference.
3392 __ Bind(&different_compression_diff);
3393 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
3394 "Expecting 0=compressed, 1=uncompressed");
3395 __ Andi(temp0, temp0, 1);
3396 __ Addi(temp0, temp0, -1);
3397 __ Xor(out, temp4, temp0);
3398 __ Sub(out, out, temp0);
3399 }
3400
3401 __ Bind(&end);
3402
3403 if (can_slow_path) {
3404 __ Bind(slow_path->GetExitLabel());
3405 }
3406 }
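// Illustrative note on the compressed `count` field used above: with string compression enabled,
// `count` holds (length << 1) | flag, where flag 0 means compressed (8-bit chars) and 1 means
// uncompressed (16-bit chars). For example, an uncompressed 3-character string has count == 7,
// so `Srliw(temp0, temp3, 1)` recovers the length 3 and `Andi(temp3, temp3, 1)` recovers the
// flag 1.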
3407
3408 class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
3409 public:
3410 VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
3411 : IntrinsicSlowPathRISCV64(invoke),
3412 order_(order),
3413 return_success_(false),
3414 strong_(false),
3415 get_and_update_op_(GetAndUpdateOp::kAdd) {
3416 }
3417
3418 Riscv64Label* GetByteArrayViewCheckLabel() {
3419 return &byte_array_view_check_label_;
3420 }
3421
3422 Riscv64Label* GetNativeByteOrderLabel() {
3423 return &native_byte_order_label_;
3424 }
3425
3426 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3427 if (return_success) {
3428 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3429 } else {
3430 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3431 }
3432 return_success_ = return_success;
3433 strong_ = strong;
3434 }
3435
3436 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3437 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3438 get_and_update_op_ = get_and_update_op;
3439 }
3440
3441 void EmitNativeCode(CodeGenerator* codegen_in) override {
3442 if (GetByteArrayViewCheckLabel()->IsLinked()) {
3443 EmitByteArrayViewCode(codegen_in);
3444 }
3445 IntrinsicSlowPathRISCV64::EmitNativeCode(codegen_in);
3446 }
3447
3448 private:
3449 HInvoke* GetInvoke() const {
3450 return GetInstruction()->AsInvoke();
3451 }
3452
3453 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3454 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3455 }
3456
3457 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3458
3459 Riscv64Label byte_array_view_check_label_;
3460 Riscv64Label native_byte_order_label_;
3461 // Shared parameter for all VarHandle intrinsics.
3462 std::memory_order order_;
3463 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3464 bool return_success_;
3465 bool strong_;
3466 // Extra argument for GenerateVarHandleGetAndUpdate().
3467 GetAndUpdateOp get_and_update_op_;
3468 };
3469
3470 // Generate subtype check without read barriers.
3471 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorRISCV64* codegen,
3472 SlowPathCodeRISCV64* slow_path,
3473 XRegister object,
3474 XRegister type,
3475 bool object_can_be_null = true) {
3476 Riscv64Assembler* assembler = codegen->GetAssembler();
3477
3478 const MemberOffset class_offset = mirror::Object::ClassOffset();
3479 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3480
3481 Riscv64Label success;
3482 if (object_can_be_null) {
3483 __ Beqz(object, &success);
3484 }
3485
3486 ScratchRegisterScope srs(assembler);
3487 XRegister temp = srs.AllocateXRegister();
3488
3489 // Note: The `type` can be `TMP`. We're using "bare" local branches to enforce that they shall
3490 // not be expanded and the scratch register `TMP` shall not be clobbered if taken. Taking the
3491 // branch to the slow path can theoretically clobber `TMP` (if outside the 1 MiB range).
3492 __ Loadwu(temp, object, class_offset.Int32Value());
3493 codegen->MaybeUnpoisonHeapReference(temp);
3494 Riscv64Label loop;
3495 __ Bind(&loop);
3496 __ Beq(type, temp, &success, /*is_bare=*/ true);
3497 // We may not have another scratch register for `Loadwu()`. Use `Lwu()` directly.
3498 DCHECK(IsInt<12>(super_class_offset.Int32Value()));
3499 __ Lwu(temp, temp, super_class_offset.Int32Value());
3500 codegen->MaybeUnpoisonHeapReference(temp);
3501 __ Beqz(temp, slow_path->GetEntryLabel());
3502 __ J(&loop, /*is_bare=*/ true);
3503 __ Bind(&success);
3504 }
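// Illustrative pseudo-code of the check emitted above (read barriers deliberately omitted):
//   if (object != null) {
//     for (klass = object->klass_; klass != type; klass = klass->super_class_) {
//       if (klass == null) goto slow_path;  // Possible false negative; resolved in the slow path.
//     }
//   }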
3505
3506 // Check access mode and the primitive type from VarHandle.varType.
3507 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3508 // check without read barrier, so it can have false negatives which we handle in the slow path.
3509 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3510 CodeGeneratorRISCV64* codegen,
3511 SlowPathCodeRISCV64* slow_path,
3512 DataType::Type type) {
3513 mirror::VarHandle::AccessMode access_mode =
3514 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3515 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3516
3517 Riscv64Assembler* assembler = codegen->GetAssembler();
3518 LocationSummary* locations = invoke->GetLocations();
3519 XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3520
3521 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3522 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3523 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3524
3525 ScratchRegisterScope srs(assembler);
3526 XRegister temp = srs.AllocateXRegister();
3527 XRegister temp2 = srs.AllocateXRegister();
3528
3529 // Check that the operation is permitted.
3530 __ Loadw(temp, varhandle, access_mode_bit_mask_offset.Int32Value());
3531 DCHECK_LT(enum_cast<uint32_t>(access_mode), 31u); // We cannot avoid the shift below.
3532 __ Slliw(temp, temp, 31 - enum_cast<uint32_t>(access_mode)); // Shift tested bit to sign bit.
3533 __ Bgez(temp, slow_path->GetEntryLabel()); // If not permitted, go to slow path.
3534
3535 // For primitive types, we do not need a read barrier when loading the varType reference, as we
3536 // only use it to load its constant primitiveType field. For reference types, we deliberately
3537 // avoid the read barrier, letting the slow path handle the false negatives.
3538 __ Loadwu(temp, varhandle, var_type_offset.Int32Value());
3539 codegen->MaybeUnpoisonHeapReference(temp);
3540
3541 // Check the varType.primitiveType field against the type we're trying to use.
3542 __ Loadhu(temp2, temp, primitive_type_offset.Int32Value());
3543 if (primitive_type == Primitive::kPrimNot) {
3544 static_assert(Primitive::kPrimNot == 0);
3545 __ Bnez(temp2, slow_path->GetEntryLabel());
3546 } else {
3547 __ Li(temp, enum_cast<int32_t>(primitive_type)); // `temp` can be clobbered.
3548 __ Bne(temp2, temp, slow_path->GetEntryLabel());
3549 }
3550
3551 srs.FreeXRegister(temp2);
3552
3553 if (type == DataType::Type::kReference) {
3554 // Check reference arguments against the varType.
3555 // False negatives due to varType being an interface or array type
3556 // or due to the missing read barrier are handled by the slow path.
3557 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3558 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3559 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3560 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3561 HInstruction* arg = invoke->InputAt(arg_index);
3562 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3563 if (!arg->IsNullConstant()) {
3564 XRegister arg_reg = locations->InAt(arg_index).AsRegister<XRegister>();
3565 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp);
3566 }
3567 }
3568 }
3569 }
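// Illustrative example of the access mode test above: access mode k is permitted iff bit k of
// the `accessModesBitMask` field is set. Shifting the mask left by (31 - k) moves bit k into the
// sign bit of the 32-bit value, so the `Bgez` (sign bit clear) branch goes to the slow path
// exactly when the mode is not permitted.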
3570
3571 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3572 CodeGeneratorRISCV64* codegen,
3573 SlowPathCodeRISCV64* slow_path) {
3574 Riscv64Assembler* assembler = codegen->GetAssembler();
3575 XRegister varhandle = invoke->GetLocations()->InAt(0).AsRegister<XRegister>();
3576
3577 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3578
3579 ScratchRegisterScope srs(assembler);
3580 XRegister temp = srs.AllocateXRegister();
3581
3582 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3583 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3584 __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3585 __ Bnez(temp, slow_path->GetEntryLabel());
3586 }
3587
3588 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3589 CodeGeneratorRISCV64* codegen,
3590 SlowPathCodeRISCV64* slow_path) {
3591 VarHandleOptimizations optimizations(invoke);
3592 Riscv64Assembler* assembler = codegen->GetAssembler();
3593 LocationSummary* locations = invoke->GetLocations();
3594 XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3595 XRegister object = locations->InAt(1).AsRegister<XRegister>();
3596
3597 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3598 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3599
3600 // Null-check the object.
3601 if (!optimizations.GetSkipObjectNullCheck()) {
3602 __ Beqz(object, slow_path->GetEntryLabel());
3603 }
3604
3605 if (!optimizations.GetUseKnownImageVarHandle()) {
3606 ScratchRegisterScope srs(assembler);
3607 XRegister temp = srs.AllocateXRegister();
3608
3609 // Check that the VarHandle references an instance field by checking that
3610 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3611 // type compatibility check with the source object's type, which will fail for null.
3612 __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
3613 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3614 __ Bnez(temp, slow_path->GetEntryLabel());
3615
3616 // Check that the object has the correct type.
3617 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3618 __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3619 codegen->MaybeUnpoisonHeapReference(temp);
3620 GenerateSubTypeObjectCheckNoReadBarrier(
3621 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
3622 }
3623 }
3624
3625 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3626 CodeGeneratorRISCV64* codegen,
3627 VarHandleSlowPathRISCV64* slow_path) {
3628 VarHandleOptimizations optimizations(invoke);
3629 Riscv64Assembler* assembler = codegen->GetAssembler();
3630 LocationSummary* locations = invoke->GetLocations();
3631 XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3632 XRegister object = locations->InAt(1).AsRegister<XRegister>();
3633 XRegister index = locations->InAt(2).AsRegister<XRegister>();
3634 DataType::Type value_type =
3635 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3636 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3637
3638 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3639 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3640 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3641 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3642 const MemberOffset class_offset = mirror::Object::ClassOffset();
3643 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3644
3645 // Null-check the object.
3646 if (!optimizations.GetSkipObjectNullCheck()) {
3647 __ Beqz(object, slow_path->GetEntryLabel());
3648 }
3649
3650 ScratchRegisterScope srs(assembler);
3651 XRegister temp = srs.AllocateXRegister();
3652 XRegister temp2 = srs.AllocateXRegister();
3653
3654 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3655 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3656 // coordinateType0 shall not be null but we do not explicitly verify that.
3657 __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
3658 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3659 __ Beqz(temp, slow_path->GetEntryLabel());
3660
3661 // Check object class against componentType0.
3662 //
3663 // This is an exact check and we defer other cases to the runtime. This includes
3664 // conversion to array of superclass references, which is valid but subsequently
3665 // requires all update operations to check that the value can indeed be stored.
3666 // We do not want to perform such extra checks in the intrinsified code.
3667 //
3668 // We do this check without read barrier, so there can be false negatives which we
3669 // defer to the slow path. There shall be no false negatives for array classes in the
3670 // boot image (including Object[] and primitive arrays) because they are non-movable.
3671 __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3672 __ Loadwu(temp2, object, class_offset.Int32Value());
3673 __ Bne(temp, temp2, slow_path->GetEntryLabel());
3674
3675 // Check that the coordinateType0 is an array type. We do not need a read barrier
3676 // for loading constant reference fields (or chains of them) for comparison with null,
3677 // nor for finally loading a constant primitive field (primitive type) below.
3678 codegen->MaybeUnpoisonHeapReference(temp);
3679 __ Loadwu(temp2, temp, component_type_offset.Int32Value());
3680 codegen->MaybeUnpoisonHeapReference(temp2);
3681 __ Beqz(temp2, slow_path->GetEntryLabel());
3682
3683 // Check that the array component type matches the primitive type.
3684 __ Loadhu(temp, temp2, primitive_type_offset.Int32Value());
3685 if (primitive_type == Primitive::kPrimNot) {
3686 static_assert(Primitive::kPrimNot == 0);
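    // A reference array has `kPrimNot` (0) as the component's primitive type, so any non-zero
    // value means the component is a primitive type and we go to the slow path.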
3687 __ Bnez(temp, slow_path->GetEntryLabel());
3688 } else {
3689 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3690 // we shall check for a byte array view in the slow path.
3691 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3692 // so we cannot emit that if we're JITting without boot image.
3693 bool boot_image_available =
3694 codegen->GetCompilerOptions().IsBootImage() ||
3695 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3696 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3697 Riscv64Label* slow_path_label =
3698 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3699 __ Li(temp2, enum_cast<int32_t>(primitive_type));
3700 __ Bne(temp, temp2, slow_path_label);
3701 }
3702
3703 // Check for array index out of bounds.
3704 __ Loadw(temp, object, array_length_offset.Int32Value());
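  // The unsigned comparison below also rejects negative indices, which compare as large
  // unsigned values and therefore never pass the bounds check.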
3705 __ Bgeu(index, temp, slow_path->GetEntryLabel());
3706 }
3707
3708 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3709 CodeGeneratorRISCV64* codegen,
3710 VarHandleSlowPathRISCV64* slow_path) {
3711 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3712 if (expected_coordinates_count == 0u) {
3713 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3714 } else if (expected_coordinates_count == 1u) {
3715 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3716 } else {
3717 DCHECK_EQ(expected_coordinates_count, 2u);
3718 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3719 }
3720 }
3721
3722 static VarHandleSlowPathRISCV64* GenerateVarHandleChecks(HInvoke* invoke,
3723 CodeGeneratorRISCV64* codegen,
3724 std::memory_order order,
3725 DataType::Type type) {
3726 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3727 VarHandleOptimizations optimizations(invoke);
3728 if (optimizations.GetUseKnownImageVarHandle()) {
3729 DCHECK_NE(expected_coordinates_count, 2u);
3730 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3731 return nullptr;
3732 }
3733 }
3734
3735 VarHandleSlowPathRISCV64* slow_path =
3736 new (codegen->GetScopedAllocator()) VarHandleSlowPathRISCV64(invoke, order);
3737 codegen->AddSlowPath(slow_path);
3738
3739 if (!optimizations.GetUseKnownImageVarHandle()) {
3740 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3741 }
3742 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3743
3744 return slow_path;
3745 }
3746
3747 struct VarHandleTarget {
3748 XRegister object; // The object holding the value to operate on.
3749 XRegister offset; // The offset of the value to operate on.
3750 };
3751
3752 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3753 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3754 LocationSummary* locations = invoke->GetLocations();
3755
3756 VarHandleTarget target;
3757 // The temporary allocated for loading the offset.
3758 target.offset = locations->GetTemp(0u).AsRegister<XRegister>();
3759 // The reference to the object that holds the value to operate on.
3760 target.object = (expected_coordinates_count == 0u)
3761 ? locations->GetTemp(1u).AsRegister<XRegister>()
3762 : locations->InAt(1).AsRegister<XRegister>();
3763 return target;
3764 }
3765
3766 static void GenerateVarHandleTarget(HInvoke* invoke,
3767 const VarHandleTarget& target,
3768 CodeGeneratorRISCV64* codegen) {
3769 Riscv64Assembler* assembler = codegen->GetAssembler();
3770 LocationSummary* locations = invoke->GetLocations();
3771 XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3772 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3773
3774 if (expected_coordinates_count <= 1u) {
3775 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
3776 ScopedObjectAccess soa(Thread::Current());
3777 ArtField* target_field = GetBootImageVarHandleField(invoke);
3778 if (expected_coordinates_count == 0u) {
3779 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3780 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3781 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
3782 codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
3783 } else {
3784 codegen->LoadTypeForBootImageIntrinsic(
3785 target.object,
3786 TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
3787 }
3788 }
3789 __ Li(target.offset, target_field->GetOffset().Uint32Value());
3790 } else {
3791 // For static fields, we need to fill the `target.object` with the declaring class,
3792 // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
3793 // we do not need the declaring class and can forget the `ArtField*` once we have
3794 // loaded the `target.offset`, so we use `target.offset` to hold the `ArtField*`.
3795 XRegister field = (expected_coordinates_count == 0) ? target.object : target.offset;
3796
3797 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3798 const MemberOffset offset_offset = ArtField::OffsetOffset();
3799
3800 // Load the ArtField*, the offset and, if needed, declaring class.
3801 __ Loadd(field, varhandle, art_field_offset.Int32Value());
3802 __ Loadwu(target.offset, field, offset_offset.Int32Value());
3803 if (expected_coordinates_count == 0u) {
3804 codegen->GenerateGcRootFieldLoad(
3805 invoke,
3806 Location::RegisterLocation(target.object),
3807 field,
3808 ArtField::DeclaringClassOffset().Int32Value(),
3809 codegen->GetCompilerReadBarrierOption());
3810 }
3811 }
3812 } else {
3813 DCHECK_EQ(expected_coordinates_count, 2u);
3814 DataType::Type value_type =
3815 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3816 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3817
3818 XRegister index = locations->InAt(2).AsRegister<XRegister>();
3819 __ Li(target.offset, data_offset.Int32Value());
3820 codegen->GetInstructionVisitor()->ShNAdd(target.offset, index, target.offset, value_type);
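    // The resulting offset is `data_offset + (index << log2(element_size))`; `ShNAdd()` emits
    // the shift-and-add matching the element size of `value_type`.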
3821 }
3822 }
3823
3824 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
3825 CodeGeneratorRISCV64* codegen) {
3826 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3827 DataType::Type return_type = invoke->GetType();
3828
3829 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3830 LocationSummary* locations =
3831 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3832 locations->SetInAt(0, Location::RequiresRegister());
3833 // Require coordinates in registers. These are the object holding the value
3834 // to operate on (except for static fields) and index (for arrays and views).
3835 for (size_t i = 0; i != expected_coordinates_count; ++i) {
3836 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3837 }
3838 if (return_type != DataType::Type::kVoid) {
3839 if (DataType::IsFloatingPointType(return_type)) {
3840 locations->SetOut(Location::RequiresFpuRegister());
3841 } else {
3842 locations->SetOut(Location::RequiresRegister());
3843 }
3844 }
3845 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3846 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3847 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3848 HInstruction* arg = invoke->InputAt(arg_index);
3849 if (IsZeroBitPattern(arg)) {
3850 locations->SetInAt(arg_index, Location::ConstantLocation(arg));
3851 } else if (DataType::IsFloatingPointType(arg->GetType())) {
3852 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
3853 } else {
3854 locations->SetInAt(arg_index, Location::RequiresRegister());
3855 }
3856 }
3857
3858 // Add a temporary for offset.
3859 if (codegen->EmitNonBakerReadBarrier() &&
3860 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
3861 // To preserve the offset value across the non-Baker read barrier slow path
3862 // for loading the declaring class, use a fixed callee-save register.
3863 constexpr int first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
3864 locations->AddTemp(Location::RegisterLocation(first_callee_save));
3865 } else {
3866 locations->AddTemp(Location::RequiresRegister());
3867 }
3868 if (expected_coordinates_count == 0u) {
3869 // Add a temporary to hold the declaring class.
3870 locations->AddTemp(Location::RequiresRegister());
3871 }
3872
3873 return locations;
3874 }
3875
3876 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
3877 VarHandleOptimizations optimizations(invoke);
3878 if (optimizations.GetDoNotIntrinsify()) {
3879 return;
3880 }
3881
3882 if (codegen->EmitNonBakerReadBarrier() &&
3883 invoke->GetType() == DataType::Type::kReference &&
3884 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
3885 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
3886 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
3887 // the passed reference and reloads it from the field. This gets the memory visibility
3888 // wrong for Acquire/Volatile operations. b/173104084
3889 return;
3890 }
3891
3892 CreateVarHandleCommonLocations(invoke, codegen);
3893 }
3894
3895 DataType::Type IntTypeForFloatingPointType(DataType::Type fp_type) {
3896 DCHECK(DataType::IsFloatingPointType(fp_type));
3897 return (fp_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64;
3898 }
3899
3900 static void GenerateVarHandleGet(HInvoke* invoke,
3901 CodeGeneratorRISCV64* codegen,
3902 std::memory_order order,
3903 bool byte_swap = false) {
3904 DataType::Type type = invoke->GetType();
3905 DCHECK_NE(type, DataType::Type::kVoid);
3906
3907 LocationSummary* locations = invoke->GetLocations();
3908 Riscv64Assembler* assembler = codegen->GetAssembler();
3909 Location out = locations->Out();
3910
3911 VarHandleTarget target = GetVarHandleTarget(invoke);
3912 VarHandleSlowPathRISCV64* slow_path = nullptr;
3913 if (!byte_swap) {
3914 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
3915 GenerateVarHandleTarget(invoke, target, codegen);
3916 if (slow_path != nullptr) {
3917 __ Bind(slow_path->GetNativeByteOrderLabel());
3918 }
3919 }
3920
3921 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
3922 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
3923 DCHECK(acquire_barrier || order == std::memory_order_relaxed);
3924
3925 if (seq_cst_barrier) {
3926 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3927 }
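  // For acquire and seq_cst orderings, a LoadAny barrier is emitted after the load below;
  // seq_cst additionally gets the AnyAny barrier above.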
3928
3929 // Load the value from the target location.
3930 if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
3931 Location index = Location::RegisterLocation(target.offset);
3932 // TODO(riscv64): Revisit when we add checking if the holder is black.
3933 Location temp = Location::NoLocation();
3934 codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
3935 out,
3936 target.object,
3937 /*offset=*/ 0,
3938 index,
3939 temp,
3940 /*needs_null_check=*/ false);
3941 DCHECK(!byte_swap);
3942 } else {
3943 ScratchRegisterScope srs(assembler);
3944 XRegister address = srs.AllocateXRegister();
3945 __ Add(address, target.object, target.offset);
3946 Location load_loc = out;
3947 DataType::Type load_type = type;
3948 if (byte_swap && DataType::IsFloatingPointType(type)) {
3949 load_loc = Location::RegisterLocation(target.offset); // Load to the offset temporary.
3950 load_type = IntTypeForFloatingPointType(type);
3951 }
3952 codegen->GetInstructionVisitor()->Load(load_loc, address, /*offset=*/ 0, load_type);
3953 if (type == DataType::Type::kReference) {
3954 DCHECK(!byte_swap);
3955 Location object_loc = Location::RegisterLocation(target.object);
3956 Location offset_loc = Location::RegisterLocation(target.offset);
3957 codegen->MaybeGenerateReadBarrierSlow(
3958 invoke, out, out, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
3959 } else if (byte_swap) {
3960 GenerateReverseBytes(codegen, out, load_loc.AsRegister<XRegister>(), type);
3961 }
3962 }
3963
3964 if (acquire_barrier) {
3965 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3966 }
3967
3968 if (slow_path != nullptr) {
3969 DCHECK(!byte_swap);
3970 __ Bind(slow_path->GetExitLabel());
3971 }
3972 }
3973
3974 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGet(HInvoke* invoke) {
3975 CreateVarHandleGetLocations(invoke, codegen_);
3976 }
3977
3978 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGet(HInvoke* invoke) {
3979 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
3980 }
3981
3982 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3983 CreateVarHandleGetLocations(invoke, codegen_);
3984 }
3985
3986 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3987 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
3988 }
3989
3990 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3991 CreateVarHandleGetLocations(invoke, codegen_);
3992 }
3993
3994 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3995 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
3996 }
3997
3998 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
3999 CreateVarHandleGetLocations(invoke, codegen_);
4000 }
4001
4002 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4003 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4004 }
4005
4006 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
4007 VarHandleOptimizations optimizations(invoke);
4008 if (optimizations.GetDoNotIntrinsify()) {
4009 return;
4010 }
4011
4012 CreateVarHandleCommonLocations(invoke, codegen);
4013 if (kPoisonHeapReferences && invoke->GetLocations() != nullptr) {
4014 LocationSummary* locations = invoke->GetLocations();
4015 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4016 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4017 if (value_type == DataType::Type::kReference && !locations->InAt(value_index).IsConstant()) {
4018 locations->AddTemp(Location::RequiresRegister());
4019 }
4020 }
4021 }
4022
4023 static void GenerateVarHandleSet(HInvoke* invoke,
4024 CodeGeneratorRISCV64* codegen,
4025 std::memory_order order,
4026 bool byte_swap = false) {
4027 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4028 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4029
4030 Riscv64Assembler* assembler = codegen->GetAssembler();
4031 Location value = invoke->GetLocations()->InAt(value_index);
4032
4033 VarHandleTarget target = GetVarHandleTarget(invoke);
4034 VarHandleSlowPathRISCV64* slow_path = nullptr;
4035 if (!byte_swap) {
4036 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4037 GenerateVarHandleTarget(invoke, target, codegen);
4038 if (slow_path != nullptr) {
4039 __ Bind(slow_path->GetNativeByteOrderLabel());
4040 }
4041 }
4042
4043 {
4044 ScratchRegisterScope srs(assembler);
4045 // Heap poisoning needs two scratch registers in `Store()`, except for null constants.
4046 XRegister address =
4047 (kPoisonHeapReferences && value_type == DataType::Type::kReference && !value.IsConstant())
4048 ? invoke->GetLocations()->GetTemp(0).AsRegister<XRegister>()
4049 : srs.AllocateXRegister();
4050 __ Add(address, target.object, target.offset);
4051
4052 if (byte_swap) {
4053 DCHECK(!value.IsConstant()); // Zero uses the main path as it does not need a byte swap.
4054 // The offset is no longer needed, so reuse the offset temporary for the byte-swapped value.
4055 Location new_value = Location::RegisterLocation(target.offset);
4056 if (DataType::IsFloatingPointType(value_type)) {
4057 value_type = IntTypeForFloatingPointType(value_type);
4058 codegen->MoveLocation(new_value, value, value_type);
4059 value = new_value;
4060 }
4061 GenerateReverseBytes(codegen, new_value, value.AsRegister<XRegister>(), value_type);
4062 value = new_value;
4063 }
4064
4065 GenerateSet(codegen, order, value, address, /*offset=*/ 0, value_type);
4066 }
4067
4068 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4069 codegen->MaybeMarkGCCard(
4070 target.object, value.AsRegister<XRegister>(), /* emit_null_check= */ true);
4071 }
4072
4073 if (slow_path != nullptr) {
4074 DCHECK(!byte_swap);
4075 __ Bind(slow_path->GetExitLabel());
4076 }
4077 }
4078
4079 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSet(HInvoke* invoke) {
4080 CreateVarHandleSetLocations(invoke, codegen_);
4081 }
4082
4083 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSet(HInvoke* invoke) {
4084 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4085 }
4086
4087 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4088 CreateVarHandleSetLocations(invoke, codegen_);
4089 }
4090
4091 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4092 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4093 }
4094
4095 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
4096 CreateVarHandleSetLocations(invoke, codegen_);
4097 }
4098
4099 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
4100 GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
4101 }
4102
4103 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4104 CreateVarHandleSetLocations(invoke, codegen_);
4105 }
4106
4107 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4108 GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
4109 }
4110
4111 static bool ScratchXRegisterNeeded(Location loc, DataType::Type type, bool byte_swap) {
4112 if (loc.IsConstant()) {
4113 DCHECK(loc.GetConstant()->IsZeroBitPattern());
4114 return false;
4115 }
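  // A scratch register is needed whenever the value must be copied or transformed before use:
  // FP values are moved to a core register, sub-word values are shifted/masked into place,
  // and byte-swapped values need a temporary for the reversed bits.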
4116 return DataType::IsFloatingPointType(type) || DataType::Size(type) < 4u || byte_swap;
4117 }
4118
4119 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4120 CodeGeneratorRISCV64* codegen,
4121 bool return_success) {
4122 VarHandleOptimizations optimizations(invoke);
4123 if (optimizations.GetDoNotIntrinsify()) {
4124 return;
4125 }
4126
4127 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4128 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4129 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4130 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4131
4132 bool is_reference = (value_type == DataType::Type::kReference);
4133 if (is_reference && codegen->EmitNonBakerReadBarrier()) {
4134 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4135 // the passed reference and reloads it from the field. This breaks the read barriers
4136 // in slow path in different ways. The marked old value may not actually be a to-space
4137 // reference to the same object as `old_value`, breaking slow path assumptions. And
4138 // for CompareAndExchange, marking the old value after comparison failure may actually
4139 // return the reference to `expected`, erroneously indicating success even though we
4140 // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4141 return;
4142 }
4143
4144 // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
4145 if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4146 return;
4147 }
4148
4149 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4150 DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4151
4152 if (codegen->EmitNonBakerReadBarrier()) {
4153 // We need callee-save registers for both the class object and offset instead of
4154 // the temporaries reserved in CreateVarHandleCommonLocations().
4155 static_assert(POPCOUNT(kRiscv64CalleeSaveRefSpills) >= 2u);
4156 uint32_t first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
4157 uint32_t second_callee_save = CTZ(kRiscv64CalleeSaveRefSpills ^ (1u << first_callee_save));
4158 if (expected_index == 1u) { // For static fields.
4159 DCHECK_EQ(locations->GetTempCount(), 2u);
4160 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4161 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4162 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4163 } else {
4164 DCHECK_EQ(locations->GetTempCount(), 1u);
4165 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4166 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4167 }
4168 }
4169
4170 size_t old_temp_count = locations->GetTempCount();
4171 DCHECK_EQ(old_temp_count, (expected_index == 1u) ? 2u : 1u);
4172 Location expected = locations->InAt(expected_index);
4173 Location new_value = locations->InAt(new_value_index);
4174 size_t data_size = DataType::Size(value_type);
4175 bool is_small = (data_size < 4u);
4176 bool can_byte_swap =
4177 (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
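  // Two coordinates (`expected_index == 3u`) means an array or byte array view access; a byte
  // array view may require a byte swap for non-native byte order, except for references and
  // single-byte types.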
4178 bool is_fp = DataType::IsFloatingPointType(value_type);
4179 size_t temps_needed =
4180 // The offset temp is used for the `tmp_ptr`, except for the read barrier case. For read
4181 // barrier we must preserve the offset and class pointer (if any) for the slow path, so we
4182 // use a separate temp for `tmp_ptr` and another temp for `old_value_temp`.
4183 ((is_reference && codegen->EmitReadBarrier()) ? old_temp_count + 2u : 1u) +
4184 // For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`.
4185 (is_small ? (return_success ? 2u : 3u) : 0u) +
4186 // Some cases need modified copies of `new_value` and `expected`.
4187 (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) +
4188 (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) +
4189 // We need a scratch register either for the old value or for the result of SC.
4190 // If we need to return a floating point old value, we need a temp for each.
4191 ((!return_success && is_fp) ? 2u : 1u);
4192 size_t scratch_registers_available = 2u;
4193 DCHECK_EQ(scratch_registers_available,
4194 ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
4195 if (temps_needed > old_temp_count + scratch_registers_available) {
4196 locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
4197 }
4198 }
4199
4200 static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
4201 Location loc,
4202 DataType::Type type,
4203 XRegister shift,
4204 XRegister mask,
4205 bool byte_swap,
4206 ScratchRegisterScope* srs) {
4207 DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister);
4208 DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u);
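  // A `shift` (and possibly a `mask`) is provided only for sub-word types that are accessed
  // as part of an aligned 32-bit word.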
4209 if (loc.IsConstant()) {
4210 // The `shift`/`mask` and `byte_swap` are irrelevant for zero input.
4211 DCHECK(loc.GetConstant()->IsZeroBitPattern());
4212 return Zero;
4213 }
4214
4215 Location result = loc;
4216 if (DataType::IsFloatingPointType(type)) {
4217 type = IntTypeForFloatingPointType(type);
4218 result = Location::RegisterLocation(srs->AllocateXRegister());
4219 codegen->MoveLocation(result, loc, type);
4220 loc = result;
4221 } else if (byte_swap || shift != kNoXRegister) {
4222 result = Location::RegisterLocation(srs->AllocateXRegister());
4223 }
4224 if (byte_swap) {
4225 if (type == DataType::Type::kInt16) {
4226 type = DataType::Type::kUint16; // Do the masking as part of the byte swap.
4227 }
4228 GenerateReverseBytes(codegen, result, loc.AsRegister<XRegister>(), type);
4229 loc = result;
4230 }
4231 if (shift != kNoXRegister) {
4232 Riscv64Assembler* assembler = codegen->GetAssembler();
4233 __ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift);
4234 DCHECK_NE(type, DataType::Type::kUint8);
4235 if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
4236 __ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask);
4237 }
4238 }
4239 return result.AsRegister<XRegister>();
4240 }
4241
4242 static void GenerateByteSwapAndExtract(CodeGeneratorRISCV64* codegen,
4243 Location rd,
4244 XRegister rs1,
4245 XRegister shift,
4246 DataType::Type type) {
4247 // Apply shift before `GenerateReverseBytes()` for small types.
4248 DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u);
4249 if (shift != kNoXRegister) {
4250 Riscv64Assembler* assembler = codegen->GetAssembler();
4251 __ Srlw(rd.AsRegister<XRegister>(), rs1, shift);
4252 rs1 = rd.AsRegister<XRegister>();
4253 }
4254 // Also handles moving to FP registers.
4255 GenerateReverseBytes(codegen, rd, rs1, type);
4256 }
4257
4258 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4259 CodeGeneratorRISCV64* codegen,
4260 std::memory_order order,
4261 bool return_success,
4262 bool strong,
4263 bool byte_swap = false) {
4264 DCHECK(return_success || strong);
4265
4266 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4267 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4268 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4269 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4270
4271 Riscv64Assembler* assembler = codegen->GetAssembler();
4272 LocationSummary* locations = invoke->GetLocations();
4273 Location expected = locations->InAt(expected_index);
4274 Location new_value = locations->InAt(new_value_index);
4275 Location out = locations->Out();
4276
4277 VarHandleTarget target = GetVarHandleTarget(invoke);
4278 VarHandleSlowPathRISCV64* slow_path = nullptr;
4279 if (!byte_swap) {
4280 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4281 GenerateVarHandleTarget(invoke, target, codegen);
4282 if (slow_path != nullptr) {
4283 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4284 __ Bind(slow_path->GetNativeByteOrderLabel());
4285 }
4286 }
4287
4288 // This needs to be before we allocate the scratch registers, as MarkGCCard also uses them.
4289 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4290 // Mark card for object assuming new value is stored.
4291 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
4292 codegen->MaybeMarkGCCard(
4293 target.object, new_value.AsRegister<XRegister>(), new_value_can_be_null);
4294 }
4295
4296 // Scratch registers may be needed for `new_value` and `expected`.
4297 ScratchRegisterScope srs(assembler);
4298 DCHECK_EQ(srs.AvailableXRegisters(), 2u);
4299 size_t available_scratch_registers =
4300 (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 0u : 1u) +
4301 (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 0u : 1u);
4302
4303 // Reuse the `offset` temporary for the pointer to the target location,
4304 // except for references that need the offset for the read barrier.
4305 DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
4306 size_t next_temp = 1u;
4307 XRegister tmp_ptr = target.offset;
4308 bool is_reference = (value_type == DataType::Type::kReference);
4309 if (is_reference && codegen->EmitReadBarrier()) {
4310 // Reserve scratch registers for `tmp_ptr` and `old_value_temp`.
4311 DCHECK_EQ(available_scratch_registers, 2u);
4312 available_scratch_registers = 0u;
4313 DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4314 next_temp = expected_index == 1u ? 2u : 1u; // Preserve the class register for static field.
4315 tmp_ptr = srs.AllocateXRegister();
4316 }
4317 __ Add(tmp_ptr, target.object, target.offset);
4318
4319 auto get_temp = [&]() {
4320 if (available_scratch_registers != 0u) {
4321 available_scratch_registers -= 1u;
4322 return srs.AllocateXRegister();
4323 } else {
4324 XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
4325 next_temp += 1u;
4326 return temp;
4327 }
4328 };
4329
4330 XRegister shift = kNoXRegister;
4331 XRegister mask = kNoXRegister;
4332 XRegister masked = kNoXRegister;
4333 size_t data_size = DataType::Size(value_type);
4334 bool is_small = (data_size < 4u);
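  // Sub-word (8- and 16-bit) locations have no LR/SC instructions of their own, so they are
  // updated through the containing aligned 32-bit word: `shift` holds the bit position of the
  // value within that word and `mask` selects its bits.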
4335 if (is_small) {
4336 // When returning "success" and not the old value, we shall not need the `shift` after
4337 // the raw CAS operation, so use the output register as a temporary here.
4338 shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp();
4339 mask = get_temp();
4340 masked = get_temp();
4341 // Upper bits of the shift are not used, so we do not need to clear them.
4342 __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
4343 __ Andi(tmp_ptr, tmp_ptr, -4);
4344 __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
4345 __ Sllw(mask, mask, shift);
4346 }
4347
4348 // Move floating point values to scratch registers and apply shift, mask and byte swap if needed.
4349 // Note that float/double CAS uses bitwise comparison, rather than the operator==.
4350 XRegister expected_reg =
4351 PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs);
4352 XRegister new_value_reg =
4353 PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs);
4354 bool is_fp = DataType::IsFloatingPointType(value_type);
4355 DataType::Type cas_type = is_fp
4356 ? IntTypeForFloatingPointType(value_type)
4357 : (is_small ? DataType::Type::kInt32 : value_type);
4358
4359 // Prepare registers for old value and the result of the store conditional.
4360 XRegister old_value;
4361 XRegister store_result;
4362 if (return_success) {
4363 // Use a temp for the old value.
4364 old_value = get_temp();
4365 // For strong CAS, use the `old_value` temp also for the SC result.
4366 // For weak CAS, put the SC result directly to `out`.
4367 store_result = strong ? old_value : out.AsRegister<XRegister>();
4368 } else if (is_fp) {
4369 // We need two temporary registers.
4370 old_value = get_temp();
4371 store_result = get_temp();
4372 } else {
4373 // Use the output register for the old value and a temp for the store conditional result.
4374 old_value = out.AsRegister<XRegister>();
4375 store_result = get_temp();
4376 }
4377
4378 Riscv64Label exit_loop_label;
4379 Riscv64Label* exit_loop = &exit_loop_label;
4380 Riscv64Label* cmp_failure = &exit_loop_label;
4381
4382 ReadBarrierCasSlowPathRISCV64* rb_slow_path = nullptr;
4383 if (is_reference && codegen->EmitReadBarrier()) {
4384 // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
4385 // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
4386 // as we do have marking entrypoints on riscv64 even for scratch registers.
4387 XRegister old_value_temp = srs.AllocateXRegister();
4388 // For strong CAS, use the `old_value_temp` also for the SC result as the reloaded old value
4389 // is no longer needed after the comparison. For weak CAS, store the SC result in the same
4390 // result register as the main path.
4391 // Note that for a strong CAS, a SC failure in the slow path can set the register to 1, so
4392 // we cannot use that register to indicate success without resetting it to 0 at the start of
4393 // the retry loop. Instead, we return to the success indicating instruction in the main path.
4394 XRegister slow_path_store_result = strong ? old_value_temp : store_result;
4395 rb_slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
4396 invoke,
4397 order,
4398 strong,
4399 target.object,
4400 target.offset,
4401 expected_reg,
4402 new_value_reg,
4403 old_value,
4404 old_value_temp,
4405 slow_path_store_result,
4406 /*update_old_value=*/ !return_success,
4407 codegen);
4408 codegen->AddSlowPath(rb_slow_path);
4409 exit_loop = rb_slow_path->GetExitLabel();
4410 cmp_failure = rb_slow_path->GetEntryLabel();
4411 }
4412
4413 if (return_success) {
4414 // Pre-populate the output register with failure for the case when the old value
4415 // differs and we do not execute the store conditional.
4416 __ Li(out.AsRegister<XRegister>(), 0);
4417 }
4418 GenerateCompareAndSet(codegen->GetAssembler(),
4419 cas_type,
4420 order,
4421 strong,
4422 cmp_failure,
4423 tmp_ptr,
4424 new_value_reg,
4425 old_value,
4426 mask,
4427 masked,
4428 store_result,
4429 expected_reg);
4430 if (return_success && strong) {
4431 if (rb_slow_path != nullptr) {
4432 // Slow path returns here on success.
4433 __ Bind(rb_slow_path->GetSuccessExitLabel());
4434 }
4435 // Load success value to the output register.
4436 // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS.
4437 __ Li(out.AsRegister<XRegister>(), 1);
4438 } else if (rb_slow_path != nullptr) {
4439 DCHECK(!rb_slow_path->GetSuccessExitLabel()->IsLinked());
4440 }
4441 __ Bind(exit_loop);
4442
4443 if (return_success) {
4444 // Nothing to do, the result register already contains 1 on success and 0 on failure.
4445 } else if (byte_swap) {
4446 DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value)
4447 << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value;
4448 GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
4449 } else if (is_fp) {
4450 codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
4451 } else if (is_small) {
4452 __ Srlw(old_value, masked, shift);
4453 if (value_type == DataType::Type::kInt8) {
4454 __ SextB(old_value, old_value);
4455 } else if (value_type == DataType::Type::kInt16) {
4456 __ SextH(old_value, old_value);
4457 }
4458 }
4459
4460 if (slow_path != nullptr) {
4461 DCHECK(!byte_swap);
4462 __ Bind(slow_path->GetExitLabel());
4463 }
4464
4465 // Check that we have allocated the right number of temps. We may need more registers
4466 // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
4467 bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u);
4468 if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
4469 // We allocate a temporary register for the class object for a static field `VarHandle` but
4470 // we do not update the `next_temp` if it's otherwise unused after the address calculation.
4471 CHECK_EQ(expected_index, 1u);
4472 CHECK_EQ(next_temp, 1u);
4473 CHECK_EQ(locations->GetTempCount(), 2u);
4474 }
4475 }
4476
4477 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4478 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4479 }
4480
4481 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4482 GenerateVarHandleCompareAndSetOrExchange(
4483 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
4484 }
4485
4486 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4487 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4488 }
4489
4490 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4491 GenerateVarHandleCompareAndSetOrExchange(
4492 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
4493 }
4494
4495 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4496 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4497 }
4498
4499 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4500 GenerateVarHandleCompareAndSetOrExchange(
4501 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
4502 }
4503
4504 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4505 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4506 }
4507
4508 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4509 GenerateVarHandleCompareAndSetOrExchange(
4510 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
4511 }
4512
4513 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4514 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4515 }
4516
4517 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4518 GenerateVarHandleCompareAndSetOrExchange(
4519 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
4520 }
4521
4522 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4523 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4524 }
4525
4526 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4527 GenerateVarHandleCompareAndSetOrExchange(
4528 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
4529 }
4530
4531 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4532 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4533 }
4534
4535 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4536 GenerateVarHandleCompareAndSetOrExchange(
4537 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
4538 }
4539
4540 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4541 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4542 }
4543
4544 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4545 GenerateVarHandleCompareAndSetOrExchange(
4546 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
4547 }
4548
4549 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
4550 CodeGeneratorRISCV64* codegen,
4551 GetAndUpdateOp get_and_update_op) {
4552 VarHandleOptimizations optimizations(invoke);
4553 if (optimizations.GetDoNotIntrinsify()) {
4554 return;
4555 }
4556
4557 // Get the type from the shorty as the invokes may not return a value.
4558 uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
4559 DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4560 DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
4561 if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
4562 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4563 // the passed reference and reloads it from the field, thus seeing the new value
4564 // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
4565 return;
4566 }
4567
4568 // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
4569 if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4570 return;
4571 }
4572
4573 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4574 Location arg = locations->InAt(arg_index);
4575
4576 bool is_fp = DataType::IsFloatingPointType(value_type);
4577 if (is_fp) {
4578 if (get_and_update_op == GetAndUpdateOp::kAdd) {
4579 // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
4580 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4581 } else {
4582 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4583 }
4584 }
4585
4586 size_t data_size = DataType::Size(value_type);
4587 bool can_byte_swap =
4588 (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
4589 bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp);
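  // GetAndAdd on floating point or byte-swapped values cannot use an atomic add directly, so
  // code generation falls back to a CAS (load-reserved/store-conditional) retry loop and needs
  // the extra temporaries counted below.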
4590 bool is_small = (data_size < 4u);
4591 bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
4592 bool is_bitwise =
4593 (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd);
4594
4595 size_t temps_needed =
4596 // The offset temp is used for the `tmp_ptr`.
4597 1u +
4598 // For small values, we need temps for `shift` and maybe also `mask` and `temp`.
4599 (is_small ? (is_bitwise ? 1u : 3u) : 0u) +
4600 // Some cases need modified copies of `arg`.
4601 (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) +
4602 // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`.
4603 (is_fp ? 1u : 0u);
4604 if (can_use_cas) {
4605 size_t cas_temps_needed =
4606 // The offset temp is used for the `tmp_ptr`.
4607 1u +
4608 // For small values, we need a temp for `shift`.
4609 (is_small ? 1u : 0u) +
4610 // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`.
4611 3u;
4612 DCHECK_GE(cas_temps_needed, temps_needed);
4613 temps_needed = cas_temps_needed;
4614 }
4615
4616 size_t scratch_registers_available = 2u;
4617 DCHECK_EQ(scratch_registers_available,
4618 ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
4619 size_t old_temp_count = locations->GetTempCount();
4620 DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 2u : 1u);
4621 if (temps_needed > old_temp_count + scratch_registers_available) {
4622 locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
4623 }
4624
4625 // Request another temporary register for methods that don't return a value.
4626 // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
4627 DataType::Type return_type = invoke->GetType();
4628 const bool is_void = return_type == DataType::Type::kVoid;
4629 DCHECK_IMPLIES(!is_void, return_type == value_type);
4630 if (is_void) {
4631 if (DataType::IsFloatingPointType(value_type)) {
4632 locations->AddTemp(Location::RequiresFpuRegister());
4633 } else {
4634 locations->AddTemp(Location::RequiresRegister());
4635 }
4636 }
4637 }
4638
4639 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4640 CodeGeneratorRISCV64* codegen,
4641 GetAndUpdateOp get_and_update_op,
4642 std::memory_order order,
4643 bool byte_swap = false) {
4644 // Get the type from the shorty as the invokes may not return a value.
4645 uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
4646 DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4647 DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
4648
4649 Riscv64Assembler* assembler = codegen->GetAssembler();
4650 LocationSummary* locations = invoke->GetLocations();
4651 Location arg = locations->InAt(arg_index);
4652 DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
4653 DataType::Type return_type = invoke->GetType();
4654 const bool is_void = return_type == DataType::Type::kVoid;
4655 DCHECK_IMPLIES(!is_void, return_type == value_type);
4656 // We use a temporary for void methods, as we don't return the value.
4657 Location out_or_temp =
4658 is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
4659
4660 VarHandleTarget target = GetVarHandleTarget(invoke);
4661 VarHandleSlowPathRISCV64* slow_path = nullptr;
4662 if (!byte_swap) {
4663 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4664 GenerateVarHandleTarget(invoke, target, codegen);
4665 if (slow_path != nullptr) {
4666 slow_path->SetGetAndUpdateOp(get_and_update_op);
4667 __ Bind(slow_path->GetNativeByteOrderLabel());
4668 }
4669 }
4670
4671 // This needs to be done before allocating the scratch registers below, as MaybeMarkGCCard() also uses them.
4672 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
4673 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4674 // Mark card for object, the new value shall be stored.
4675 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
4676 codegen->MaybeMarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null);
4677 }
4678
4679 size_t data_size = DataType::Size(value_type);
4680 bool is_fp = DataType::IsFloatingPointType(value_type);
4681 bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp);
4682 bool is_small = (data_size < 4u);
4683 bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
4684 bool is_reference = (value_type == DataType::Type::kReference);
4685 DataType::Type op_type = is_fp
4686 ? IntTypeForFloatingPointType(value_type)
4687 : (is_small || is_reference ? DataType::Type::kInt32 : value_type);
4688
4689 ScratchRegisterScope srs(assembler);
4690 DCHECK_EQ(srs.AvailableXRegisters(), 2u);
4691 size_t available_scratch_registers = use_cas
4692 // We use scratch registers differently for the CAS path.
4693 ? 0u
4694 // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation.
4695 : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u);
4696
4697 // Reuse the `target.offset` temporary for the pointer to the target location,
4698 // except for references that need the offset for the non-Baker read barrier.
4699 DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
4700 size_t next_temp = 1u;
4701 XRegister tmp_ptr = target.offset;
4702 if (is_reference && codegen->EmitNonBakerReadBarrier()) {
4703 DCHECK_EQ(available_scratch_registers, 2u);
4704 available_scratch_registers -= 1u;
4705 tmp_ptr = srs.AllocateXRegister();
4706 }
4707 __ Add(tmp_ptr, target.object, target.offset);
4708
4709 auto get_temp = [&]() {
4710 if (available_scratch_registers != 0u) {
4711 available_scratch_registers -= 1u;
4712 return srs.AllocateXRegister();
4713 } else {
4714 DCHECK_IMPLIES(is_void, next_temp != locations->GetTempCount() - 1u)
4715 << "The last temp is special for the void case, as it represents the out register.";
4716 XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
4717 next_temp += 1u;
4718 return temp;
4719 }
4720 };
4721
4722 XRegister shift = kNoXRegister;
4723 XRegister mask = kNoXRegister;
4724 XRegister prepare_mask = kNoXRegister;
4725 XRegister temp = kNoXRegister;
4726 XRegister arg_reg = kNoXRegister;
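  // As in the CAS path, sub-word locations are updated through the containing aligned 32-bit
  // word: `shift` is the bit position of the value within that word and `mask` (where needed)
  // selects its bits.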
4727 if (is_small) {
4728 shift = get_temp();
4729 // Upper bits of the shift are not used, so we do not need to clear them.
4730 __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
4731 __ Andi(tmp_ptr, tmp_ptr, -4);
4732 switch (get_and_update_op) {
4733 case GetAndUpdateOp::kAdd:
4734 if (byte_swap) {
4735 // The mask is not needed in the CAS path.
4736 DCHECK(use_cas);
4737 break;
4738 }
4739 FALLTHROUGH_INTENDED;
4740 case GetAndUpdateOp::kSet:
4741 mask = get_temp();
4742 temp = get_temp();
4743 __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
4744 __ Sllw(mask, mask, shift);
4745 // The argument does not need to be masked for `GetAndUpdateOp::kAdd`;
4746 // the mask shall be applied after the ADD instruction.
4747 prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister;
4748 break;
4749 case GetAndUpdateOp::kAnd:
4750 // We need to set all other bits, so we always need a temp.
4751 arg_reg = srs.AllocateXRegister();
4752 if (data_size == 1u) {
4753 __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff);
4754 DCHECK(!byte_swap);
4755 } else {
4756 DCHECK_EQ(data_size, 2u);
4757 __ Li(arg_reg, ~0xffff);
4758 __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg);
4759 if (byte_swap) {
4760 __ Rev8(arg_reg, arg_reg);
4761 __ Rori(arg_reg, arg_reg, 48);
4762 }
4763 }
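        // Rotate rather than shift, so the high bits that were set above wrap around and keep
        // every bit outside the value's position set to 1 for the AND.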
4764 __ Rolw(arg_reg, arg_reg, shift);
4765 break;
4766 case GetAndUpdateOp::kOr:
4767 case GetAndUpdateOp::kXor:
4768 // Signed values need to be truncated but we're keeping `prepare_mask == kNoXRegister`.
4769 if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) {
4770 DCHECK(!byte_swap);
4771 arg_reg = srs.AllocateXRegister();
4772 __ ZextB(arg_reg, arg.AsRegister<XRegister>());
4773 __ Sllw(arg_reg, arg_reg, shift);
4774 } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) {
4775 arg_reg = srs.AllocateXRegister();
4776 __ ZextH(arg_reg, arg.AsRegister<XRegister>());
4777 __ Sllw(arg_reg, arg_reg, shift);
4778 } // else handled by `PrepareXRegister()` below.
4779 break;
4780 }
4781 }
4782 if (arg_reg == kNoXRegister && !use_cas) {
4783 arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs);
4784 }
4785 if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) {
4786 __ Not(mask, mask); // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`.
4787 }
4788
4789 if (use_cas) {
4790 // Allocate scratch registers for temps that can theoretically be clobbered on retry.
4791 // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.)
4792 DCHECK_EQ(available_scratch_registers, 0u); // Reserved for the two uses below.
4793 XRegister old_value = srs.AllocateXRegister();
4794 XRegister new_value = srs.AllocateXRegister();
4795 // Allocate other needed temporaries.
4796 XRegister reloaded_old_value = get_temp();
4797 XRegister store_result = reloaded_old_value; // Clobber reloaded old value by store result.
4798 FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister;
4799
4800 Riscv64Label retry;
4801 __ Bind(&retry);
4802 codegen->GetInstructionVisitor()->Load(
4803 Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
4804 if (byte_swap) {
4805 GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
4806 } else {
4807 DCHECK(is_fp);
4808 codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
4809 }
4810 if (is_fp) {
4811 codegen->GetInstructionVisitor()->FAdd(
4812 ftmp, out_or_temp.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
4813 codegen->MoveLocation(
4814 Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
4815 } else if (arg.IsConstant()) {
4816 DCHECK(arg.GetConstant()->IsZeroBitPattern());
4817 __ Mv(new_value, out_or_temp.AsRegister<XRegister>());
4818 } else if (value_type == DataType::Type::kInt64) {
4819 __ Add(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
4820 } else {
4821 DCHECK_EQ(op_type, DataType::Type::kInt32);
4822 __ Addw(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
4823 }
4824 if (byte_swap) {
4825 DataType::Type swap_type = op_type;
4826 if (is_small) {
4827 DCHECK_EQ(data_size, 2u);
4828 // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace
4829 // are present in both `old_value` and `out` but in different bits and byte order.
4830 // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
4831 // (extracting only the bits we want to update), shift and XOR with the old value.
4832 swap_type = DataType::Type::kUint16;
4833 __ Xor(new_value, new_value, out_or_temp.AsRegister<XRegister>());
4834 }
4835 GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type);
4836 if (is_small) {
4837 __ Sllw(new_value, new_value, shift);
4838 __ Xor(new_value, new_value, old_value);
4839 }
4840 }
4841 GenerateCompareAndSet(assembler,
4842 op_type,
4843 order,
4844 /*strong=*/ true,
4845 /*cmp_failure=*/ &retry,
4846 tmp_ptr,
4847 new_value,
4848 /*old_value=*/ reloaded_old_value,
4849 /*mask=*/ kNoXRegister,
4850 /*masked=*/ kNoXRegister,
4851 store_result,
4852 /*expected=*/ old_value);
4853 } else {
4854 XRegister old_value = is_fp ? get_temp() : out_or_temp.AsRegister<XRegister>();
4855 GenerateGetAndUpdate(
4856 codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
4857 if (byte_swap) {
4858 DCHECK_IMPLIES(is_small, out_or_temp.AsRegister<XRegister>() == old_value)
4859 << " " << value_type << " " << out_or_temp.AsRegister<XRegister>() << "!=" << old_value;
4860 GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
4861 } else if (is_fp) {
4862 codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
4863 } else if (is_small) {
4864 __ Srlw(old_value, old_value, shift);
4865 DCHECK_NE(value_type, DataType::Type::kUint8);
4866 if (value_type == DataType::Type::kInt8) {
4867 __ SextB(old_value, old_value);
4868 } else if (value_type == DataType::Type::kBool) {
4869 __ ZextB(old_value, old_value);
4870 } else if (value_type == DataType::Type::kInt16) {
4871 __ SextH(old_value, old_value);
4872 } else {
4873 DCHECK_EQ(value_type, DataType::Type::kUint16);
4874 __ ZextH(old_value, old_value);
4875 }
4876 } else if (is_reference) {
4877 __ ZextW(old_value, old_value);
4878 if (codegen->EmitBakerReadBarrier()) {
4879 // Use RA as temp. It is clobbered in the slow path anyway.
4880 static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
4881 SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
4882 invoke, out_or_temp, kBakerReadBarrierTemp);
4883 codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_or_temp, kBakerReadBarrierTemp);
4884 } else if (codegen->EmitNonBakerReadBarrier()) {
4885 Location base_loc = Location::RegisterLocation(target.object);
4886 Location index = Location::RegisterLocation(target.offset);
4887 SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
4888 invoke, out_or_temp, out_or_temp, base_loc, /*offset=*/ 0u, index);
4889 __ J(rb_slow_path->GetEntryLabel());
4890 __ Bind(rb_slow_path->GetExitLabel());
4891 }
4892 }
4893 }
4894
4895 if (slow_path != nullptr) {
4896 DCHECK(!byte_swap);
4897 __ Bind(slow_path->GetExitLabel());
4898 }
4899
  // Check that we have allocated the right number of temps. The byte-swapped CAS in the slow
  // path may need more registers, so skip this check for the main path in that case.
  // In the void case, we requested an extra register to mimic the `out` register.
4903 const size_t extra_temp_registers = is_void ? 1u : 0u;
4904 bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
4905 if ((!has_byte_swap || byte_swap) &&
4906 next_temp != locations->GetTempCount() - extra_temp_registers) {
    // For a static field `VarHandle`, we allocate a temporary register for the class object
    // but do not advance `next_temp` when that register is otherwise unused after the
    // address calculation.
4909 CHECK_EQ(arg_index, 1u);
4910 CHECK_EQ(next_temp, 1u);
4911 CHECK_EQ(locations->GetTempCount(), 2u + extra_temp_registers);
4912 }
4913 }
4914
VisitVarHandleGetAndSet(HInvoke * invoke)4915 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4916 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4917 }
4918
VisitVarHandleGetAndSet(HInvoke * invoke)4919 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4920 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
4921 }
4922
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4923 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4924 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4925 }
4926
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4927 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4928 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
4929 }
4930
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4931 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4932 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4933 }
4934
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4935 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4936 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
4937 }
4938
VisitVarHandleGetAndAdd(HInvoke * invoke)4939 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4940 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4941 }
4942
VisitVarHandleGetAndAdd(HInvoke * invoke)4943 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4944 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
4945 }
4946
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4947 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4948 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4949 }
4950
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4951 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4952 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
4953 }
4954
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4955 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4956 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4957 }
4958
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4959 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4960 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
4961 }
4962
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4963 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4964 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4965 }
4966
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4967 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4968 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
4969 }
4970
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4971 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4972 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4973 }
4974
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4975 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4976 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
4977 }
4978
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4979 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4980 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4981 }
4982
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4983 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4984 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
4985 }
4986
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)4987 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4988 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
4989 }
4990
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)4991 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4992 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
4993 }
4994
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)4995 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4996 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
4997 }
4998
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)4999 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5000 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5001 }
5002
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5003 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5004 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5005 }
5006
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5007 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5008 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5009 }
5010
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5011 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5012 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5013 }
5014
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5015 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5016 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5017 }
5018
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5019 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5020 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5021 }
5022
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5023 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5024 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5025 }
5026
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5027 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5028 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5029 }
5030
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5031 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5032 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5033 }
5034
EmitByteArrayViewCode(CodeGenerator * codegen_in)5035 void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5036 DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5037 CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
5038 Riscv64Assembler* assembler = codegen->GetAssembler();
5039 HInvoke* invoke = GetInvoke();
5040 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5041 DataType::Type value_type =
5042 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5043 DCHECK_NE(value_type, DataType::Type::kReference);
5044 size_t size = DataType::Size(value_type);
5045 DCHECK_GT(size, 1u);
5046 LocationSummary* locations = invoke->GetLocations();
5047 XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
5048 XRegister object = locations->InAt(1).AsRegister<XRegister>();
5049 XRegister index = locations->InAt(2).AsRegister<XRegister>();
5050
5051 MemberOffset class_offset = mirror::Object::ClassOffset();
5052 MemberOffset array_length_offset = mirror::Array::LengthOffset();
5053 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5054 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5055
5056 __ Bind(GetByteArrayViewCheckLabel());
5057
5058 VarHandleTarget target = GetVarHandleTarget(invoke);
5059 {
5060 ScratchRegisterScope srs(assembler);
5061 XRegister temp = srs.AllocateXRegister();
5062 XRegister temp2 = srs.AllocateXRegister();
5063
    // The main path checked that `coordinateType0` is an array class matching the class of
    // the actual coordinate argument, but its component type does not match the value type.
    // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5067 __ Loadwu(temp, varhandle, class_offset.Int32Value());
5068 codegen->MaybeUnpoisonHeapReference(temp);
5069 codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5070 __ Bne(temp, temp2, GetEntryLabel());
5071
5072 // Check for array index out of bounds.
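    // The access touches bytes [index, index + size), so require index < length (an unsigned
    // comparison, which also rejects negative indexes) and index + size - 1 < length.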
5073 __ Loadw(temp, object, array_length_offset.Int32Value());
5074 __ Bgeu(index, temp, GetEntryLabel());
5075 __ Addi(temp2, index, size - 1u);
5076 __ Bgeu(temp2, temp, GetEntryLabel());
5077
5078 // Construct the target.
5079 __ Addi(target.offset, index, data_offset.Int32Value());
5080
5081 // Alignment check. For unaligned access, go to the runtime.
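    // For example, a 4-byte view requires the two least significant bits of `target.offset`
    // to be zero; heap objects are sufficiently aligned for these access sizes, so checking
    // the offset alone is enough.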
5082 DCHECK(IsPowerOfTwo(size));
5083 __ Andi(temp, target.offset, size - 1u);
5084 __ Bnez(temp, GetEntryLabel());
5085
    // Byte order check. For native byte order, return to the main path.
5087 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5088 IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5089 // There is no reason to differentiate between native byte order and byte-swap
5090 // for setting a zero bit pattern. Just return to the main path.
5091 __ J(GetNativeByteOrderLabel());
5092 return;
5093 }
5094 __ Loadbu(temp, varhandle, native_byte_order_offset.Int32Value());
5095 __ Bnez(temp, GetNativeByteOrderLabel());
5096 }
5097
5098 switch (access_mode_template) {
5099 case mirror::VarHandle::AccessModeTemplate::kGet:
5100 GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5101 break;
5102 case mirror::VarHandle::AccessModeTemplate::kSet:
5103 GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5104 break;
5105 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5106 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5107 GenerateVarHandleCompareAndSetOrExchange(
5108 invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5109 break;
5110 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5111 GenerateVarHandleGetAndUpdate(
5112 invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5113 break;
5114 }
5115 __ J(GetExitLabel());
5116 }
5117
VisitThreadCurrentThread(HInvoke * invoke)5118 void IntrinsicLocationsBuilderRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
5119 LocationSummary* locations =
5120 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5121 locations->SetOut(Location::RequiresRegister());
5122 }
5123
VisitThreadCurrentThread(HInvoke * invoke)5124 void IntrinsicCodeGeneratorRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
5125 Riscv64Assembler* assembler = GetAssembler();
5126 XRegister out = invoke->GetLocations()->Out().AsRegister<XRegister>();
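  // The current thread's java.lang.Thread peer is stored in the Thread object referenced by
  // the thread register (TR); a 32-bit load is sufficient for a heap reference.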
5127 __ Loadwu(out, TR, Thread::PeerOffset<kRiscv64PointerSize>().Int32Value());
5128 }
5129
VisitThreadInterrupted(HInvoke * invoke)5130 void IntrinsicLocationsBuilderRISCV64::VisitThreadInterrupted(HInvoke* invoke) {
5131 LocationSummary* locations =
5132 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5133 locations->SetOut(Location::RequiresRegister());
5134 }
5135
VisitThreadInterrupted(HInvoke * invoke)5136 void IntrinsicCodeGeneratorRISCV64::VisitThreadInterrupted(HInvoke* invoke) {
5137 LocationSummary* locations = invoke->GetLocations();
5138 Riscv64Assembler* assembler = GetAssembler();
5139 XRegister out = locations->Out().AsRegister<XRegister>();
5140 Riscv64Label done;
5141
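  // Read the thread's interrupted flag; if it is clear, the result is false and there is
  // nothing else to do. If it is set, clear it by storing zero. The kAnyAny barriers before
  // the load and after the store keep the read-and-clear ordered with surrounding accesses.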
5142 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5143 __ Loadw(out, TR, Thread::InterruptedOffset<kRiscv64PointerSize>().Int32Value());
5144 __ Beqz(out, &done);
5145 __ Storew(Zero, TR, Thread::InterruptedOffset<kRiscv64PointerSize>().Int32Value());
5146 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5147 __ Bind(&done);
5148 }
5149
VisitReachabilityFence(HInvoke * invoke)5150 void IntrinsicLocationsBuilderRISCV64::VisitReachabilityFence(HInvoke* invoke) {
5151 LocationSummary* locations =
5152 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5153 locations->SetInAt(0, Location::Any());
5154 }
5155
VisitReachabilityFence(HInvoke * invoke)5156 void IntrinsicCodeGeneratorRISCV64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
5157
VisitMathFmaDouble(HInvoke * invoke)5158 void IntrinsicLocationsBuilderRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
5159 CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
5160 }
5161
VisitMathFmaDouble(HInvoke * invoke)5162 void IntrinsicCodeGeneratorRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
5163 LocationSummary* locations = invoke->GetLocations();
5164 Riscv64Assembler* assembler = GetAssembler();
5165 FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
5166 FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
5167 FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
5168 FRegister out = locations->Out().AsFpuRegister<FRegister>();
5169
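  // FMADD.D computes n * m + a with a single rounding, matching the Math.fma contract.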
5170 __ FMAddD(out, n, m, a);
5171 }
5172
VisitMathFmaFloat(HInvoke * invoke)5173 void IntrinsicLocationsBuilderRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
5174 CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
5175 }
5176
VisitMathFmaFloat(HInvoke * invoke)5177 void IntrinsicCodeGeneratorRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
5178 LocationSummary* locations = invoke->GetLocations();
5179 Riscv64Assembler* assembler = GetAssembler();
5180 FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
5181 FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
5182 FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
5183 FRegister out = locations->Out().AsFpuRegister<FRegister>();
5184
5185 __ FMAddS(out, n, m, a);
5186 }
5187
5188
VisitMathCos(HInvoke * invoke)5189 void IntrinsicLocationsBuilderRISCV64::VisitMathCos(HInvoke* invoke) {
5190 CreateFPToFPCallLocations(allocator_, invoke);
5191 }
5192
VisitMathCos(HInvoke * invoke)5193 void IntrinsicCodeGeneratorRISCV64::VisitMathCos(HInvoke* invoke) {
5194 codegen_->InvokeRuntime(kQuickCos, invoke, invoke->GetDexPc());
5195 }
5196
VisitMathSin(HInvoke * invoke)5197 void IntrinsicLocationsBuilderRISCV64::VisitMathSin(HInvoke* invoke) {
5198 CreateFPToFPCallLocations(allocator_, invoke);
5199 }
5200
VisitMathSin(HInvoke * invoke)5201 void IntrinsicCodeGeneratorRISCV64::VisitMathSin(HInvoke* invoke) {
5202 codegen_->InvokeRuntime(kQuickSin, invoke, invoke->GetDexPc());
5203 }
5204
VisitMathAcos(HInvoke * invoke)5205 void IntrinsicLocationsBuilderRISCV64::VisitMathAcos(HInvoke* invoke) {
5206 CreateFPToFPCallLocations(allocator_, invoke);
5207 }
5208
VisitMathAcos(HInvoke * invoke)5209 void IntrinsicCodeGeneratorRISCV64::VisitMathAcos(HInvoke* invoke) {
5210 codegen_->InvokeRuntime(kQuickAcos, invoke, invoke->GetDexPc());
5211 }
5212
VisitMathAsin(HInvoke * invoke)5213 void IntrinsicLocationsBuilderRISCV64::VisitMathAsin(HInvoke* invoke) {
5214 CreateFPToFPCallLocations(allocator_, invoke);
5215 }
5216
VisitMathAsin(HInvoke * invoke)5217 void IntrinsicCodeGeneratorRISCV64::VisitMathAsin(HInvoke* invoke) {
5218 codegen_->InvokeRuntime(kQuickAsin, invoke, invoke->GetDexPc());
5219 }
5220
VisitMathAtan(HInvoke * invoke)5221 void IntrinsicLocationsBuilderRISCV64::VisitMathAtan(HInvoke* invoke) {
5222 CreateFPToFPCallLocations(allocator_, invoke);
5223 }
5224
VisitMathAtan(HInvoke * invoke)5225 void IntrinsicCodeGeneratorRISCV64::VisitMathAtan(HInvoke* invoke) {
5226 codegen_->InvokeRuntime(kQuickAtan, invoke, invoke->GetDexPc());
5227 }
5228
VisitMathAtan2(HInvoke * invoke)5229 void IntrinsicLocationsBuilderRISCV64::VisitMathAtan2(HInvoke* invoke) {
5230 CreateFPFPToFPCallLocations(allocator_, invoke);
5231 }
5232
VisitMathAtan2(HInvoke * invoke)5233 void IntrinsicCodeGeneratorRISCV64::VisitMathAtan2(HInvoke* invoke) {
5234 codegen_->InvokeRuntime(kQuickAtan2, invoke, invoke->GetDexPc());
5235 }
5236
VisitMathPow(HInvoke * invoke)5237 void IntrinsicLocationsBuilderRISCV64::VisitMathPow(HInvoke* invoke) {
5238 CreateFPFPToFPCallLocations(allocator_, invoke);
5239 }
5240
VisitMathPow(HInvoke * invoke)5241 void IntrinsicCodeGeneratorRISCV64::VisitMathPow(HInvoke* invoke) {
5242 codegen_->InvokeRuntime(kQuickPow, invoke, invoke->GetDexPc());
5243 }
5244
VisitMathCbrt(HInvoke * invoke)5245 void IntrinsicLocationsBuilderRISCV64::VisitMathCbrt(HInvoke* invoke) {
5246 CreateFPToFPCallLocations(allocator_, invoke);
5247 }
5248
VisitMathCbrt(HInvoke * invoke)5249 void IntrinsicCodeGeneratorRISCV64::VisitMathCbrt(HInvoke* invoke) {
5250 codegen_->InvokeRuntime(kQuickCbrt, invoke, invoke->GetDexPc());
5251 }
5252
VisitMathCosh(HInvoke * invoke)5253 void IntrinsicLocationsBuilderRISCV64::VisitMathCosh(HInvoke* invoke) {
5254 CreateFPToFPCallLocations(allocator_, invoke);
5255 }
5256
VisitMathCosh(HInvoke * invoke)5257 void IntrinsicCodeGeneratorRISCV64::VisitMathCosh(HInvoke* invoke) {
5258 codegen_->InvokeRuntime(kQuickCosh, invoke, invoke->GetDexPc());
5259 }
5260
VisitMathExp(HInvoke * invoke)5261 void IntrinsicLocationsBuilderRISCV64::VisitMathExp(HInvoke* invoke) {
5262 CreateFPToFPCallLocations(allocator_, invoke);
5263 }
5264
VisitMathExp(HInvoke * invoke)5265 void IntrinsicCodeGeneratorRISCV64::VisitMathExp(HInvoke* invoke) {
5266 codegen_->InvokeRuntime(kQuickExp, invoke, invoke->GetDexPc());
5267 }
5268
VisitMathExpm1(HInvoke * invoke)5269 void IntrinsicLocationsBuilderRISCV64::VisitMathExpm1(HInvoke* invoke) {
5270 CreateFPToFPCallLocations(allocator_, invoke);
5271 }
5272
VisitMathExpm1(HInvoke * invoke)5273 void IntrinsicCodeGeneratorRISCV64::VisitMathExpm1(HInvoke* invoke) {
5274 codegen_->InvokeRuntime(kQuickExpm1, invoke, invoke->GetDexPc());
5275 }
5276
VisitMathHypot(HInvoke * invoke)5277 void IntrinsicLocationsBuilderRISCV64::VisitMathHypot(HInvoke* invoke) {
5278 CreateFPFPToFPCallLocations(allocator_, invoke);
5279 }
5280
VisitMathHypot(HInvoke * invoke)5281 void IntrinsicCodeGeneratorRISCV64::VisitMathHypot(HInvoke* invoke) {
5282 codegen_->InvokeRuntime(kQuickHypot, invoke, invoke->GetDexPc());
5283 }
5284
VisitMathLog(HInvoke * invoke)5285 void IntrinsicLocationsBuilderRISCV64::VisitMathLog(HInvoke* invoke) {
5286 CreateFPToFPCallLocations(allocator_, invoke);
5287 }
5288
VisitMathLog(HInvoke * invoke)5289 void IntrinsicCodeGeneratorRISCV64::VisitMathLog(HInvoke* invoke) {
5290 codegen_->InvokeRuntime(kQuickLog, invoke, invoke->GetDexPc());
5291 }
5292
VisitMathLog10(HInvoke * invoke)5293 void IntrinsicLocationsBuilderRISCV64::VisitMathLog10(HInvoke* invoke) {
5294 CreateFPToFPCallLocations(allocator_, invoke);
5295 }
5296
VisitMathLog10(HInvoke * invoke)5297 void IntrinsicCodeGeneratorRISCV64::VisitMathLog10(HInvoke* invoke) {
5298 codegen_->InvokeRuntime(kQuickLog10, invoke, invoke->GetDexPc());
5299 }
5300
VisitMathNextAfter(HInvoke * invoke)5301 void IntrinsicLocationsBuilderRISCV64::VisitMathNextAfter(HInvoke* invoke) {
5302 CreateFPFPToFPCallLocations(allocator_, invoke);
5303 }
5304
VisitMathNextAfter(HInvoke * invoke)5305 void IntrinsicCodeGeneratorRISCV64::VisitMathNextAfter(HInvoke* invoke) {
5306 codegen_->InvokeRuntime(kQuickNextAfter, invoke, invoke->GetDexPc());
5307 }
5308
VisitMathSinh(HInvoke * invoke)5309 void IntrinsicLocationsBuilderRISCV64::VisitMathSinh(HInvoke* invoke) {
5310 CreateFPToFPCallLocations(allocator_, invoke);
5311 }
5312
VisitMathSinh(HInvoke * invoke)5313 void IntrinsicCodeGeneratorRISCV64::VisitMathSinh(HInvoke* invoke) {
5314 codegen_->InvokeRuntime(kQuickSinh, invoke, invoke->GetDexPc());
5315 }
5316
VisitMathTan(HInvoke * invoke)5317 void IntrinsicLocationsBuilderRISCV64::VisitMathTan(HInvoke* invoke) {
5318 CreateFPToFPCallLocations(allocator_, invoke);
5319 }
5320
VisitMathTan(HInvoke * invoke)5321 void IntrinsicCodeGeneratorRISCV64::VisitMathTan(HInvoke* invoke) {
5322 codegen_->InvokeRuntime(kQuickTan, invoke, invoke->GetDexPc());
5323 }
5324
VisitMathTanh(HInvoke * invoke)5325 void IntrinsicLocationsBuilderRISCV64::VisitMathTanh(HInvoke* invoke) {
5326 CreateFPToFPCallLocations(allocator_, invoke);
5327 }
5328
VisitMathTanh(HInvoke * invoke)5329 void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) {
5330 codegen_->InvokeRuntime(kQuickTanh, invoke, invoke->GetDexPc());
5331 }
5332
VisitMathSqrt(HInvoke * invoke)5333 void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) {
5334 CreateFPToFPLocations(allocator_, invoke, Location::kNoOutputOverlap);
5335 }
5336
VisitMathSqrt(HInvoke * invoke)5337 void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) {
5338 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
5339 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
5340
5341 LocationSummary* locations = invoke->GetLocations();
5342 Riscv64Assembler* assembler = GetAssembler();
5343 FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5344 FRegister out = locations->Out().AsFpuRegister<FRegister>();
5345
5346 __ FSqrtD(out, in);
5347 }
5348
GenDoubleRound(Riscv64Assembler * assembler,HInvoke * invoke,FPRoundingMode mode)5349 static void GenDoubleRound(Riscv64Assembler* assembler, HInvoke* invoke, FPRoundingMode mode) {
5350 LocationSummary* locations = invoke->GetLocations();
5351 FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5352 FRegister out = locations->Out().AsFpuRegister<FRegister>();
5353 ScratchRegisterScope srs(assembler);
5354 XRegister tmp = srs.AllocateXRegister();
5355 FRegister ftmp = srs.AllocateFRegister();
5356 Riscv64Label done;
5357
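  // Approach: a double with magnitude >= 2^52 is already an integer (its ulp is at least 1),
  // and a NaN fails the FLtD check below, so in both cases the input is returned unchanged.
  // Otherwise the value is round-tripped through a 64-bit integer using the requested
  // rounding mode and the sign of the original input is re-applied, so that, for example,
  // ceil(-0.25) produces -0.0 rather than +0.0.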
5358 // Load 2^52
5359 __ LoadConst64(tmp, 0x4330000000000000L);
5360 __ FMvDX(ftmp, tmp);
5361 __ FAbsD(out, in);
5362 __ FLtD(tmp, out, ftmp);
5363
  // Copy the input to the output; if the input is NaN or its magnitude is not below 2^52,
  // it is returned unchanged.
5365 __ FMvD(out, in);
5366 __ Beqz(tmp, &done);
5367
5368 // Convert with rounding mode
5369 __ FCvtLD(tmp, in, mode);
5370 __ FCvtDL(ftmp, tmp, mode);
5371
  // Copy the sign bit of the input (e.g. so that -0.0 is produced where required).
5373 __ FSgnjD(out, ftmp, in);
5374 __ Bind(&done);
5375 }
5376
VisitMathFloor(HInvoke * invoke)5377 void IntrinsicLocationsBuilderRISCV64::VisitMathFloor(HInvoke* invoke) {
5378 CreateFPToFPLocations(allocator_, invoke);
5379 }
5380
VisitMathFloor(HInvoke * invoke)5381 void IntrinsicCodeGeneratorRISCV64::VisitMathFloor(HInvoke* invoke) {
5382 GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRDN);
5383 }
5384
VisitMathCeil(HInvoke * invoke)5385 void IntrinsicLocationsBuilderRISCV64::VisitMathCeil(HInvoke* invoke) {
5386 CreateFPToFPLocations(allocator_, invoke);
5387 }
5388
VisitMathCeil(HInvoke * invoke)5389 void IntrinsicCodeGeneratorRISCV64::VisitMathCeil(HInvoke* invoke) {
5390 GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRUP);
5391 }
5392
VisitMathRint(HInvoke * invoke)5393 void IntrinsicLocationsBuilderRISCV64::VisitMathRint(HInvoke* invoke) {
5394 CreateFPToFPLocations(allocator_, invoke);
5395 }
5396
VisitMathRint(HInvoke * invoke)5397 void IntrinsicCodeGeneratorRISCV64::VisitMathRint(HInvoke* invoke) {
5398 GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRNE);
5399 }
5400
GenMathRound(CodeGeneratorRISCV64 * codegen,HInvoke * invoke,DataType::Type type)5401 void GenMathRound(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5402 Riscv64Assembler* assembler = codegen->GetAssembler();
5403 LocationSummary* locations = invoke->GetLocations();
5404 FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5405 XRegister out = locations->Out().AsRegister<XRegister>();
5406 ScratchRegisterScope srs(assembler);
5407 FRegister ftmp = srs.AllocateFRegister();
5408 Riscv64Label done;
5409
  // If the input is NaN, return 0 (`out` already holds 0 after the `Slti` in that case).
5411 codegen->GetInstructionVisitor()->FClass(out, in, type);
5412 __ Slti(out, out, kFClassNaNMinValue);
5413 __ Beqz(out, &done);
5414
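  // Math.round rounds to the nearest integer with ties rounding up, i.e. the exact value of
  // floor(x + 0.5). Performing the addition with the round-down mode avoids double rounding:
  // for the largest double below 0.5, adding 0.5 with round-to-nearest would give exactly 1.0
  // and the result would be 1, whereas rounding the sum down keeps it below 1.0 and the final
  // conversion correctly yields 0.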
5415 if (type == DataType::Type::kFloat64) {
5416 // Add 0.5 (0x3fe0000000000000), rounding down (towards negative infinity).
5417 __ LoadConst64(out, 0x3fe0000000000000L);
5418 __ FMvDX(ftmp, out);
5419 __ FAddD(ftmp, ftmp, in, FPRoundingMode::kRDN);
5420
5421 // Convert to managed `long`, rounding down (towards negative infinity).
5422 __ FCvtLD(out, ftmp, FPRoundingMode::kRDN);
5423 } else {
5424 // Add 0.5 (0x3f000000), rounding down (towards negative infinity).
5425 __ LoadConst32(out, 0x3f000000);
5426 __ FMvWX(ftmp, out);
5427 __ FAddS(ftmp, ftmp, in, FPRoundingMode::kRDN);
5428
5429 // Convert to managed `int`, rounding down (towards negative infinity).
5430 __ FCvtWS(out, ftmp, FPRoundingMode::kRDN);
5431 }
5432
5433 __ Bind(&done);
5434 }
5435
VisitMathRoundDouble(HInvoke * invoke)5436 void IntrinsicLocationsBuilderRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
5437 CreateFPToIntLocations(allocator_, invoke);
5438 }
5439
VisitMathRoundDouble(HInvoke * invoke)5440 void IntrinsicCodeGeneratorRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
5441 GenMathRound(codegen_, invoke, DataType::Type::kFloat64);
5442 }
5443
VisitMathRoundFloat(HInvoke * invoke)5444 void IntrinsicLocationsBuilderRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
5445 CreateFPToIntLocations(allocator_, invoke);
5446 }
5447
VisitMathRoundFloat(HInvoke * invoke)5448 void IntrinsicCodeGeneratorRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
5449 GenMathRound(codegen_, invoke, DataType::Type::kFloat32);
5450 }
5451
VisitMathMultiplyHigh(HInvoke * invoke)5452 void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
5453 LocationSummary* locations =
5454 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5455 locations->SetInAt(0, Location::RequiresRegister());
5456 locations->SetInAt(1, Location::RequiresRegister());
5457 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5458 }
5459
VisitMathMultiplyHigh(HInvoke * invoke)5460 void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
5461 LocationSummary* locations = invoke->GetLocations();
5462 Riscv64Assembler* assembler = GetAssembler();
5463 DCHECK(invoke->GetType() == DataType::Type::kInt64);
5464
5465 XRegister x = locations->InAt(0).AsRegister<XRegister>();
5466 XRegister y = locations->InAt(1).AsRegister<XRegister>();
5467 XRegister out = locations->Out().AsRegister<XRegister>();
5468
  // Get the high 64 bits of the 128-bit product.
5470 __ Mulh(out, x, y);
5471 }
5472
VisitStringGetCharsNoCheck(HInvoke * invoke)5473 void IntrinsicLocationsBuilderRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
5474 LocationSummary* locations =
5475 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5476
5477 locations->SetInAt(0, Location::RequiresRegister());
5478 locations->SetInAt(1, Location::RequiresRegister());
5479 locations->SetInAt(2, Location::RequiresRegister());
5480 locations->SetInAt(3, Location::RequiresRegister());
5481 locations->SetInAt(4, Location::RequiresRegister());
5482
5483 locations->AddRegisterTemps(3);
5484 }
5485
VisitStringGetCharsNoCheck(HInvoke * invoke)5486 void IntrinsicCodeGeneratorRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
5487 Riscv64Assembler* assembler = GetAssembler();
5488 LocationSummary* locations = invoke->GetLocations();
5489
  // In Java, a char occupies 2 bytes.
5491 constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
5492 static_assert(char_size == 2u);
5493
5494 // Location of data in the destination char array buffer.
5495 const uint32_t array_data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
5496
5497 // Location of char array data in the source string.
5498 const uint32_t string_value_offset = mirror::String::ValueOffset().Uint32Value();
5499
5500 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
5501
5502 // The source string.
5503 XRegister source_string_object = locations->InAt(0).AsRegister<XRegister>();
5504 // Index of the first character.
5505 XRegister source_begin_index = locations->InAt(1).AsRegister<XRegister>();
5506 // Index that immediately follows the last character.
5507 XRegister source_end_index = locations->InAt(2).AsRegister<XRegister>();
5508 // The destination array.
5509 XRegister destination_array_object = locations->InAt(3).AsRegister<XRegister>();
5510 // The start offset in the destination array.
5511 XRegister destination_begin_offset = locations->InAt(4).AsRegister<XRegister>();
5512
5513 XRegister source_ptr = locations->GetTemp(0).AsRegister<XRegister>();
5514 XRegister destination_ptr = locations->GetTemp(1).AsRegister<XRegister>();
5515 XRegister number_of_chars = locations->GetTemp(2).AsRegister<XRegister>();
5516
5517 ScratchRegisterScope temps(assembler);
5518 XRegister tmp = temps.AllocateXRegister();
5519
5520 Riscv64Label done;
5521
  // Calculate the number of characters to copy: number_of_chars = srcEnd - srcBegin.
5523 __ Subw(number_of_chars, source_end_index, source_begin_index);
5524
  // If there is nothing to copy, exit.
5526 __ Beqz(number_of_chars, &done);
5527
  // Prepare the destination pointer for copying:
  // 1. Start at the beginning of the destination array's data
  //    (destination_array_object + array_data_offset).
  __ Addi(destination_ptr, destination_array_object, array_data_offset);
  // 2. Add the start offset within the destination array; since a char occupies 2 bytes,
  //    the offset must first be scaled by 2 (destination_begin_offset << 1), which Sh1Add
  //    does in a single instruction.
5537 __ Sh1Add(destination_ptr, destination_begin_offset, destination_ptr);
5538
  // Prepare the source pointer for copying:
  // 1. Start at the beginning of the source string's character data
  //    (source_string_object + string_value_offset).
  // The begin index is added later because its scaling depends on whether the string
  // is compressed.
5545 __ Addi(source_ptr, source_string_object, string_value_offset);
5546
  // The string may be compressed: a compressed string stores every character in a single
  // byte instead of two.
5549 Riscv64Label compressed_string_preloop;
5550
  // The compression state is stored together with the string's length in the count field:
  // when string compression is enabled, the least significant bit of the count is the
  // compression flag (0 for a compressed string) and the length occupies the remaining bits.
5555 if (mirror::kUseStringCompression) {
5556 // Location of count in string.
5557 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    // Load the string's count field (length and compression flag).
5559 __ Loadwu(tmp, source_string_object, count_offset);
5560
    // Check the compression flag (bit 0); if the string is compressed,
    // branch to the "compressed_string_preloop".
5563 __ Andi(tmp, tmp, 0x1);
5564 __ Beqz(tmp, &compressed_string_preloop);
5565 }
5566
  // The string is not compressed: finish preparing the source pointer by adding the begin
  // index scaled by 2, as was done for the destination pointer.
5569 __ Sh1Add(source_ptr, source_begin_index, source_ptr);
5570
  // Copy 4 characters (8 bytes) at a time; once fewer than 4 characters remain, switch to
  // the "remainder_loop" and copy the rest one character (2 bytes) at a time.
  // Note: unaligned addresses are acceptable here, so no extra alignment handling is needed.
5577 Riscv64Label main_loop;
5578 Riscv64Label remainder_loop;
5579
  // If there are fewer than 4 characters to begin with, go straight to the remainder loop.
5582 __ Addi(tmp, number_of_chars, -4);
5583 __ Bltz(tmp, &remainder_loop);
5584
  // Otherwise, continue with the count biased by -4; the main loop runs while it remains
  // non-negative.
5586 __ Mv(number_of_chars, tmp);
5587
5588 // Main loop. Loads and stores 4 16-bit Java characters at a time.
5589 __ Bind(&main_loop);
5590
5591 __ Loadd(tmp, source_ptr, 0);
5592 __ Addi(source_ptr, source_ptr, char_size * 4);
5593 __ Stored(tmp, destination_ptr, 0);
5594 __ Addi(destination_ptr, destination_ptr, char_size * 4);
5595
5596 __ Addi(number_of_chars, number_of_chars, -4);
5597
5598 __ Bgez(number_of_chars, &main_loop);
5599
  // Undo the bias of -4 to get the number of remaining characters (0 to 3).
5601 __ Addi(number_of_chars, number_of_chars, 4);
5602 __ Beqz(number_of_chars, &done);
5603
5604 // Remainder loop for < 4 characters case and remainder handling.
5605 // Loads and stores one 16-bit Java character at a time.
5606 __ Bind(&remainder_loop);
5607
5608 __ Loadhu(tmp, source_ptr, 0);
5609 __ Addi(source_ptr, source_ptr, char_size);
5610
5611 __ Storeh(tmp, destination_ptr, 0);
5612 __ Addi(destination_ptr, destination_ptr, char_size);
5613
5614 __ Addi(number_of_chars, number_of_chars, -1);
5615 __ Bgtz(number_of_chars, &remainder_loop);
5616
5617 Riscv64Label compressed_string_loop;
5618 if (mirror::kUseStringCompression) {
5619 __ J(&done);
5620
    // Copying path for a compressed string: every source character occupies a single byte
    // instead of two.
5623 constexpr size_t compressed_char_size = DataType::Size(DataType::Type::kInt8);
5624 static_assert(compressed_char_size == 1u);
5625
5626 __ Bind(&compressed_string_preloop);
5627
    // Finish preparing the source pointer. Each source character is a single byte, so
    // source_begin_index does not need to be scaled; the byte is widened to a 16-bit
    // character when it is stored to the destination.
5633 __ Add(source_ptr, source_ptr, source_begin_index);
5634
    // Copy loop for compressed strings: load one 8-bit character and store it as one
    // 16-bit character per iteration.
5636 __ Bind(&compressed_string_loop);
5637
5638 __ Loadbu(tmp, source_ptr, 0);
5639 __ Addi(source_ptr, source_ptr, compressed_char_size);
5640 __ Storeh(tmp, destination_ptr, 0);
5641 __ Addi(destination_ptr, destination_ptr, char_size);
5642
5643 __ Addi(number_of_chars, number_of_chars, -1);
5644 __ Bgtz(number_of_chars, &compressed_string_loop);
5645 }
5646
5647 __ Bind(&done);
5648 }
5649
GenMathSignum(CodeGeneratorRISCV64 * codegen,HInvoke * invoke,DataType::Type type)5650 void GenMathSignum(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5651 LocationSummary* locations = invoke->GetLocations();
5652 DCHECK(locations->InAt(0).Equals(locations->Out()));
5653 FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5654 Riscv64Assembler* assembler = codegen->GetAssembler();
5655 ScratchRegisterScope srs(assembler);
5656 XRegister tmp = srs.AllocateXRegister();
5657 FRegister ftmp = srs.AllocateFRegister();
5658 Riscv64Label done;
5659
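  // Math.signum returns the input unchanged for +/-0.0 and NaN, and +/-1.0 otherwise.
  // Load 1.0, classify the input, and unless it is a zero or a NaN, replace it with 1.0
  // carrying the sign of the original value.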
5660 if (type == DataType::Type::kFloat64) {
5661 // 0x3FF0000000000000L = 1.0
5662 __ Li(tmp, 0x3FF0000000000000L);
5663 __ FMvDX(ftmp, tmp);
5664 __ FClassD(tmp, in);
5665 } else {
5666 // 0x3f800000 = 1.0f
5667 __ Li(tmp, 0x3F800000);
5668 __ FMvWX(ftmp, tmp);
5669 __ FClassS(tmp, in);
5670 }
5671
5672 __ Andi(tmp, tmp, kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN);
5673 __ Bnez(tmp, &done);
5674
5675 if (type == DataType::Type::kFloat64) {
5676 __ FSgnjD(in, ftmp, in);
5677 } else {
5678 __ FSgnjS(in, ftmp, in);
5679 }
5680
5681 __ Bind(&done);
5682 }
5683
VisitMathSignumDouble(HInvoke * invoke)5684 void IntrinsicLocationsBuilderRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
5685 LocationSummary* locations =
5686 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5687 locations->SetInAt(0, Location::RequiresFpuRegister());
5688 locations->SetOut(Location::SameAsFirstInput());
5689 }
5690
VisitMathSignumDouble(HInvoke * invoke)5691 void IntrinsicCodeGeneratorRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
5692 GenMathSignum(codegen_, invoke, DataType::Type::kFloat64);
5693 }
5694
VisitMathSignumFloat(HInvoke * invoke)5695 void IntrinsicLocationsBuilderRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
5696 LocationSummary* locations =
5697 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5698 locations->SetInAt(0, Location::RequiresFpuRegister());
5699 locations->SetOut(Location::SameAsFirstInput());
5700 }
5701
VisitMathSignumFloat(HInvoke * invoke)5702 void IntrinsicCodeGeneratorRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
5703 GenMathSignum(codegen_, invoke, DataType::Type::kFloat32);
5704 }
5705
GenMathCopySign(CodeGeneratorRISCV64 * codegen,HInvoke * invoke,DataType::Type type)5706 void GenMathCopySign(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5707 Riscv64Assembler* assembler = codegen->GetAssembler();
5708 LocationSummary* locations = invoke->GetLocations();
5709 FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
5710 FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
5711 FRegister out = locations->Out().AsFpuRegister<FRegister>();
5712
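  // FSGNJ.D / FSGNJ.S produce a value with the first operand's magnitude and the second
  // operand's sign bit, which is exactly what Math.copySign needs.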
5713 if (type == DataType::Type::kFloat64) {
5714 __ FSgnjD(out, in0, in1);
5715 } else {
5716 __ FSgnjS(out, in0, in1);
5717 }
5718 }
5719
VisitMathCopySignDouble(HInvoke * invoke)5720 void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
5721 CreateFPFPToFPCallLocations(allocator_, invoke);
5722 }
5723
VisitMathCopySignDouble(HInvoke * invoke)5724 void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
5725 GenMathCopySign(codegen_, invoke, DataType::Type::kFloat64);
5726 }
5727
VisitMathCopySignFloat(HInvoke * invoke)5728 void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
5729 CreateFPFPToFPCallLocations(allocator_, invoke);
5730 }
5731
VisitMathCopySignFloat(HInvoke * invoke)5732 void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
5733 GenMathCopySign(codegen_, invoke, DataType::Type::kFloat32);
5734 }
5735
5736 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
5737 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
5738 #undef MARK_UNIMPLEMENTED
5739
5740 UNREACHABLE_INTRINSICS(RISCV64)
5741
5742 } // namespace riscv64
5743 } // namespace art
5744