xref: /aosp_15_r20/art/compiler/optimizing/intrinsics_riscv64.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_riscv64.h"
18 
19 #include "code_generator_riscv64.h"
20 #include "intrinsic_objects.h"
21 #include "intrinsics_utils.h"
22 #include "optimizing/locations.h"
23 #include "well_known_classes.h"
24 
25 namespace art HIDDEN {
26 namespace riscv64 {
27 
28 using IntrinsicSlowPathRISCV64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorRISCV64,
29                                                    SlowPathCodeRISCV64,
30                                                    Riscv64Assembler>;
31 
32 #define __ assembler->
33 
34 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
35 class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 {
36  public:
37   ReadBarrierSystemArrayCopySlowPathRISCV64(HInstruction* instruction, Location tmp)
38       : SlowPathCodeRISCV64(instruction), tmp_(tmp) {}
39 
40   void EmitNativeCode(CodeGenerator* codegen_in) override {
41     DCHECK(codegen_in->EmitBakerReadBarrier());
42     CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
43     Riscv64Assembler* assembler = codegen->GetAssembler();
44     LocationSummary* locations = instruction_->GetLocations();
45     DCHECK(locations->CanCall());
46     DCHECK(instruction_->IsInvokeStaticOrDirect())
47         << "Unexpected instruction in read barrier arraycopy slow path: "
48         << instruction_->DebugName();
49     DCHECK(instruction_->GetLocations()->Intrinsified());
50     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
51 
52     const int32_t element_size = DataType::Size(DataType::Type::kReference);
53 
54     XRegister src_curr_addr = locations->GetTemp(0).AsRegister<XRegister>();
55     XRegister dst_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
56     XRegister src_stop_addr = locations->GetTemp(2).AsRegister<XRegister>();
57     XRegister tmp_reg = tmp_.AsRegister<XRegister>();
58 
59     __ Bind(GetEntryLabel());
60     // The source range and destination pointer were initialized before entering the slow-path.
61     Riscv64Label slow_copy_loop;
62     __ Bind(&slow_copy_loop);
63     __ Loadwu(tmp_reg, src_curr_addr, 0);
64     codegen->MaybeUnpoisonHeapReference(tmp_reg);
65     // TODO: Inline the mark bit check before calling the runtime?
66     // tmp_reg = ReadBarrier::Mark(tmp_reg);
67     // No need to save live registers; it's taken care of by the
68     // entrypoint. Also, there is no need to update the stack mask,
69     // as this runtime call will not trigger a garbage collection.
70     // (See ReadBarrierMarkSlowPathRISCV64::EmitNativeCode for more
71     // explanations.)
72     int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(tmp_);
73     // This runtime call does not require a stack map.
74     codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
75     codegen->MaybePoisonHeapReference(tmp_reg);
76     __ Storew(tmp_reg, dst_curr_addr, 0);
77     __ Addi(src_curr_addr, src_curr_addr, element_size);
78     __ Addi(dst_curr_addr, dst_curr_addr, element_size);
79     __ Bne(src_curr_addr, src_stop_addr, &slow_copy_loop);
80     __ J(GetExitLabel());
81   }
82 
83   const char* GetDescription() const override {
84     return "ReadBarrierSystemArrayCopySlowPathRISCV64";
85   }
86 
87  private:
88   Location tmp_;
89 
90   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathRISCV64);
91 };
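// Note: ReadBarrierMarkEntrypointOffset(tmp_) selects the per-register marking entrypoint
// that returns the marked reference in `tmp_reg` itself, which is why the copy loop can
// invoke the runtime without saving or restoring any other live registers.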
92 
93 bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) {
94   Dispatch(invoke);
95   LocationSummary* res = invoke->GetLocations();
96   if (res == nullptr) {
97     return false;
98   }
99   return res->Intrinsified();
100 }
101 
102 Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() {
103   return codegen_->GetAssembler();
104 }
105 
106 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
107   LocationSummary* locations =
108       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
109   locations->SetInAt(0, Location::RequiresFpuRegister());
110   locations->SetOut(Location::RequiresRegister());
111 }
112 
113 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
114   LocationSummary* locations =
115       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
116   locations->SetInAt(0, Location::RequiresRegister());
117   locations->SetOut(Location::RequiresFpuRegister());
118 }
119 
120 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
121   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
122   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
123   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
124 
125   LocationSummary* const locations =
126       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
127   InvokeRuntimeCallingConvention calling_convention;
128 
129   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
130   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
131 }
132 
133 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
134   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
135   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
136   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
137   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
138 
139   LocationSummary* const locations =
140       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
141   InvokeRuntimeCallingConvention calling_convention;
142 
143   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
144   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
145   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
146 }
147 
148 static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
149   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
150   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
151   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
152   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
153   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
154 
155   LocationSummary* const locations =
156       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
157 
158   locations->SetInAt(0, Location::RequiresFpuRegister());
159   locations->SetInAt(1, Location::RequiresFpuRegister());
160   locations->SetInAt(2, Location::RequiresFpuRegister());
161   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
162 }
163 
164 static void CreateFPToFPLocations(ArenaAllocator* allocator,
165                                   HInvoke* invoke,
166                                   Location::OutputOverlap overlaps = Location::kOutputOverlap) {
167   LocationSummary* locations =
168       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
169   locations->SetInAt(0, Location::RequiresFpuRegister());
170   locations->SetOut(Location::RequiresFpuRegister(), overlaps);
171 }
172 
173 void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
174   CreateFPToIntLocations(allocator_, invoke);
175 }
176 
177 void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
178   LocationSummary* locations = invoke->GetLocations();
179   Riscv64Assembler* assembler = GetAssembler();
180   __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
181 }
182 
183 void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
184   CreateIntToFPLocations(allocator_, invoke);
185 }
186 
187 void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
188   LocationSummary* locations = invoke->GetLocations();
189   Riscv64Assembler* assembler = GetAssembler();
190   __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
191 }
192 
193 void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
194   CreateFPToIntLocations(allocator_, invoke);
195 }
196 
197 void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
198   LocationSummary* locations = invoke->GetLocations();
199   Riscv64Assembler* assembler = GetAssembler();
200   __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
201 }
202 
203 void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
204   CreateIntToFPLocations(allocator_, invoke);
205 }
206 
207 void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
208   LocationSummary* locations = invoke->GetLocations();
209   Riscv64Assembler* assembler = GetAssembler();
210   __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
211 }
212 
213 void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
214   CreateFPToIntLocations(allocator_, invoke);
215 }
216 
217 void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
218   LocationSummary* locations = invoke->GetLocations();
219   Riscv64Assembler* assembler = GetAssembler();
220   XRegister out = locations->Out().AsRegister<XRegister>();
221   __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>());
222   __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
223   __ Snez(out, out);
224 }
225 
226 void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
227   CreateFPToIntLocations(allocator_, invoke);
228 }
229 
230 void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
231   LocationSummary* locations = invoke->GetLocations();
232   Riscv64Assembler* assembler = GetAssembler();
233   XRegister out = locations->Out().AsRegister<XRegister>();
234   __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>());
235   __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
236   __ Snez(out, out);
237 }
238 
239 static void CreateIntToIntNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
240   LocationSummary* locations =
241       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
242   locations->SetInAt(0, Location::RequiresRegister());
243   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
244 }
245 
246 template <typename EmitOp>
247 void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) {
248   LocationSummary* locations = invoke->GetLocations();
249   emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
250 }
251 
252 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
253   CreateIntToIntNoOverlapLocations(allocator_, invoke);
254 }
255 
256 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
257   Riscv64Assembler* assembler = GetAssembler();
258   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); });
259 }
260 
261 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
262   CreateIntToIntNoOverlapLocations(allocator_, invoke);
263 }
264 
265 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
266   Riscv64Assembler* assembler = GetAssembler();
267   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); });
268 }
269 
270 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
271   CreateIntToIntNoOverlapLocations(allocator_, invoke);
272 }
273 
274 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
275   Riscv64Assembler* assembler = GetAssembler();
276   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); });
277 }
278 
279 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
280   CreateIntToIntNoOverlapLocations(allocator_, invoke);
281 }
282 
283 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
284   Riscv64Assembler* assembler = GetAssembler();
285   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); });
286 }
287 
288 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
289   LocationSummary* locations =
290       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
291   locations->SetInAt(0, Location::RequiresRegister());
292   locations->SetInAt(1, Location::RequiresRegister());
293 }
294 
295 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
296   LocationSummary* locations =
297       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
298   locations->SetInAt(0, Location::RequiresRegister());
299   locations->SetInAt(1, Location::RequiresRegister());
300   // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
301   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
302 }
303 
304 template <typename EmitOp>
305 void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) {
306   LocationSummary* locations = invoke->GetLocations();
307   emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
308 }
309 
310 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
311   CreateIntIntToVoidLocations(allocator_, invoke);
312 }
313 
314 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
315   Riscv64Assembler* assembler = GetAssembler();
316   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); });
317 }
318 
319 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
320   CreateIntIntToVoidLocations(allocator_, invoke);
321 }
322 
323 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
324   Riscv64Assembler* assembler = GetAssembler();
325   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); });
326 }
327 
328 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
329   CreateIntIntToVoidLocations(allocator_, invoke);
330 }
331 
332 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
333   Riscv64Assembler* assembler = GetAssembler();
334   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); });
335 }
336 
337 void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
338   CreateIntIntToVoidLocations(allocator_, invoke);
339 }
340 
341 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
342   Riscv64Assembler* assembler = GetAssembler();
343   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); });
344 }
345 
346 static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
347                                  Location rd,
348                                  XRegister rs1,
349                                  DataType::Type type) {
350   Riscv64Assembler* assembler = codegen->GetAssembler();
351   switch (type) {
352     case DataType::Type::kUint16:
353       // There is no 16-bit reverse bytes instruction.
354       __ Rev8(rd.AsRegister<XRegister>(), rs1);
355       __ Srli(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
356       break;
357     case DataType::Type::kInt16:
358       // There is no 16-bit reverse bytes instruction.
359       __ Rev8(rd.AsRegister<XRegister>(), rs1);
360       __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
361       break;
362     case DataType::Type::kInt32:
363       // There is no 32-bit reverse bytes instruction.
364       __ Rev8(rd.AsRegister<XRegister>(), rs1);
365       __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 32);
366       break;
367     case DataType::Type::kInt64:
368       __ Rev8(rd.AsRegister<XRegister>(), rs1);
369       break;
370     case DataType::Type::kFloat32:
371       // There is no 32-bit reverse bytes instruction.
372       __ Rev8(rs1, rs1);  // Note: Clobbers `rs1`.
373       __ Srai(rs1, rs1, 32);
374       __ FMvWX(rd.AsFpuRegister<FRegister>(), rs1);
375       break;
376     case DataType::Type::kFloat64:
377       __ Rev8(rs1, rs1);  // Note: Clobbers `rs1`.
378       __ FMvDX(rd.AsFpuRegister<FRegister>(), rs1);
379       break;
380     default:
381       LOG(FATAL) << "Unexpected type: " << type;
382       UNREACHABLE();
383   }
384 }
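// Worked example for the kInt32 case: with the canonically sign-extended input
// 0x0000'0000'1122'3344, `Rev8` yields 0x4433'2211'0000'0000 and the arithmetic shift
// right by 32 produces 0x4433'2211, again sign-extended from bit 31 as required for a
// 32-bit value held in a 64-bit register.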
385 
386 static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
387                                  HInvoke* invoke,
388                                  DataType::Type type) {
389   DCHECK_EQ(type, invoke->GetType());
390   LocationSummary* locations = invoke->GetLocations();
391   GenerateReverseBytes(codegen, locations->Out(), locations->InAt(0).AsRegister<XRegister>(), type);
392 }
393 
394 static void GenerateReverse(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
395   DCHECK_EQ(type, invoke->GetType());
396   Riscv64Assembler* assembler = codegen->GetAssembler();
397   LocationSummary* locations = invoke->GetLocations();
398   XRegister in = locations->InAt(0).AsRegister<XRegister>();
399   XRegister out = locations->Out().AsRegister<XRegister>();
400   ScratchRegisterScope srs(assembler);
401   XRegister temp1 = srs.AllocateXRegister();
402   XRegister temp2 = srs.AllocateXRegister();
403 
404   auto maybe_extend_mask = [type, assembler](XRegister mask, XRegister temp) {
405     if (type == DataType::Type::kInt64) {
406       __ Slli(temp, mask, 32);
407       __ Add(mask, mask, temp);
408     }
409   };
410 
411   // Swap bits in bit pairs.
412   __ Li(temp1, 0x55555555);
413   maybe_extend_mask(temp1, temp2);
414   __ Srli(temp2, in, 1);
415   __ And(out, in, temp1);
416   __ And(temp2, temp2, temp1);
417   __ Sh1Add(out, out, temp2);
418 
419   // Swap bit pairs in 4-bit groups.
420   __ Li(temp1, 0x33333333);
421   maybe_extend_mask(temp1, temp2);
422   __ Srli(temp2, out, 2);
423   __ And(out, out, temp1);
424   __ And(temp2, temp2, temp1);
425   __ Sh2Add(out, out, temp2);
426 
427   // Swap 4-bit groups in 8-bit groups.
428   __ Li(temp1, 0x0f0f0f0f);
429   maybe_extend_mask(temp1, temp2);
430   __ Srli(temp2, out, 4);
431   __ And(out, out, temp1);
432   __ And(temp2, temp2, temp1);
433   __ Slli(out, out, 4);
434   __ Add(out, out, temp2);
435 
436   GenerateReverseBytes(codegen, Location::RegisterLocation(out), out, type);
437 }
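// The three masking steps above use the Zba shift-and-add instructions to merge the two
// halves of each group: e.g. `Sh1Add(out, out, temp2)` computes `(out << 1) + temp2`,
// placing the masked even-position bits at odd positions and adding in the shifted-down
// odd-position bits. Once the bits within every byte are reversed, the final
// GenerateReverseBytes() call reverses the byte order (and restores the canonical sign
// extension for the 32-bit case).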
438 
439 void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverse(HInvoke* invoke) {
440   CreateIntToIntNoOverlapLocations(allocator_, invoke);
441 }
442 
443 void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverse(HInvoke* invoke) {
444   GenerateReverse(codegen_, invoke, DataType::Type::kInt32);
445 }
446 
447 void IntrinsicLocationsBuilderRISCV64::VisitLongReverse(HInvoke* invoke) {
448   CreateIntToIntNoOverlapLocations(allocator_, invoke);
449 }
450 
451 void IntrinsicCodeGeneratorRISCV64::VisitLongReverse(HInvoke* invoke) {
452   GenerateReverse(codegen_, invoke, DataType::Type::kInt64);
453 }
454 
455 void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
456   CreateIntToIntNoOverlapLocations(allocator_, invoke);
457 }
458 
459 void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
460   GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt32);
461 }
462 
463 void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
464   CreateIntToIntNoOverlapLocations(allocator_, invoke);
465 }
466 
467 void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
468   GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt64);
469 }
470 
471 void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
472   CreateIntToIntNoOverlapLocations(allocator_, invoke);
473 }
474 
475 void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
476   GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt16);
477 }
478 
479 template <typename EmitOp>
480 void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) {
481   LocationSummary* locations = invoke->GetLocations();
482   emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
483 }
484 
485 void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
486   CreateIntToIntNoOverlapLocations(allocator_, invoke);
487 }
488 
489 void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
490   Riscv64Assembler* assembler = GetAssembler();
491   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); });
492 }
493 
494 void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) {
495   CreateIntToIntNoOverlapLocations(allocator_, invoke);
496 }
497 
498 void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) {
499   Riscv64Assembler* assembler = GetAssembler();
500   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); });
501 }
502 
503 void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
504   CreateIntToIntNoOverlapLocations(allocator_, invoke);
505 }
506 
507 void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
508   Riscv64Assembler* assembler = GetAssembler();
509   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
510     ScratchRegisterScope srs(assembler);
511     XRegister tmp = srs.AllocateXRegister();
512     XRegister tmp2 = srs.AllocateXRegister();
513     __ Clzw(tmp, rs1);
514     __ Li(tmp2, INT64_C(-0x80000000));
515     __ Srlw(tmp2, tmp2, tmp);
516     __ And(rd, rs1, tmp2);  // Make sure the result is zero if the input is zero.
517   });
518 }
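// Note: `Srlw` masks the shift amount to its low five bits, so for a zero input (where
// `Clzw` returns 32) the mask is not shifted at all; the final AND with the zero input
// is what forces the result to zero in that case. The 64-bit variant below relies on the
// analogous six-bit masking of `Srl` when `Clz` returns 64.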
519 
520 void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
521   CreateIntToIntNoOverlapLocations(allocator_, invoke);
522 }
523 
524 void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
525   Riscv64Assembler* assembler = GetAssembler();
526   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
527     ScratchRegisterScope srs(assembler);
528     XRegister tmp = srs.AllocateXRegister();
529     XRegister tmp2 = srs.AllocateXRegister();
530     __ Clz(tmp, rs1);
531     __ Li(tmp2, INT64_C(-0x8000000000000000));
532     __ Srl(tmp2, tmp2, tmp);
533     __ And(rd, rs1, tmp2);  // Make sure the result is zero if the input is zero.
534   });
535 }
536 
537 void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
538   CreateIntToIntNoOverlapLocations(allocator_, invoke);
539 }
540 
541 void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
542   Riscv64Assembler* assembler = GetAssembler();
543   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
544     ScratchRegisterScope srs(assembler);
545     XRegister tmp = srs.AllocateXRegister();
546     __ NegW(tmp, rs1);
547     __ And(rd, rs1, tmp);
548   });
549 }
550 
551 void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
552   CreateIntToIntNoOverlapLocations(allocator_, invoke);
553 }
554 
555 void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
556   Riscv64Assembler* assembler = GetAssembler();
557   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
558     ScratchRegisterScope srs(assembler);
559     XRegister tmp = srs.AllocateXRegister();
560     __ Neg(tmp, rs1);
561     __ And(rd, rs1, tmp);
562   });
563 }
564 
565 void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
566   CreateIntToIntNoOverlapLocations(allocator_, invoke);
567 }
568 
569 void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
570   Riscv64Assembler* assembler = GetAssembler();
571   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); });
572 }
573 
574 void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
575   CreateIntToIntNoOverlapLocations(allocator_, invoke);
576 }
577 
578 void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
579   Riscv64Assembler* assembler = GetAssembler();
580   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); });
581 }
582 
583 void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
584   CreateIntToIntNoOverlapLocations(allocator_, invoke);
585 }
586 
587 void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
588   Riscv64Assembler* assembler = GetAssembler();
589   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); });
590 }
591 
592 void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
593   CreateIntToIntNoOverlapLocations(allocator_, invoke);
594 }
595 
596 void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
597   Riscv64Assembler* assembler = GetAssembler();
598   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); });
599 }
600 
601 static void GenerateDivRemUnsigned(HInvoke* invoke, bool is_div, CodeGeneratorRISCV64* codegen) {
602   LocationSummary* locations = invoke->GetLocations();
603   Riscv64Assembler* assembler = codegen->GetAssembler();
604   DataType::Type type = invoke->GetType();
605   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
606 
607   XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
608   XRegister divisor = locations->InAt(1).AsRegister<XRegister>();
609   XRegister out = locations->Out().AsRegister<XRegister>();
610 
611   // Check if divisor is zero, bail to managed implementation to handle.
612   SlowPathCodeRISCV64* slow_path =
613       new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
614   codegen->AddSlowPath(slow_path);
615   __ Beqz(divisor, slow_path->GetEntryLabel());
616 
617   if (is_div) {
618     if (type == DataType::Type::kInt32) {
619       __ Divuw(out, dividend, divisor);
620     } else {
621       __ Divu(out, dividend, divisor);
622     }
623   } else {
624     if (type == DataType::Type::kInt32) {
625       __ Remuw(out, dividend, divisor);
626     } else {
627       __ Remu(out, dividend, divisor);
628     }
629   }
630 
631   __ Bind(slow_path->GetExitLabel());
632 }
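// Note: RISC-V integer division does not trap on a zero divisor (DIVU[W] returns all
// ones and REMU[W] returns the dividend), so the explicit `Beqz` check above is what
// routes division by zero to the managed implementation, which throws the required
// ArithmeticException.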
633 
634 void IntrinsicLocationsBuilderRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
635   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
636 }
637 
638 void IntrinsicCodeGeneratorRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
639   GenerateDivRemUnsigned(invoke, /*is_div=*/true, codegen_);
640 }
641 
642 void IntrinsicLocationsBuilderRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
643   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
644 }
645 
646 void IntrinsicCodeGeneratorRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
647   GenerateDivRemUnsigned(invoke, /*is_div=*/true, codegen_);
648 }
649 
650 void IntrinsicLocationsBuilderRISCV64::VisitIntegerRemainderUnsigned(HInvoke* invoke) {
651   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
652 }
653 
654 void IntrinsicCodeGeneratorRISCV64::VisitIntegerRemainderUnsigned(HInvoke* invoke) {
655   GenerateDivRemUnsigned(invoke, /*is_div=*/false, codegen_);
656 }
657 
658 void IntrinsicLocationsBuilderRISCV64::VisitLongRemainderUnsigned(HInvoke* invoke) {
659   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
660 }
661 
662 void IntrinsicCodeGeneratorRISCV64::VisitLongRemainderUnsigned(HInvoke* invoke) {
663   GenerateDivRemUnsigned(invoke, /*is_div=*/false, codegen_);
664 }
665 
666 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
667   void IntrinsicLocationsBuilderRISCV64::Visit##name##ValueOf(HInvoke* invoke) {         \
668     InvokeRuntimeCallingConvention calling_convention;                                   \
669     IntrinsicVisitor::ComputeValueOfLocations(                                           \
670         invoke,                                                                          \
671         codegen_,                                                                        \
672         low,                                                                             \
673         (high) - (low) + 1,                                                              \
674         calling_convention.GetReturnLocation(DataType::Type::kReference),                \
675         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
676   }                                                                                      \
677   void IntrinsicCodeGeneratorRISCV64::Visit##name##ValueOf(HInvoke* invoke) {            \
678     IntrinsicVisitor::ValueOfInfo info =                                                 \
679         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
680                                              codegen_->GetCompilerOptions(),             \
681                                              WellKnownClasses::java_lang_##name##_value, \
682                                              low,                                        \
683                                              (high) - (low) + 1,                         \
684                                              start_index);                               \
685     HandleValueOf(invoke, info, type);                                                   \
686   }
687   BOXED_TYPES(VISIT_INTRINSIC)
688 #undef VISIT_INTRINSIC
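// For the Integer entry of BOXED_TYPES, for example, the macro above expands to
// VisitIntegerValueOf() in both the locations builder and the code generator, with the
// generator passing WellKnownClasses::java_lang_Integer_value to ComputeValueOfInfo()
// before delegating to HandleValueOf() below.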
689 
690 void IntrinsicCodeGeneratorRISCV64::HandleValueOf(HInvoke* invoke,
691                                                   const IntrinsicVisitor::ValueOfInfo& info,
692                                                   DataType::Type type) {
693   Riscv64Assembler* assembler = codegen_->GetAssembler();
694   LocationSummary* locations = invoke->GetLocations();
695   XRegister out = locations->Out().AsRegister<XRegister>();
696   ScratchRegisterScope srs(assembler);
697   XRegister temp = srs.AllocateXRegister();
698   auto allocate_instance = [&]() {
699     DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
700     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
701     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
702     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
703   };
704   if (invoke->InputAt(0)->IsIntConstant()) {
705     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
706     if (static_cast<uint32_t>(value - info.low) < info.length) {
707       // Just embed the object in the code.
708       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
709       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
710     } else {
711       DCHECK(locations->CanCall());
712       // Allocate and initialize a new object.
713       // TODO: If we JIT, we could allocate the object now, and store it in the
714       // JIT object table.
715       allocate_instance();
716       __ Li(temp, value);
717       codegen_->GetInstructionVisitor()->Store(
718           Location::RegisterLocation(temp), out, info.value_offset, type);
719       // Class pointer and `value` final field stores require a barrier before publication.
720       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
721     }
722   } else {
723     DCHECK(locations->CanCall());
724     XRegister in = locations->InAt(0).AsRegister<XRegister>();
725     Riscv64Label allocate, done;
726     // Check bounds of our cache.
727     __ AddConst32(out, in, -info.low);
728     __ Li(temp, info.length);
729     __ Bgeu(out, temp, &allocate);
730     // If the value is within the bounds, load the object directly from the array.
731     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
732     __ Sh2Add(temp, out, temp);
733     __ Loadwu(out, temp, 0);
734     codegen_->MaybeUnpoisonHeapReference(out);
735     __ J(&done);
736     __ Bind(&allocate);
737     // Otherwise allocate and initialize a new object.
738     allocate_instance();
739     codegen_->GetInstructionVisitor()->Store(
740         Location::RegisterLocation(in), out, info.value_offset, type);
741     // Class pointer and `value` final field stores require a barrier before publication.
742     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
743     __ Bind(&done);
744   }
745 }
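// In the cache lookup above, `out = in - info.low` is the zero-based cache index and
// `Sh2Add(temp, out, temp)` forms `array_data + (index << 2)`, i.e. the address of the
// 4-byte compressed reference for that index in the boot image cache array.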
746 
747 void IntrinsicLocationsBuilderRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
748   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
749 
750   if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
751     invoke->GetLocations()->AddTemp(Location::RequiresRegister());
752   }
753 }
754 
755 void IntrinsicCodeGeneratorRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
756   Riscv64Assembler* assembler = GetAssembler();
757   LocationSummary* locations = invoke->GetLocations();
758   Location obj = locations->InAt(0);
759   Location out = locations->Out();
760 
761   SlowPathCodeRISCV64* slow_path =
762       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
763   codegen_->AddSlowPath(slow_path);
764 
765   if (codegen_->EmitReadBarrier()) {
766     // Check self->GetWeakRefAccessEnabled().
767     ScratchRegisterScope srs(assembler);
768     XRegister temp = srs.AllocateXRegister();
769     __ Loadwu(temp, TR, Thread::WeakRefAccessEnabledOffset<kRiscv64PointerSize>().Int32Value());
770     static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
771     __ Bnez(temp, slow_path->GetEntryLabel());
772   }
773 
774   {
775     // Load the java.lang.ref.Reference class.
776     ScratchRegisterScope srs(assembler);
777     XRegister temp = srs.AllocateXRegister();
778     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
779 
780     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
781     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
782     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
783     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
784               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
785     __ Loadhu(temp, temp, disable_intrinsic_offset.Int32Value());
786     __ Bnez(temp, slow_path->GetEntryLabel());
787   }
788 
789   // Load the value from the field.
790   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
791   if (codegen_->EmitBakerReadBarrier()) {
792     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
793                                                     out,
794                                                     obj.AsRegister<XRegister>(),
795                                                     referent_offset,
796                                                     /*temp=*/locations->GetTemp(0),
797                                                     /*needs_null_check=*/false);
798   } else {
799     codegen_->GetInstructionVisitor()->Load(
800         out, obj.AsRegister<XRegister>(), referent_offset, DataType::Type::kReference);
801     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
802   }
803   // Emit memory barrier for load-acquire.
804   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
805   __ Bind(slow_path->GetExitLabel());
806 }
807 
808 void IntrinsicLocationsBuilderRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
809   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
810 }
811 
812 void IntrinsicCodeGeneratorRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
813   Riscv64Assembler* assembler = GetAssembler();
814   LocationSummary* locations = invoke->GetLocations();
815   XRegister obj = locations->InAt(0).AsRegister<XRegister>();
816   XRegister other = locations->InAt(1).AsRegister<XRegister>();
817   XRegister out = locations->Out().AsRegister<XRegister>();
818 
819   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
820   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
821 
822   codegen_->GetInstructionVisitor()->Load(
823       Location::RegisterLocation(out), obj, referent_offset, DataType::Type::kReference);
824   codegen_->MaybeRecordImplicitNullCheck(invoke);
825   codegen_->MaybeUnpoisonHeapReference(out);
826 
827   // Emit memory barrier for load-acquire.
828   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
829 
830   if (codegen_->EmitReadBarrier()) {
831     DCHECK(kUseBakerReadBarrier);
832 
833     Riscv64Label calculate_result;
834 
835     // If equal to `other`, the loaded reference is final (it cannot be a from-space reference).
836     __ Beq(out, other, &calculate_result);
837 
838     // If the GC is not marking, the loaded reference is final.
839     ScratchRegisterScope srs(assembler);
840     XRegister tmp = srs.AllocateXRegister();
841     __ Loadwu(tmp, TR, Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value());
842     __ Beqz(tmp, &calculate_result);
843 
844     // Check if the loaded reference is null.
845     __ Beqz(out, &calculate_result);
846 
847     // For correct memory visibility, we need a barrier before loading the lock word to
848     // synchronize with the publishing of `other` by the CC GC. However, as long as the
849     // load-acquire above is implemented as a plain load followed by a barrier (rather
850     // than an atomic load-acquire instruction which synchronizes only with other
851     // instructions on the same memory location), that barrier is sufficient.
852 
853     // Load the lockword and check if it is a forwarding address.
854     static_assert(LockWord::kStateShift == 30u);
855     static_assert(LockWord::kStateForwardingAddress == 3u);
856     // Load the lock word sign-extended. Comparing it to the sign-extended forwarding
857     // address bits as unsigned is the same as comparing both zero-extended.
858     __ Loadw(tmp, out, monitor_offset);
859     // Materialize sign-extended forwarding address bits. This is a single LUI instruction.
860     XRegister tmp2 = srs.AllocateXRegister();
861     __ Li(tmp2, INT64_C(-1) & ~static_cast<int64_t>((1 << LockWord::kStateShift) - 1));
862     // If we do not have a forwarding address, the loaded reference cannot be the same as `other`,
863     // so we proceed to calculate the result with `out != other`.
864     __ Bltu(tmp, tmp2, &calculate_result);
865 
866     // Extract the forwarding address for comparison with `other`.
867     // Note that the high 32 bits shall not be used for the result calculation.
868     __ Slliw(out, tmp, LockWord::kForwardingAddressShift);
869 
870     __ Bind(&calculate_result);
871   }
872 
873   // Calculate the result `out == other`.
874   __ Subw(out, out, other);
875   __ Seqz(out, out);
876 }
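// The final `Subw`/`Seqz` pair materializes the boolean `out == other` from the low
// 32 bits only, which is why only the low 32 bits of the extracted forwarding address
// need to be meaningful above.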
877 
878 static void GenerateVisitStringIndexOf(HInvoke* invoke,
879                                        Riscv64Assembler* assembler,
880                                        CodeGeneratorRISCV64* codegen,
881                                        bool start_at_zero) {
882   LocationSummary* locations = invoke->GetLocations();
883 
884   // Note that the null check must have been done earlier.
885   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
886 
887   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
888   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
889   SlowPathCodeRISCV64* slow_path = nullptr;
890   HInstruction* code_point = invoke->InputAt(1);
891   if (code_point->IsIntConstant()) {
892     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
893       // Always needs the slow-path. We could directly dispatch to it, but this case should be
894       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
895       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
896       codegen->AddSlowPath(slow_path);
897       __ J(slow_path->GetEntryLabel());
898       __ Bind(slow_path->GetExitLabel());
899       return;
900     }
901   } else if (code_point->GetType() != DataType::Type::kUint16) {
902     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
903     codegen->AddSlowPath(slow_path);
904     ScratchRegisterScope srs(assembler);
905     XRegister tmp = srs.AllocateXRegister();
906     __ Srliw(tmp, locations->InAt(1).AsRegister<XRegister>(), 16);
907     __ Bnez(tmp, slow_path->GetEntryLabel());
908   }
909 
910   if (start_at_zero) {
911     // Start-index = 0.
912     XRegister tmp_reg = locations->GetTemp(0).AsRegister<XRegister>();
913     __ Li(tmp_reg, 0);
914   }
915 
916   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
917   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
918 
919   if (slow_path != nullptr) {
920     __ Bind(slow_path->GetExitLabel());
921   }
922 }
923 
924 void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOf(HInvoke* invoke) {
925   LocationSummary* locations = new (allocator_) LocationSummary(
926       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
927   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
928   // best to align the inputs accordingly.
929   InvokeRuntimeCallingConvention calling_convention;
930   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
931   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
932   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
933 
934   // Need to send start_index=0.
935   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
936 }
937 
938 void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOf(HInvoke* invoke) {
939   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
940 }
941 
942 void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
943   LocationSummary* locations = new (allocator_) LocationSummary(
944       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
945   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
946   // best to align the inputs accordingly.
947   InvokeRuntimeCallingConvention calling_convention;
948   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
949   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
950   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
951   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
952 }
953 
954 void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
955   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
956 }
957 
958 void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromBytes(HInvoke* invoke) {
959   LocationSummary* locations = new (allocator_) LocationSummary(
960       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
961   InvokeRuntimeCallingConvention calling_convention;
962   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
963   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
964   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
965   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
966   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
967 }
968 
969 void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromBytes(HInvoke* invoke) {
970   Riscv64Assembler* assembler = GetAssembler();
971   LocationSummary* locations = invoke->GetLocations();
972   XRegister byte_array = locations->InAt(0).AsRegister<XRegister>();
973 
974   SlowPathCodeRISCV64* slow_path =
975       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
976   codegen_->AddSlowPath(slow_path);
977   __ Beqz(byte_array, slow_path->GetEntryLabel());
978 
979   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
980   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
981   __ Bind(slow_path->GetExitLabel());
982 }
983 
984 void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromChars(HInvoke* invoke) {
985   LocationSummary* locations =
986       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
987   InvokeRuntimeCallingConvention calling_convention;
988   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
989   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
990   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
991   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
992 }
993 
994 void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromChars(HInvoke* invoke) {
995   // No need to emit code checking whether `locations->InAt(2)` is a null
996   // pointer, as callers of the native method
997   //
998   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
999   //
1000   // all include a null check on `data` before calling that method.
1001   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1002   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1003 }
1004 
1005 void IntrinsicLocationsBuilderRISCV64::VisitStringNewStringFromString(HInvoke* invoke) {
1006   LocationSummary* locations = new (allocator_) LocationSummary(
1007       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1008   InvokeRuntimeCallingConvention calling_convention;
1009   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1010   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1011 }
1012 
1013 void IntrinsicCodeGeneratorRISCV64::VisitStringNewStringFromString(HInvoke* invoke) {
1014   Riscv64Assembler* assembler = GetAssembler();
1015   LocationSummary* locations = invoke->GetLocations();
1016   XRegister string_to_copy = locations->InAt(0).AsRegister<XRegister>();
1017 
1018   SlowPathCodeRISCV64* slow_path =
1019       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
1020   codegen_->AddSlowPath(slow_path);
1021   __ Beqz(string_to_copy, slow_path->GetEntryLabel());
1022 
1023   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1024   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1025   __ Bind(slow_path->GetExitLabel());
1026 }
1027 
1028 static void GenerateSet(CodeGeneratorRISCV64* codegen,
1029                         std::memory_order order,
1030                         Location value,
1031                         XRegister rs1,
1032                         int32_t offset,
1033                         DataType::Type type) {
1034   if (order == std::memory_order_seq_cst) {
1035     codegen->GetInstructionVisitor()->StoreSeqCst(value, rs1, offset, type);
1036   } else {
1037     if (order == std::memory_order_release) {
1038       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
1039     } else {
1040       DCHECK(order == std::memory_order_relaxed);
1041     }
1042     codegen->GetInstructionVisitor()->Store(value, rs1, offset, type);
1043   }
1044 }
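// Only release and relaxed need explicit handling here: acquire is not a meaningful
// ordering for a store, and the sequentially consistent case is delegated entirely to
// the code generator's StoreSeqCst() helper.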
1045 
1046 std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) {
1047   AqRl load_aqrl = AqRl::kNone;
1048   AqRl store_aqrl = AqRl::kNone;
1049   if (order == std::memory_order_acquire) {
1050     load_aqrl = AqRl::kAcquire;
1051   } else if (order == std::memory_order_release) {
1052     store_aqrl = AqRl::kRelease;
1053   } else if (order == std::memory_order_seq_cst) {
1054     load_aqrl = AqRl::kAqRl;
1055     store_aqrl = AqRl::kRelease;
1056   } else {
1057     DCHECK(order == std::memory_order_relaxed);
1058   }
1059   return {load_aqrl, store_aqrl};
1060 }
1061 
1062 AqRl GetAmoAqRl(std::memory_order order) {
1063   AqRl amo_aqrl = AqRl::kNone;
1064   if (order == std::memory_order_acquire) {
1065     amo_aqrl = AqRl::kAcquire;
1066   } else if (order == std::memory_order_release) {
1067     amo_aqrl = AqRl::kRelease;
1068   } else {
1069     DCHECK(order == std::memory_order_seq_cst);
1070     amo_aqrl = AqRl::kAqRl;
1071   }
1072   return amo_aqrl;
1073 }
1074 
1075 static void EmitLoadReserved(Riscv64Assembler* assembler,
1076                              DataType::Type type,
1077                              XRegister ptr,
1078                              XRegister old_value,
1079                              AqRl aqrl) {
1080   switch (type) {
1081     case DataType::Type::kInt32:
1082       __ LrW(old_value, ptr, aqrl);
1083       break;
1084     case DataType::Type::kReference:
1085       __ LrW(old_value, ptr, aqrl);
1086       // TODO(riscv64): The `ZextW()` macro currently emits `SLLI+SRLI` which are from the
1087       // base "I" instruction set. When the assembler is updated to use a single-instruction
1088       // `ZextW()` macro, either the ADD.UW, or the C.ZEXT.W (16-bit encoding), we need to
1089       // rewrite this to avoid these non-"I" instructions. We could, for example, sign-extend
1090       // the reference and do the CAS as `Int32`.
1091       __ ZextW(old_value, old_value);
1092       break;
1093     case DataType::Type::kInt64:
1094       __ LrD(old_value, ptr, aqrl);
1095       break;
1096     default:
1097       LOG(FATAL) << "Unexpected type: " << type;
1098       UNREACHABLE();
1099   }
1100 }
1101 
1102 void IntrinsicLocationsBuilderRISCV64::VisitStringEquals(HInvoke* invoke) {
1103   LocationSummary* locations =
1104       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1105   locations->SetInAt(0, Location::RequiresRegister());
1106   locations->SetInAt(1, Location::RequiresRegister());
1107   locations->AddTemp(Location::RequiresRegister());
1108   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1109   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1110   // Then we shall need an extra temporary register instead of the output register.
1111   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1112 }
1113 
1114 void IntrinsicCodeGeneratorRISCV64::VisitStringEquals(HInvoke* invoke) {
1115   Riscv64Assembler* assembler = GetAssembler();
1116   LocationSummary* locations = invoke->GetLocations();
1117 
1118   // Get offsets of count, value, and class fields within a string object.
1119   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1120   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1121   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1122 
1123   XRegister str = locations->InAt(0).AsRegister<XRegister>();
1124   XRegister arg = locations->InAt(1).AsRegister<XRegister>();
1125   XRegister out = locations->Out().AsRegister<XRegister>();
1126 
1127   ScratchRegisterScope srs(assembler);
1128   XRegister temp = srs.AllocateXRegister();
1129   XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
1130 
1131   Riscv64Label loop;
1132   Riscv64Label end;
1133   Riscv64Label return_true;
1134   Riscv64Label return_false;
1135 
1136   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1137 
1138   StringEqualsOptimizations optimizations(invoke);
1139   if (!optimizations.GetArgumentNotNull()) {
1140     // Check if input is null, return false if it is.
1141     __ Beqz(arg, &return_false);
1142   }
1143 
1144   // Reference equality check, return true if same reference.
1145   __ Beq(str, arg, &return_true);
1146 
1147   if (!optimizations.GetArgumentIsString()) {
1148     // Instanceof check for the argument by comparing class fields.
1149     // All string objects must have the same type since String cannot be subclassed.
1150     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1151     // If the argument is a string object, its class field must be equal to receiver's class field.
1152     //
1153     // As the String class is expected to be non-movable, we can read the class
1154     // field from String.equals' arguments without read barriers.
1155     AssertNonMovableStringClass();
1156     // /* HeapReference<Class> */ temp = str->klass_
1157     __ Loadwu(temp, str, class_offset);
1158     // /* HeapReference<Class> */ temp1 = arg->klass_
1159     __ Loadwu(temp1, arg, class_offset);
1160     // Also, because we use the previously loaded class references only in the
1161     // following comparison, we don't need to unpoison them.
1162     __ Bne(temp, temp1, &return_false);
1163   }
1164 
1165   // Load `count` fields of this and argument strings.
1166   __ Loadwu(temp, str, count_offset);
1167   __ Loadwu(temp1, arg, count_offset);
1168   // Check if `count` fields are equal, return false if they're not.
1169   // Also compare the compression style; if it differs, return false.
1170   __ Bne(temp, temp1, &return_false);
1171 
1172   // Assertions that must hold in order to compare strings 8 bytes at a time.
1173   // Ok to do this because strings are zero-padded to kObjectAlignment.
1174   DCHECK_ALIGNED(value_offset, 8);
1175   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1176 
1177   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1178   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1179                 "Expecting 0=compressed, 1=uncompressed");
1180   __ Beqz(temp, &return_true);
1181 
1182   if (mirror::kUseStringCompression) {
1183     // For string compression, calculate the number of bytes to compare (not chars).
1184     // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1185     __ Andi(temp1, temp, 1);     // Extract compression flag.
1186     __ Srliw(temp, temp, 1u);    // Extract length.
1187     __ Sllw(temp, temp, temp1);  // Calculate number of bytes to compare.
1188   }
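  // Worked example (illustrative values): for an uncompressed 3-char string the `count` field
  // holds (3 << 1) | 1 = 7, so the extracted flag is 1, the length is 3 and we shall compare
  // 3 << 1 = 6 bytes; for a compressed (Latin-1) 3-char string it holds (3 << 1) | 0 = 6 and
  // we shall compare 3 bytes.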
1189 
1190   // Store the offset of the string value in preparation for the comparison loop.
1191   __ Li(temp1, value_offset);
1192 
1193   XRegister temp2 = srs.AllocateXRegister();
1194   // Loop to compare strings 8 bytes at a time starting at the front of the string.
1195   __ Bind(&loop);
1196   __ Add(out, str, temp1);
1197   __ Ld(out, out, 0);
1198   __ Add(temp2, arg, temp1);
1199   __ Ld(temp2, temp2, 0);
1200   __ Addi(temp1, temp1, sizeof(uint64_t));
1201   __ Bne(out, temp2, &return_false);
1202   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1203   __ Addi(temp, temp, mirror::kUseStringCompression ? -8 : -4);
1204   __ Bgt(temp, Zero, &loop);
1205 
1206   // Return true and exit the function.
1207   // If the loop did not return false, we return true.
1208   __ Bind(&return_true);
1209   __ Li(out, 1);
1210   __ J(&end);
1211 
1212   // Return false and exit the function.
1213   __ Bind(&return_false);
1214   __ Li(out, 0);
1215   __ Bind(&end);
1216 }
1217 
1218 static void EmitStoreConditional(Riscv64Assembler* assembler,
1219                                  DataType::Type type,
1220                                  XRegister ptr,
1221                                  XRegister store_result,
1222                                  XRegister to_store,
1223                                  AqRl aqrl) {
1224   switch (type) {
1225     case DataType::Type::kInt32:
1226     case DataType::Type::kReference:
1227       __ ScW(store_result, to_store, ptr, aqrl);
1228       break;
1229     case DataType::Type::kInt64:
1230       __ ScD(store_result, to_store, ptr, aqrl);
1231       break;
1232     default:
1233       LOG(FATAL) << "Unexpected type: " << type;
1234       UNREACHABLE();
1235   }
1236 }
1237 
1238 static void GenerateCompareAndSet(Riscv64Assembler* assembler,
1239                                   DataType::Type type,
1240                                   std::memory_order order,
1241                                   bool strong,
1242                                   Riscv64Label* cmp_failure,
1243                                   XRegister ptr,
1244                                   XRegister new_value,
1245                                   XRegister old_value,
1246                                   XRegister mask,
1247                                   XRegister masked,
1248                                   XRegister store_result,
1249                                   XRegister expected,
1250                                   XRegister expected2 = kNoXRegister) {
1251   DCHECK(!DataType::IsFloatingPointType(type));
1252   DCHECK_GE(DataType::Size(type), 4u);
1253 
1254   // The `expected2` is valid only for the reference slow path and holds the unmarked old value
1255   // from the main path's CAS attempt where the marked old value matched `expected`.
1256   DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference);
1257 
1258   auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
1259 
1260   // repeat: {
1261   //   old_value = [ptr];  // Load exclusive.
1262   //   cmp_value = old_value & mask;  // Extract relevant bits if applicable.
1263   //   if (cmp_value != expected && cmp_value != expected2) goto cmp_failure;
1264   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
1265   // }
1266   // if (strong) {
1267   //   if (store_result) goto repeat;  // Repeat until compare fails or store exclusive succeeds.
1268   // } else {
1269   //   store_result = store_result ^ 1;  // Report success as 1, failure as 0.
1270   // }
1271   //
1272   // (If `mask` is not valid, `expected` is compared with `old_value` instead of `cmp_value`.)
1273   // (If `expected2` is not valid, the `cmp_value == expected2` part is not emitted.)
1274 
1275   // Note: We're using "bare" local branches to enforce that they shall not be expanded
1276   // and the scrach register `TMP` shall not be clobbered if taken. Taking the branch to
1277   // `cmp_failure` can theoretically clobber `TMP` (if outside the 1 MiB range).
1278   Riscv64Label loop;
1279   if (strong) {
1280     __ Bind(&loop);
1281   }
1282   EmitLoadReserved(assembler, type, ptr, old_value, load_aqrl);
1283   XRegister to_store = new_value;
1284   {
1285     ScopedLrScExtensionsRestriction slser(assembler);
1286     if (mask != kNoXRegister) {
1287       DCHECK_EQ(expected2, kNoXRegister);
1288       DCHECK_NE(masked, kNoXRegister);
1289       __ And(masked, old_value, mask);
1290       __ Bne(masked, expected, cmp_failure);
1291       // The `old_value` does not need to be preserved as the caller shall use `masked`
1292       // to return the old value if needed.
1293       to_store = old_value;
1294       // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here
1295       // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.)
1296       __ Xor(to_store, old_value, masked);
1297       __ Or(to_store, to_store, new_value);
1298     } else if (expected2 != kNoXRegister) {
1299       Riscv64Label match2;
1300       __ Beq(old_value, expected2, &match2, /*is_bare=*/ true);
1301       __ Bne(old_value, expected, cmp_failure);
1302       __ Bind(&match2);
1303     } else {
1304       __ Bne(old_value, expected, cmp_failure);
1305     }
1306   }
1307   EmitStoreConditional(assembler, type, ptr, store_result, to_store, store_aqrl);
1308   if (strong) {
1309     __ Bnez(store_result, &loop, /*is_bare=*/ true);
1310   } else {
1311     // Flip the `store_result` register to indicate success by 1 and failure by 0.
1312     __ Xori(store_result, store_result, 1);
1313   }
1314 }
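// Informal expansion of the loop above for a strong `Int32` CAS with no `mask` and no `expected2`
// (mnemonics only; the actual aq/rl bits depend on `order`):
//
//   retry:
//     lr.w[.aq]  old_value, (ptr)
//     bne        old_value, expected, cmp_failure
//     sc.w[.rl]  store_result, new_value, (ptr)
//     bnez       store_result, retry          // Bare branch; store_result == 0 means success.
//
// A weak CAS drops the back-branch and instead flips `store_result` so that 1 reports success.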
1315 
1316 class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
1317  public:
1318   ReadBarrierCasSlowPathRISCV64(HInvoke* invoke,
1319                                 std::memory_order order,
1320                                 bool strong,
1321                                 XRegister base,
1322                                 XRegister offset,
1323                                 XRegister expected,
1324                                 XRegister new_value,
1325                                 XRegister old_value,
1326                                 XRegister old_value_temp,
1327                                 XRegister store_result,
1328                                 bool update_old_value,
1329                                 CodeGeneratorRISCV64* riscv64_codegen)
1330       : SlowPathCodeRISCV64(invoke),
1331         order_(order),
1332         strong_(strong),
1333         base_(base),
1334         offset_(offset),
1335         expected_(expected),
1336         new_value_(new_value),
1337         old_value_(old_value),
1338         old_value_temp_(old_value_temp),
1339         store_result_(store_result),
1340         update_old_value_(update_old_value),
1341         mark_old_value_slow_path_(nullptr),
1342         update_old_value_slow_path_(nullptr) {
1343     // We need to add slow paths now, it is too late when emitting slow path code.
1344     Location old_value_loc = Location::RegisterLocation(old_value);
1345     Location old_value_temp_loc = Location::RegisterLocation(old_value_temp);
1346     if (kUseBakerReadBarrier) {
1347       mark_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
1348           invoke, old_value_temp_loc, kBakerReadBarrierTemp);
1349       if (update_old_value_) {
1350         update_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
1351             invoke, old_value_loc, kBakerReadBarrierTemp);
1352       }
1353     } else {
1354       Location base_loc = Location::RegisterLocation(base);
1355       Location index = Location::RegisterLocation(offset);
1356       mark_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
1357           invoke, old_value_temp_loc, old_value_loc, base_loc, /*offset=*/ 0u, index);
1358       if (update_old_value_) {
1359         update_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
1360             invoke, old_value_loc, old_value_temp_loc, base_loc, /*offset=*/ 0u, index);
1361       }
1362     }
1363   }
1364 
1365   const char* GetDescription() const override { return "ReadBarrierCasSlowPathRISCV64"; }
1366 
1367   // We return to a different label on success for a strong CAS that does not return old value.
1368   Riscv64Label* GetSuccessExitLabel() {
1369     return &success_exit_label_;
1370   }
1371 
1372   void EmitNativeCode(CodeGenerator* codegen) override {
1373     CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
1374     Riscv64Assembler* assembler = riscv64_codegen->GetAssembler();
1375     __ Bind(GetEntryLabel());
1376 
1377     // Mark the `old_value_` from the main path and compare with `expected_`.
1378     DCHECK(mark_old_value_slow_path_ != nullptr);
1379     if (kUseBakerReadBarrier) {
1380       __ Mv(old_value_temp_, old_value_);
1381       riscv64_codegen->EmitBakerReadBarierMarkingCheck(mark_old_value_slow_path_,
1382                                                        Location::RegisterLocation(old_value_temp_),
1383                                                        kBakerReadBarrierTemp);
1384     } else {
1385       __ J(mark_old_value_slow_path_->GetEntryLabel());
1386       __ Bind(mark_old_value_slow_path_->GetExitLabel());
1387     }
1388     Riscv64Label move_marked_old_value;
1389     __ Bne(old_value_temp_, expected_, update_old_value_ ? &move_marked_old_value : GetExitLabel());
1390 
1391     // The `old_value` we have read did not match `expected` (which is always a to-space
1392     // reference) but after the read barrier the marked to-space value matched, so the
1393     // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1394     // as the main path but check for both `expected` and the unmarked old value
1395     // representing the to-space and from-space references for the same object.
1396 
1397     ScratchRegisterScope srs(assembler);
1398     XRegister tmp_ptr = srs.AllocateXRegister();
1399     XRegister store_result =
1400         store_result_ != kNoXRegister ? store_result_ : srs.AllocateXRegister();
1401 
1402     // Recalculate the `tmp_ptr` from main path potentially clobbered by the read barrier above
1403     // or by an expanded conditional branch (clobbers `TMP` if beyond 1MiB).
1404     __ Add(tmp_ptr, base_, offset_);
1405 
1406     Riscv64Label mark_old_value;
1407     GenerateCompareAndSet(riscv64_codegen->GetAssembler(),
1408                           DataType::Type::kReference,
1409                           order_,
1410                           strong_,
1411                           /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1412                           tmp_ptr,
1413                           new_value_,
1414                           /*old_value=*/ old_value_temp_,
1415                           /*mask=*/ kNoXRegister,
1416                           /*masked=*/ kNoXRegister,
1417                           store_result,
1418                           expected_,
1419                           /*expected2=*/ old_value_);
1420     if (update_old_value_) {
1421       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1422       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1423       __ Mv(old_value_, expected_);
1424     }
1425     if (!update_old_value_ && strong_) {
1426       // Load success value to the result register.
1427       // We must jump to the instruction that loads the success value in the main path.
1428       // Note that a SC failure in the CAS loop sets the `store_result` to 1, so the main
1429       // path must not use the `store_result` as an indication of success.
1430       __ J(GetSuccessExitLabel());
1431     } else {
1432       __ J(GetExitLabel());
1433     }
1434 
1435     if (update_old_value_) {
1436       // TODO(riscv64): If we initially saw a from-space reference and then saw
1437       // a different reference, can the latter be also a from-space reference?
1438       // (Shouldn't every reference write store a to-space reference?)
1439       DCHECK(update_old_value_slow_path_ != nullptr);
1440       __ Bind(&mark_old_value);
1441       if (kUseBakerReadBarrier) {
1442         __ Mv(old_value_, old_value_temp_);
1443         riscv64_codegen->EmitBakerReadBarierMarkingCheck(update_old_value_slow_path_,
1444                                                          Location::RegisterLocation(old_value_),
1445                                                          kBakerReadBarrierTemp);
1446       } else {
1447         // Note: We could redirect the `failure` above directly to the entry label and bind
1448         // the exit label in the main path, but the main path would need to access the
1449         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1450         __ J(update_old_value_slow_path_->GetEntryLabel());
1451         __ Bind(update_old_value_slow_path_->GetExitLabel());
1452       }
1453       __ J(GetExitLabel());
1454 
1455       __ Bind(&move_marked_old_value);
1456       __ Mv(old_value_, old_value_temp_);
1457       __ J(GetExitLabel());
1458     }
1459   }
1460 
1461  private:
1462   // Use RA as temp. It is clobbered in the slow path anyway.
1463   static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
1464 
1465   std::memory_order order_;
1466   bool strong_;
1467   XRegister base_;
1468   XRegister offset_;
1469   XRegister expected_;
1470   XRegister new_value_;
1471   XRegister old_value_;
1472   XRegister old_value_temp_;
1473   XRegister store_result_;
1474   bool update_old_value_;
1475   SlowPathCodeRISCV64* mark_old_value_slow_path_;
1476   SlowPathCodeRISCV64* update_old_value_slow_path_;
1477   Riscv64Label success_exit_label_;
1478 };
1479 
1480 static void EmitBlt32(Riscv64Assembler* assembler,
1481                       XRegister rs1,
1482                       Location rs2,
1483                       Riscv64Label* label,
1484                       XRegister temp) {
1485   if (rs2.IsConstant()) {
1486     __ Li(temp, rs2.GetConstant()->AsIntConstant()->GetValue());
1487     __ Blt(rs1, temp, label);
1488   } else {
1489     __ Blt(rs1, rs2.AsRegister<XRegister>(), label);
1490   }
1491 }
1492 
1493 static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler,
1494                                          XRegister array,
1495                                          Location pos,
1496                                          Location length,
1497                                          SlowPathCodeRISCV64* slow_path,
1498                                          XRegister temp1,
1499                                          XRegister temp2,
1500                                          bool length_is_array_length,
1501                                          bool position_sign_checked) {
1502   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
1503   if (pos.IsConstant()) {
1504     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1505     DCHECK_GE(pos_const, 0);  // Checked in location builder.
1506     if (pos_const == 0) {
1507       if (!length_is_array_length) {
1508         // Check that length(array) >= length.
1509         __ Loadw(temp1, array, length_offset);
1510         EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1511       }
1512     } else {
1513       // Calculate length(array) - pos.
1514       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1515       // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1516       __ Loadw(temp1, array, length_offset);
1517       __ AddConst32(temp1, temp1, -pos_const);
1518 
1519       // Check that (length(array) - pos) >= length.
1520       EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1521     }
1522   } else if (length_is_array_length) {
1523     // The only way the copy can succeed is if pos is zero.
1524     __ Bnez(pos.AsRegister<XRegister>(), slow_path->GetEntryLabel());
1525   } else {
1526     // Check that pos >= 0.
1527     XRegister pos_reg = pos.AsRegister<XRegister>();
1528     if (!position_sign_checked) {
1529       __ Bltz(pos_reg, slow_path->GetEntryLabel());
1530     }
1531 
1532     // Calculate length(array) - pos.
1533     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1534     // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1535     __ Loadw(temp1, array, length_offset);
1536     __ Sub(temp1, temp1, pos_reg);
1537 
1538     // Check that (length(array) - pos) >= length.
1539     EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
1540   }
1541 }
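// In all cases above, the emitted code is an inline version of the following check (informal
// sketch; parts already known from constants or earlier checks are folded away):
//
//   if (pos < 0 || array->length - pos < length) goto slow_path;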
1542 
1543 static void GenArrayAddress(CodeGeneratorRISCV64* codegen,
1544                             XRegister dest,
1545                             XRegister base,
1546                             Location pos,
1547                             DataType::Type type,
1548                             int32_t data_offset) {
1549   Riscv64Assembler* assembler = codegen->GetAssembler();
1550   if (pos.IsConstant()) {
1551     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1552     __ AddConst64(dest, base, DataType::Size(type) * constant + data_offset);
1553   } else {
1554     codegen->GetInstructionVisitor()->ShNAdd(dest, pos.AsRegister<XRegister>(), base, type);
1555     if (data_offset != 0) {
1556       __ AddConst64(dest, dest, data_offset);
1557     }
1558   }
1559 }
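// Informal sketch of the address computed above, using the names from this function:
//
//   dest = base + DataType::Size(type) * pos + data_offset
//
// For a register `pos`, the scaling is done by the `ShNAdd()` helper (shift-and-add by the
// element size), followed by adding `data_offset` if it is non-zero.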
1560 
1561 // Compute base source address, base destination address, and end
1562 // source address for System.arraycopy* intrinsics in `src_base`,
1563 // `dst_base` and `src_end` respectively.
1564 static void GenSystemArrayCopyAddresses(CodeGeneratorRISCV64* codegen,
1565                                         DataType::Type type,
1566                                         XRegister src,
1567                                         Location src_pos,
1568                                         XRegister dst,
1569                                         Location dst_pos,
1570                                         Location copy_length,
1571                                         XRegister src_base,
1572                                         XRegister dst_base,
1573                                         XRegister src_end) {
1574   // This routine is used by the SystemArrayCopyX intrinsics.
1575   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kInt8 ||
1576          type == DataType::Type::kUint16 || type == DataType::Type::kInt32)
1577       << "Unexpected element type: " << type;
1578   const int32_t element_size = DataType::Size(type);
1579   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1580 
1581   GenArrayAddress(codegen, src_base, src, src_pos, type, data_offset);
1582   GenArrayAddress(codegen, dst_base, dst, dst_pos, type, data_offset);
1583   GenArrayAddress(codegen, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
1584 }
1585 
1586 static Location LocationForSystemArrayCopyInput(HInstruction* input) {
1587   HIntConstant* const_input = input->AsIntConstantOrNull();
1588   if (const_input != nullptr && IsInt<12>(const_input->GetValue())) {
1589     return Location::ConstantLocation(const_input);
1590   } else {
1591     return Location::RequiresRegister();
1592   }
1593 }
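// For example (illustrative values), a constant position or length such as 100 fits the signed
// 12-bit immediate range [-2048, 2047] and is used as a constant location, while a constant such
// as 5000 would be materialized in a register instead.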
1594 
1595 // For longer copy lengths we prefer to bail out to libcore's native implementation.
1596 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1597 
1598 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
1599   // The only read barrier implementation supporting the
1600   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1601   if (codegen_->EmitNonBakerReadBarrier()) {
1602     return;
1603   }
1604 
1605   size_t num_temps = codegen_->EmitBakerReadBarrier() ? 4u : 2u;
1606   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1607       invoke, kSystemArrayCopyThreshold, num_temps);
1608   if (locations != nullptr) {
1609     // We request position and length as constants only for small integral values.
1610     locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
1611     locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
1612     locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
1613   }
1614 }
1615 
1616 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
1617   // The only read barrier implementation supporting the
1618   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1619   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1620 
1621   Riscv64Assembler* assembler = GetAssembler();
1622   LocationSummary* locations = invoke->GetLocations();
1623 
1624   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1625   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1626   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1627   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1628   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1629 
1630   XRegister src = locations->InAt(0).AsRegister<XRegister>();
1631   Location src_pos = locations->InAt(1);
1632   XRegister dest = locations->InAt(2).AsRegister<XRegister>();
1633   Location dest_pos = locations->InAt(3);
1634   Location length = locations->InAt(4);
1635   XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
1636   XRegister temp2 = locations->GetTemp(1).AsRegister<XRegister>();
1637 
1638   SlowPathCodeRISCV64* intrinsic_slow_path =
1639       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
1640   codegen_->AddSlowPath(intrinsic_slow_path);
1641 
1642   Riscv64Label conditions_on_positions_validated;
1643   SystemArrayCopyOptimizations optimizations(invoke);
1644 
1645   // If source and destination are the same, we go to slow path if we need to do forward copying.
1646   // We do not need to do this check if the source and destination positions are the same.
1647   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1648     if (src_pos.IsConstant()) {
1649       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1650       if (dest_pos.IsConstant()) {
1651         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1652         if (optimizations.GetDestinationIsSource()) {
1653           // Checked when building locations.
1654           DCHECK_GE(src_pos_constant, dest_pos_constant);
1655         } else if (src_pos_constant < dest_pos_constant) {
1656           __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel());
1657         }
1658       } else {
1659         if (!optimizations.GetDestinationIsSource()) {
1660           __ Bne(src, dest, &conditions_on_positions_validated);
1661         }
1662         __ Li(temp1, src_pos_constant);
1663         __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
1664       }
1665     } else {
1666       if (!optimizations.GetDestinationIsSource()) {
1667         __ Bne(src, dest, &conditions_on_positions_validated);
1668       }
1669       XRegister src_pos_reg = src_pos.AsRegister<XRegister>();
1670       EmitBlt32(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel(), temp2);
1671     }
1672   }
1673 
1674   __ Bind(&conditions_on_positions_validated);
1675 
1676   if (!optimizations.GetSourceIsNotNull()) {
1677     // Bail out if the source is null.
1678     __ Beqz(src, intrinsic_slow_path->GetEntryLabel());
1679   }
1680 
1681   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1682     // Bail out if the destination is null.
1683     __ Beqz(dest, intrinsic_slow_path->GetEntryLabel());
1684   }
1685 
1686   // We have already checked in the LocationsBuilder for the constant case.
1687   if (!length.IsConstant()) {
1688     // Merge the following two comparisons into one:
1689     //   If the length is negative, bail out (delegate to libcore's native implementation).
1690     //   If the length >= 128 then (currently) prefer native implementation.
1691     __ Li(temp1, kSystemArrayCopyThreshold);
1692     __ Bgeu(length.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
1693   }
1694   // Validity checks: source.
1695   CheckSystemArrayCopyPosition(assembler,
1696                                src,
1697                                src_pos,
1698                                length,
1699                                intrinsic_slow_path,
1700                                temp1,
1701                                temp2,
1702                                optimizations.GetCountIsSourceLength(),
1703                                /*position_sign_checked=*/ false);
1704 
1705   // Validity checks: dest.
1706   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1707   CheckSystemArrayCopyPosition(assembler,
1708                                dest,
1709                                dest_pos,
1710                                length,
1711                                intrinsic_slow_path,
1712                                temp1,
1713                                temp2,
1714                                optimizations.GetCountIsDestinationLength(),
1715                                dest_position_sign_checked);
1716 
1717   auto check_non_primitive_array_class = [&](XRegister klass, XRegister temp) {
1718     // No read barrier is needed for reading a chain of constant references for comparing
1719     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1720     // /* HeapReference<Class> */ temp = klass->component_type_
1721     __ Loadwu(temp, klass, component_offset);
1722     codegen_->MaybeUnpoisonHeapReference(temp);
1723     // Check that the component type is not null.
1724     __ Beqz(temp, intrinsic_slow_path->GetEntryLabel());
1725     // Check that the component type is not a primitive.
1726     // /* uint16_t */ temp = static_cast<uint16_t>(klass->primitive_type_);
1727     __ Loadhu(temp, temp, primitive_offset);
1728     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1729     __ Bnez(temp, intrinsic_slow_path->GetEntryLabel());
1730   };
1731 
1732   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1733     // Check whether all elements of the source array are assignable to the component
1734     // type of the destination array. We do two checks: the classes are the same,
1735     // or the destination is Object[]. If none of these checks succeed, we go to the
1736     // slow path.
1737 
1738     if (codegen_->EmitBakerReadBarrier()) {
1739       XRegister temp3 = locations->GetTemp(2).AsRegister<XRegister>();
1740       // /* HeapReference<Class> */ temp1 = dest->klass_
1741       codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
1742                                                       Location::RegisterLocation(temp1),
1743                                                       dest,
1744                                                       class_offset,
1745                                                       Location::RegisterLocation(temp3),
1746                                                       /* needs_null_check= */ false);
1747       // /* HeapReference<Class> */ temp2 = src->klass_
1748       codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
1749                                                       Location::RegisterLocation(temp2),
1750                                                       src,
1751                                                       class_offset,
1752                                                       Location::RegisterLocation(temp3),
1753                                                       /* needs_null_check= */ false);
1754     } else {
1755       // /* HeapReference<Class> */ temp1 = dest->klass_
1756       __ Loadwu(temp1, dest, class_offset);
1757       codegen_->MaybeUnpoisonHeapReference(temp1);
1758       // /* HeapReference<Class> */ temp2 = src->klass_
1759       __ Loadwu(temp2, src, class_offset);
1760       codegen_->MaybeUnpoisonHeapReference(temp2);
1761     }
1762 
1763     if (optimizations.GetDestinationIsTypedObjectArray()) {
1764       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1765       Riscv64Label do_copy;
1766       // For class match, we can skip the source type check regardless of the optimization flag.
1767       __ Beq(temp1, temp2, &do_copy);
1768       // No read barrier is needed for reading a chain of constant references
1769       // for comparing with null, see `ReadBarrierOption`.
1770       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1771       __ Loadwu(temp1, temp1, component_offset);
1772       codegen_->MaybeUnpoisonHeapReference(temp1);
1773       // /* HeapReference<Class> */ temp1 = temp1->super_class_
1774       __ Loadwu(temp1, temp1, super_offset);
1775       // No need to unpoison the result, we're comparing against null.
1776       __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
1777       // Bail out if the source is not a non primitive array.
1778       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1779         check_non_primitive_array_class(temp2, temp2);
1780       }
1781       __ Bind(&do_copy);
1782     } else {
1783       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1784       // For class match, we can skip the array type check completely if at least one of source
1785       // and destination is known to be a non primitive array, otherwise one check is enough.
1786       __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
1787       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1788           !optimizations.GetSourceIsNonPrimitiveArray()) {
1789         check_non_primitive_array_class(temp2, temp2);
1790       }
1791     }
1792   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1793     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1794     // Bail out if the source is not a non primitive array.
1795     // No read barrier is needed for reading a chain of constant references for comparing
1796     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1797     // /* HeapReference<Class> */ temp2 = src->klass_
1798     __ Loadwu(temp2, src, class_offset);
1799     codegen_->MaybeUnpoisonHeapReference(temp2);
1800     check_non_primitive_array_class(temp2, temp2);
1801   }
1802 
1803   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1804     // Zero constant length: no need to emit the loop code at all.
1805   } else {
1806     Riscv64Label skip_copy_and_write_barrier;
1807     if (length.IsRegister()) {
1808       // Don't enter the copy loop if the length is zero.
1809       __ Beqz(length.AsRegister<XRegister>(), &skip_copy_and_write_barrier);
1810     }
1811 
1812     {
1813       // We use a block to end the scratch scope before the write barrier, thus
1814       // freeing the scratch registers so they can be used in `MarkGCCard`.
1815       ScratchRegisterScope srs(assembler);
1816       bool emit_rb = codegen_->EmitBakerReadBarrier();
1817       XRegister temp3 =
1818           emit_rb ? locations->GetTemp(2).AsRegister<XRegister>() : srs.AllocateXRegister();
1819 
1820       XRegister src_curr_addr = temp1;
1821       XRegister dst_curr_addr = temp2;
1822       XRegister src_stop_addr = temp3;
1823       const DataType::Type type = DataType::Type::kReference;
1824       const int32_t element_size = DataType::Size(type);
1825 
1826       XRegister tmp = kNoXRegister;
1827       SlowPathCodeRISCV64* read_barrier_slow_path = nullptr;
1828       if (emit_rb) {
1829         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1830 
1831         // SystemArrayCopy implementation for Baker read barriers (see
1832         // also CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier):
1833         //
1834         //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1835         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1836         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
1837         //   if (is_gray) {
1838         //     // Slow-path copy.
1839         //     do {
1840         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1841         //     } while (src_ptr != end_ptr)
1842         //   } else {
1843         //     // Fast-path copy.
1844         //     do {
1845         //       *dest_ptr++ = *src_ptr++;
1846         //     } while (src_ptr != end_ptr)
1847         //   }
1848 
1849         // /* uint32_t */ monitor = src->monitor_
1850         tmp = locations->GetTemp(3).AsRegister<XRegister>();
1851         __ Loadwu(tmp, src, monitor_offset);
1852         // /* LockWord */ lock_word = LockWord(monitor)
1853         static_assert(sizeof(LockWord) == sizeof(int32_t),
1854                       "art::LockWord and int32_t have different sizes.");
1855 
1856         // Shift the RB state bit to the sign bit while also clearing the low 32 bits
1857         // for the fake dependency below.
1858         static_assert(LockWord::kReadBarrierStateShift < 31);
1859         __ Slli(tmp, tmp, 63 - LockWord::kReadBarrierStateShift);
1860 
1861         // Introduce a dependency on the lock_word including rb_state, to prevent load-load
1862         // reordering, and without using a memory barrier (which would be more expensive).
1863         // `src` is unchanged by this operation (since Adduw adds low 32 bits
1864         // which are zero after left shift), but its value now depends on `tmp`.
1865         __ AddUw(src, tmp, src);
1866 
1867         // Slow path used to copy array when `src` is gray.
1868         read_barrier_slow_path = new (codegen_->GetScopedAllocator())
1869             ReadBarrierSystemArrayCopySlowPathRISCV64(invoke, Location::RegisterLocation(tmp));
1870         codegen_->AddSlowPath(read_barrier_slow_path);
1871       }
1872 
1873       // Compute base source address, base destination address, and end source address for
1874       // System.arraycopy* intrinsics in `src_base`, `dst_base` and `src_end` respectively.
1875       // Note that `src_curr_addr` is computed from `src` (and `src_pos`) here, and
1876       // thus honors the artificial dependency of `src` on `tmp` for read barriers.
1877       GenSystemArrayCopyAddresses(codegen_,
1878                                   type,
1879                                   src,
1880                                   src_pos,
1881                                   dest,
1882                                   dest_pos,
1883                                   length,
1884                                   src_curr_addr,
1885                                   dst_curr_addr,
1886                                   src_stop_addr);
1887 
1888       if (emit_rb) {
1889         // Given the numeric representation, it's enough to check the low bit of the RB state.
1890         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1891         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1892         DCHECK_NE(tmp, kNoXRegister);
1893         __ Bltz(tmp, read_barrier_slow_path->GetEntryLabel());
1894       } else {
1895         // After allocating the last scratch register, we cannot use macro load/store instructions
1896         // such as `Loadwu()` and need to use raw instructions. However, all offsets below are 0.
1897         DCHECK_EQ(tmp, kNoXRegister);
1898         tmp = srs.AllocateXRegister();
1899       }
1900 
1901       // Iterate over the arrays and do a raw copy of the objects. We don't need to
1902       // poison/unpoison.
1903       Riscv64Label loop;
1904       __ Bind(&loop);
1905       __ Lwu(tmp, src_curr_addr, 0);
1906       __ Sw(tmp, dst_curr_addr, 0);
1907       __ Addi(src_curr_addr, src_curr_addr, element_size);
1908       __ Addi(dst_curr_addr, dst_curr_addr, element_size);
1909       // Bare: `TMP` shall not be clobbered.
1910       __ Bne(src_curr_addr, src_stop_addr, &loop, /*is_bare=*/ true);
1911 
1912       if (emit_rb) {
1913         DCHECK(read_barrier_slow_path != nullptr);
1914         __ Bind(read_barrier_slow_path->GetExitLabel());
1915       }
1916     }
1917 
1918     // We only need one card marking on the destination array.
1919     codegen_->MarkGCCard(dest);
1920 
1921     __ Bind(&skip_copy_and_write_barrier);
1922   }
1923 
1924   __ Bind(intrinsic_slow_path->GetExitLabel());
1925 }
1926 
1927 // This value is in bytes and greater than ARRAYCOPY_SHORT_XXX_ARRAY_THRESHOLD
1928 // in libcore, so if we choose to jump to the slow path we will end up
1929 // in the native implementation.
1930 static constexpr int32_t kSystemArrayCopyPrimThreshold = 384;
1931 
1932 static void CreateSystemArrayCopyLocations(HInvoke* invoke, DataType::Type type) {
1933   int32_t copy_threshold = kSystemArrayCopyPrimThreshold / DataType::Size(type);
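  // For example, with kSystemArrayCopyPrimThreshold = 384 bytes this allows inline copies of up
  // to 384 `byte` elements, 192 `char`s or 96 `int`s; longer copies fall back to the normal call.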
1934 
1935   // Check to see if we have known failures that will cause us to have to bail out
1936   // to the runtime, and just generate the runtime call directly.
1937   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
1938   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
1939 
1940   // The positions must be non-negative.
1941   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
1942       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
1943     // We will have to fail anyway.
1944     return;
1945   }
1946 
1947   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
1948   // native implementation.
1949   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
1950   if (length != nullptr) {
1951     int32_t len = length->GetValue();
1952     if (len < 0 || len > copy_threshold) {
1953       // Just call as normal.
1954       return;
1955     }
1956   }
1957 
1958   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
1959   LocationSummary* locations =
1960       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
1961   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
1962   locations->SetInAt(0, Location::RequiresRegister());
1963   locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
1964   locations->SetInAt(2, Location::RequiresRegister());
1965   locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
1966   locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
1967 
1968   locations->AddRegisterTemps(3);
1969 }
1970 
1971 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyByte(HInvoke* invoke) {
1972   CreateSystemArrayCopyLocations(invoke, DataType::Type::kInt8);
1973 }
1974 
1975 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyChar(HInvoke* invoke) {
1976   CreateSystemArrayCopyLocations(invoke, DataType::Type::kUint16);
1977 }
1978 
1979 void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopyInt(HInvoke* invoke) {
1980   CreateSystemArrayCopyLocations(invoke, DataType::Type::kInt32);
1981 }
1982 
1983 static void GenerateUnsignedLoad(
1984     Riscv64Assembler* assembler, XRegister rd, XRegister rs1, int32_t offset, size_t type_size) {
1985   switch (type_size) {
1986     case 1:
1987       __ Lbu(rd, rs1, offset);
1988       break;
1989     case 2:
1990       __ Lhu(rd, rs1, offset);
1991       break;
1992     case 4:
1993       __ Lwu(rd, rs1, offset);
1994       break;
1995     case 8:
1996       __ Ld(rd, rs1, offset);
1997       break;
1998     default:
1999       LOG(FATAL) << "Unexpected data type";
2000   }
2001 }
2002 
2003 static void GenerateStore(
2004     Riscv64Assembler* assembler, XRegister rs2, XRegister rs1, int32_t offset, size_t type_size) {
2005   switch (type_size) {
2006     case 1:
2007       __ Sb(rs2, rs1, offset);
2008       break;
2009     case 2:
2010       __ Sh(rs2, rs1, offset);
2011       break;
2012     case 4:
2013       __ Sw(rs2, rs1, offset);
2014       break;
2015     case 8:
2016       __ Sd(rs2, rs1, offset);
2017       break;
2018     default:
2019       LOG(FATAL) << "Unexpected data type";
2020   }
2021 }
2022 
2023 static void SystemArrayCopyPrimitive(HInvoke* invoke,
2024                                      CodeGeneratorRISCV64* codegen,
2025                                      DataType::Type type) {
2026   Riscv64Assembler* assembler = codegen->GetAssembler();
2027   LocationSummary* locations = invoke->GetLocations();
2028   XRegister src = locations->InAt(0).AsRegister<XRegister>();
2029   Location src_pos = locations->InAt(1);
2030   XRegister dst = locations->InAt(2).AsRegister<XRegister>();
2031   Location dst_pos = locations->InAt(3);
2032   Location length = locations->InAt(4);
2033 
2034   SlowPathCodeRISCV64* slow_path =
2035       new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
2036   codegen->AddSlowPath(slow_path);
2037 
2038   SystemArrayCopyOptimizations optimizations(invoke);
2039 
2040   // If source and destination are the same, take the slow path. Overlapping copy regions must be
2041   // copied in reverse and we can't know in all cases if it's needed.
2042   __ Beq(src, dst, slow_path->GetEntryLabel());
2043 
2044   if (!optimizations.GetSourceIsNotNull()) {
2045     // Bail out if the source is null.
2046     __ Beqz(src, slow_path->GetEntryLabel());
2047   }
2048 
2049   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2050     // Bail out if the destination is null.
2051     __ Beqz(dst, slow_path->GetEntryLabel());
2052   }
2053 
2054   int32_t copy_threshold = kSystemArrayCopyPrimThreshold / DataType::Size(type);
2055   XRegister tmp = locations->GetTemp(0).AsRegister<XRegister>();
2056   if (!length.IsConstant()) {
2057     // Merge the following two comparisons into one:
2058     //   If the length is negative, bail out (delegate to libcore's native implementation).
2059     //   If the length >= kSystemArrayCopyPrimThreshold then (currently) prefer libcore's
2060     //   native implementation.
2061     __ Li(tmp, copy_threshold);
2062     __ Bgeu(length.AsRegister<XRegister>(), tmp, slow_path->GetEntryLabel());
2063   } else {
2064     // We have already checked in the LocationsBuilder for the constant case.
2065     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2066     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), copy_threshold);
2067   }
2068 
2069   XRegister src_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
2070   XRegister dst_curr_addr = locations->GetTemp(2).AsRegister<XRegister>();
2071 
2072   CheckSystemArrayCopyPosition(assembler,
2073                                src,
2074                                src_pos,
2075                                length,
2076                                slow_path,
2077                                src_curr_addr,
2078                                dst_curr_addr,
2079                                /*length_is_array_length=*/ false,
2080                                /*position_sign_checked=*/ false);
2081 
2082   CheckSystemArrayCopyPosition(assembler,
2083                                dst,
2084                                dst_pos,
2085                                length,
2086                                slow_path,
2087                                src_curr_addr,
2088                                dst_curr_addr,
2089                                /*length_is_array_length=*/ false,
2090                                /*position_sign_checked=*/ false);
2091 
2092   const int32_t element_size = DataType::Size(type);
2093   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2094 
2095   GenArrayAddress(codegen, src_curr_addr, src, src_pos, type, data_offset);
2096   GenArrayAddress(codegen, dst_curr_addr, dst, dst_pos, type, data_offset);
2097 
2098   // We split processing of the array in two parts: head and tail.
2099   // A first loop handles the head by copying a block of elements per
2100   // iteration (see: elements_per_block).
2101   // A second loop handles the tail by copying the remaining elements.
2102   // If the copy length is not constant, we copy them one-by-one.
2103   //
2104   // Both loops are inverted for better performance, meaning they are
2105   // implemented as conditional do-while loops.
2106   // Here, the loop condition is first checked to determine if there are
2107   // sufficient elements to run an iteration, then we enter the do-while: an
2108   // iteration is performed followed by a conditional branch only if another
2109   // iteration is necessary. As opposed to a standard while-loop, this inversion
2110   // can save some branching (e.g. we don't branch back to the initial condition
2111   // at the end of every iteration only to potentially immediately branch
2112   // again).
2113   //
2114   // A full block of elements is subtracted and added before and after the head
2115   // loop, respectively. This ensures that any remaining length after each
2116   // head loop iteration means there is a full block remaining, reducing the
2117   // number of conditional checks required on every iteration.
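  // Worked example (illustrative): copying 21 `char` elements (element_size == 2) gives
  // elements_per_block == 8; the head loop then runs twice (copying 16 elements, 16 bytes per
  // iteration), the remaining count goes negative, one block is added back leaving 5, and the
  // tail loop copies the last 5 elements one at a time.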
2118   ScratchRegisterScope temps(assembler);
2119   constexpr int32_t bytes_copied_per_iteration = 16;
2120   DCHECK_EQ(bytes_copied_per_iteration % element_size, 0);
2121   int32_t elements_per_block = bytes_copied_per_iteration / element_size;
2122   Riscv64Label done;
2123 
2124   XRegister length_tmp = temps.AllocateXRegister();
2125 
2126   auto emit_head_loop = [&]() {
2127     ScratchRegisterScope local_temps(assembler);
2128     XRegister tmp2 = local_temps.AllocateXRegister();
2129 
2130     Riscv64Label loop;
2131     __ Bind(&loop);
2132     __ Ld(tmp, src_curr_addr, 0);
2133     __ Ld(tmp2, src_curr_addr, 8);
2134     __ Sd(tmp, dst_curr_addr, 0);
2135     __ Sd(tmp2, dst_curr_addr, 8);
2136     __ Addi(length_tmp, length_tmp, -elements_per_block);
2137     __ Addi(src_curr_addr, src_curr_addr, bytes_copied_per_iteration);
2138     __ Addi(dst_curr_addr, dst_curr_addr, bytes_copied_per_iteration);
2139     __ Bgez(length_tmp, &loop);
2140   };
2141 
2142   auto emit_tail_loop = [&]() {
2143     Riscv64Label loop;
2144     __ Bind(&loop);
2145     GenerateUnsignedLoad(assembler, tmp, src_curr_addr, 0, element_size);
2146     GenerateStore(assembler, tmp, dst_curr_addr, 0, element_size);
2147     __ Addi(length_tmp, length_tmp, -1);
2148     __ Addi(src_curr_addr, src_curr_addr, element_size);
2149     __ Addi(dst_curr_addr, dst_curr_addr, element_size);
2150     __ Bgtz(length_tmp, &loop);
2151   };
2152 
2153   auto emit_unrolled_tail_loop = [&](int32_t tail_length) {
2154     DCHECK_LT(tail_length, elements_per_block);
2155 
2156     int32_t length_in_bytes = tail_length * element_size;
2157     size_t offset = 0;
2158     for (size_t operation_size = 8; operation_size > 0; operation_size >>= 1) {
2159       if ((length_in_bytes & operation_size) != 0) {
2160         GenerateUnsignedLoad(assembler, tmp, src_curr_addr, offset, operation_size);
2161         GenerateStore(assembler, tmp, dst_curr_addr, offset, operation_size);
2162         offset += operation_size;
2163       }
2164     }
2165   };
2166 
2167   if (length.IsConstant()) {
2168     const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
2169     if (constant_length >= elements_per_block) {
2170       __ Li(length_tmp, constant_length - elements_per_block);
2171       emit_head_loop();
2172     }
2173     emit_unrolled_tail_loop(constant_length % elements_per_block);
2174   } else {
2175     Riscv64Label tail_loop;
2176     XRegister length_reg = length.AsRegister<XRegister>();
2177     __ Addi(length_tmp, length_reg, -elements_per_block);
2178     __ Bltz(length_tmp, &tail_loop);
2179 
2180     emit_head_loop();
2181 
2182     __ Bind(&tail_loop);
2183     __ Addi(length_tmp, length_tmp, elements_per_block);
2184     __ Beqz(length_tmp, &done);
2185 
2186     emit_tail_loop();
2187   }
2188 
2189   __ Bind(&done);
2190   __ Bind(slow_path->GetExitLabel());
2191 }
2192 
2193 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyByte(HInvoke* invoke) {
2194   SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kInt8);
2195 }
2196 
2197 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2198   SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kUint16);
2199 }
2200 
2201 void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopyInt(HInvoke* invoke) {
2202   SystemArrayCopyPrimitive(invoke, codegen_, DataType::Type::kInt32);
2203 }
2204 
2205 enum class GetAndUpdateOp {
2206   kSet,
2207   kAdd,
2208   kAnd,
2209   kOr,
2210   kXor
2211 };
2212 
2213 // Generate a GetAndUpdate operation.
2214 //
2215 // Only 32-bit and 64-bit atomics are currently supported, therefore smaller types need
2216 // special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
2217 // and extract the needed bits from `old_value`. For bitwise operations, no extra
2218 // handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
2219 // also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
2220 // Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
2221 // the bits to replace for `GetAndUpdateOp::kAdd`.
GenerateGetAndUpdate(CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op,DataType::Type type,std::memory_order order,XRegister ptr,XRegister arg,XRegister old_value,XRegister mask,XRegister temp)2222 static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen,
2223                                  GetAndUpdateOp get_and_update_op,
2224                                  DataType::Type type,
2225                                  std::memory_order order,
2226                                  XRegister ptr,
2227                                  XRegister arg,
2228                                  XRegister old_value,
2229                                  XRegister mask,
2230                                  XRegister temp) {
2231   DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister);
2232   DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32);
2233   DCHECK_IMPLIES(
2234       mask != kNoXRegister,
2235       (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd));
2236   Riscv64Assembler* assembler = codegen->GetAssembler();
2237   AqRl amo_aqrl = GetAmoAqRl(order);
2238   switch (get_and_update_op) {
2239     case GetAndUpdateOp::kSet:
2240       if (type == DataType::Type::kInt64) {
2241         __ AmoSwapD(old_value, arg, ptr, amo_aqrl);
2242       } else if (mask == kNoXRegister) {
2243         DCHECK_EQ(type, DataType::Type::kInt32);
2244         __ AmoSwapW(old_value, arg, ptr, amo_aqrl);
2245       } else {
2246         DCHECK_EQ(type, DataType::Type::kInt32);
2247         DCHECK_NE(temp, kNoXRegister);
2248         auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
2249         Riscv64Label retry;
2250         __ Bind(&retry);
2251         __ LrW(old_value, ptr, load_aqrl);
2252         {
2253           ScopedLrScExtensionsRestriction slser(assembler);
2254           __ And(temp, old_value, mask);
2255           __ Or(temp, temp, arg);
2256         }
2257         __ ScW(temp, temp, ptr, store_aqrl);
2258         __ Bnez(temp, &retry, /*is_bare=*/ true);  // Bare: `TMP` shall not be clobbered.
2259       }
2260       break;
2261     case GetAndUpdateOp::kAdd:
2262       if (type == DataType::Type::kInt64) {
2263         __ AmoAddD(old_value, arg, ptr, amo_aqrl);
2264       } else if (mask == kNoXRegister) {
2265         DCHECK_EQ(type, DataType::Type::kInt32);
2266         __ AmoAddW(old_value, arg, ptr, amo_aqrl);
2267       } else {
2268         DCHECK_EQ(type, DataType::Type::kInt32);
2269         DCHECK_NE(temp, kNoXRegister);
2270         auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
2271         Riscv64Label retry;
2272         __ Bind(&retry);
2273         __ LrW(old_value, ptr, load_aqrl);
2274         {
2275           ScopedLrScExtensionsRestriction slser(assembler);
2276           __ Add(temp, old_value, arg);
2277           // We use the identity `(A ^ B) ^ A == B`: with masking, `((A ^ B) & mask) ^ A`
2278           // yields the bits of `B` where `mask` is set and the bits of `A` elsewhere.
2279           // Note: These instructions form a dependency chain, so this is not necessarily the
2280           // fastest approach, but the alternative `(A & ~mask) | (B & mask)` would need an
2281           // extra register for `~mask` because ANDN is not in the base "I" instruction set,
2282           // which is what an LR/SC sequence is restricted to.
2283           __ Xor(temp, temp, old_value);
2284           __ And(temp, temp, mask);
2285           __ Xor(temp, temp, old_value);
2286         }
2287         __ ScW(temp, temp, ptr, store_aqrl);
2288         __ Bnez(temp, &retry, /*is_bare=*/ true);  // Bare: `TMP` shall not be clobbered.
2289       }
2290       break;
2291     case GetAndUpdateOp::kAnd:
2292       if (type == DataType::Type::kInt64) {
2293         __ AmoAndD(old_value, arg, ptr, amo_aqrl);
2294       } else {
2295         DCHECK_EQ(type, DataType::Type::kInt32);
2296         __ AmoAndW(old_value, arg, ptr, amo_aqrl);
2297       }
2298       break;
2299     case GetAndUpdateOp::kOr:
2300       if (type == DataType::Type::kInt64) {
2301         __ AmoOrD(old_value, arg, ptr, amo_aqrl);
2302       } else {
2303         DCHECK_EQ(type, DataType::Type::kInt32);
2304         __ AmoOrW(old_value, arg, ptr, amo_aqrl);
2305       }
2306       break;
2307     case GetAndUpdateOp::kXor:
2308       if (type == DataType::Type::kInt64) {
2309         __ AmoXorD(old_value, arg, ptr, amo_aqrl);
2310       } else {
2311         DCHECK_EQ(type, DataType::Type::kInt32);
2312         __ AmoXorW(old_value, arg, ptr, amo_aqrl);
2313       }
2314       break;
2315   }
2316 }
2317 
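// A minimal C++ sketch of the bit-merge identity used in the masked LR/SC sequences above:
// `((A ^ B) & mask) ^ A` selects the bits of B where `mask` is set and the bits of A elsewhere,
// which is equivalent to `(A & ~mask) | (B & mask)` but needs no extra register for `~mask`.
// The function name is illustrative only.
#include <cstdint>

static constexpr uint32_t MergeBitsSketch(uint32_t a, uint32_t b, uint32_t mask) {
  return ((a ^ b) & mask) ^ a;
}

static_assert(MergeBitsSketch(0x11223344u, 0xAABBCCDDu, 0x0000FF00u) == 0x1122CC44u,
              "Bits under the mask come from B, the rest from A");
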
CreateUnsafeGetLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)2318 static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
2319                                      HInvoke* invoke,
2320                                      CodeGeneratorRISCV64* codegen) {
2321   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2322   LocationSummary* locations = new (allocator) LocationSummary(
2323       invoke,
2324       can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
2325       kIntrinsified);
2326   if (can_call && kUseBakerReadBarrier) {
2327     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2328   }
2329   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2330   locations->SetInAt(1, Location::RequiresRegister());
2331   locations->SetInAt(2, Location::RequiresRegister());
2332   locations->SetOut(Location::RequiresRegister(),
2333                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2334 }
2335 
CreateUnsafeGetAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)2336 static void CreateUnsafeGetAbsoluteLocations(ArenaAllocator* allocator,
2337                                              HInvoke* invoke) {
2338   LocationSummary* locations =
2339       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2340   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2341   locations->SetInAt(1, Location::RequiresRegister());
2342   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2343 }
2344 
GenUnsafeGet(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2345 static void GenUnsafeGet(HInvoke* invoke,
2346                          CodeGeneratorRISCV64* codegen,
2347                          std::memory_order order,
2348                          DataType::Type type) {
2349   DCHECK((type == DataType::Type::kInt8) ||
2350          (type == DataType::Type::kInt32) ||
2351          (type == DataType::Type::kInt64) ||
2352          (type == DataType::Type::kReference));
2353   LocationSummary* locations = invoke->GetLocations();
2354   Location object_loc = locations->InAt(1);
2355   XRegister object = object_loc.AsRegister<XRegister>();  // Object pointer.
2356   Location offset_loc = locations->InAt(2);
2357   XRegister offset = offset_loc.AsRegister<XRegister>();  // Long offset.
2358   Location out_loc = locations->Out();
2359   XRegister out = out_loc.AsRegister<XRegister>();
2360 
2361   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2362   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2363   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2364 
2365   if (seq_cst_barrier) {
2366     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2367   }
2368 
2369   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
2370     // JdkUnsafeGetReference/JdkUnsafeGetReferenceVolatile with Baker's read barrier case.
2371     // TODO(riscv64): Revisit when we add checking if the holder is black.
2372     Location temp = Location::NoLocation();
2373     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
2374                                                        out_loc,
2375                                                        object,
2376                                                        /*offset=*/ 0,
2377                                                        /*index=*/ offset_loc,
2378                                                        temp,
2379                                                        /*needs_null_check=*/ false);
2380   } else {
2381     // Other cases.
2382     Riscv64Assembler* assembler = codegen->GetAssembler();
2383     __ Add(out, object, offset);
2384     codegen->GetInstructionVisitor()->Load(out_loc, out, /*offset=*/ 0, type);
2385 
2386     if (type == DataType::Type::kReference) {
2387       codegen->MaybeGenerateReadBarrierSlow(
2388           invoke, out_loc, out_loc, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
2389     }
2390   }
2391 
2392   if (acquire_barrier) {
2393     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2394   }
2395 }
2396 
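// A sketch of the barrier placement GenUnsafeGet() produces for each supported memory order,
// assuming MemBarrierKind::kAnyAny lowers to `fence rw, rw` and kLoadAny to `fence r, rw`
// (the usual RISC-V mapping):
//
//   relaxed:                  load
//   acquire:                  load;  fence r, rw
//   seq_cst:  fence rw, rw;   load;  fence r, rw
//
// The same Java-visible behavior can be expressed with a plain C++ atomic load; the helper
// name is illustrative only.
#include <atomic>
#include <cstdint>

int32_t LoadWithOrderSketch(const std::atomic<int32_t>& location, std::memory_order order) {
  // For memory_order_relaxed this is a plain load; for acquire/seq_cst the compiler inserts
  // fences equivalent to the sequences shown above.
  return location.load(order);
}
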
GenUnsafeGetAbsolute(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2397 static void GenUnsafeGetAbsolute(HInvoke* invoke,
2398                                  CodeGeneratorRISCV64* codegen,
2399                                  std::memory_order order,
2400                                  DataType::Type type) {
2401   DCHECK((type == DataType::Type::kInt8) ||
2402          (type == DataType::Type::kInt32) ||
2403          (type == DataType::Type::kInt64));
2404   LocationSummary* locations = invoke->GetLocations();
2405   Location address_loc = locations->InAt(1);
2406   XRegister address = address_loc.AsRegister<XRegister>();
2407   Location out_loc = locations->Out();
2408 
2409   bool seq_cst_barrier = order == std::memory_order_seq_cst;
2410   bool acquire_barrier = seq_cst_barrier || order == std::memory_order_acquire;
2411   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2412 
2413   if (seq_cst_barrier) {
2414     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2415   }
2416 
2417   codegen->GetInstructionVisitor()->Load(out_loc, address, /*offset=*/ 0, type);
2418 
2419   if (acquire_barrier) {
2420     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2421   }
2422 }
2423 
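// A minimal sketch of the "absolute" variant: the input is a raw native address rather than
// an object-plus-offset pair, so there is no base-register add and no reference/read-barrier
// case. The helper name and the cast are illustrative only.
#include <cstdint>

int32_t UnsafeGetAbsoluteSketch(uint64_t address) {
  // Relaxed load from the given native address; the acquire/seq_cst variants additionally
  // emit the fences described for GenUnsafeGet() above.
  return *reinterpret_cast<const int32_t*>(static_cast<uintptr_t>(address));
}
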
VisitUnsafeGet(HInvoke * invoke)2424 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGet(HInvoke* invoke) {
2425   VisitJdkUnsafeGet(invoke);
2426 }
2427 
VisitUnsafeGetAbsolute(HInvoke * invoke)2428 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2429   VisitJdkUnsafeGetAbsolute(invoke);
2430 }
2431 
VisitUnsafeGet(HInvoke * invoke)2432 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGet(HInvoke* invoke) {
2433   VisitJdkUnsafeGet(invoke);
2434 }
2435 
VisitUnsafeGetAbsolute(HInvoke * invoke)2436 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2437   VisitJdkUnsafeGetAbsolute(invoke);
2438 }
2439 
VisitUnsafeGetVolatile(HInvoke * invoke)2440 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2441   VisitJdkUnsafeGetVolatile(invoke);
2442 }
2443 
VisitUnsafeGetVolatile(HInvoke * invoke)2444 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2445   VisitJdkUnsafeGetVolatile(invoke);
2446 }
2447 
VisitUnsafeGetObject(HInvoke * invoke)2448 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
2449   VisitJdkUnsafeGetReference(invoke);
2450 }
2451 
VisitUnsafeGetObject(HInvoke * invoke)2452 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
2453   VisitJdkUnsafeGetReference(invoke);
2454 }
2455 
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2456 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2457   VisitJdkUnsafeGetReferenceVolatile(invoke);
2458 }
2459 
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2460 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2461   VisitJdkUnsafeGetReferenceVolatile(invoke);
2462 }
2463 
VisitUnsafeGetLong(HInvoke * invoke)2464 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
2465   VisitJdkUnsafeGetLong(invoke);
2466 }
2467 
VisitUnsafeGetLong(HInvoke * invoke)2468 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
2469   VisitJdkUnsafeGetLong(invoke);
2470 }
2471 
VisitUnsafeGetLongVolatile(HInvoke * invoke)2472 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2473   VisitJdkUnsafeGetLongVolatile(invoke);
2474 }
2475 
VisitUnsafeGetLongVolatile(HInvoke * invoke)2476 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2477   VisitJdkUnsafeGetLongVolatile(invoke);
2478 }
2479 
VisitUnsafeGetByte(HInvoke * invoke)2480 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
2481   VisitJdkUnsafeGetByte(invoke);
2482 }
2483 
VisitUnsafeGetByte(HInvoke * invoke)2484 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
2485   VisitJdkUnsafeGetByte(invoke);
2486 }
2487 
VisitJdkUnsafeGet(HInvoke * invoke)2488 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
2489   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2490 }
2491 
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2492 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2493   CreateUnsafeGetAbsoluteLocations(allocator_, invoke);
2494 }
2495 
VisitJdkUnsafeGet(HInvoke * invoke)2496 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
2497   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2498 }
2499 
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2500 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2501   GenUnsafeGetAbsolute(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2502 }
2503 
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2504 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2505   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2506 }
2507 
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2508 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2509   GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt32);
2510 }
2511 
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2512 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2513   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2514 }
2515 
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2516 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2517   GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
2518 }
2519 
VisitJdkUnsafeGetReference(HInvoke * invoke)2520 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2521   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2522 }
2523 
VisitJdkUnsafeGetReference(HInvoke * invoke)2524 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2525   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
2526 }
2527 
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2528 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2529   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2530 }
2531 
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2532 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2533   GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kReference);
2534 }
2535 
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2536 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2537   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2538 }
2539 
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2540 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2541   GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
2542 }
2543 
VisitJdkUnsafeGetLong(HInvoke * invoke)2544 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2545   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2546 }
2547 
VisitJdkUnsafeGetLong(HInvoke * invoke)2548 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2549   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
2550 }
2551 
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2552 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2553   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2554 }
2555 
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2556 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2557   GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt64);
2558 }
2559 
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2560 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2561   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2562 }
2563 
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2564 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2565   GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
2566 }
2567 
VisitJdkUnsafeGetByte(HInvoke * invoke)2568 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2569   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
2570 }
2571 
VisitJdkUnsafeGetByte(HInvoke * invoke)2572 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2573   GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
2574 }
2575 
CreateUnsafePutLocations(ArenaAllocator * allocator,HInvoke * invoke)2576 static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2577   LocationSummary* locations =
2578       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2579   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2580   locations->SetInAt(1, Location::RequiresRegister());
2581   locations->SetInAt(2, Location::RequiresRegister());
2582   locations->SetInAt(3, Location::RequiresRegister());
2583   if (kPoisonHeapReferences && invoke->InputAt(3)->GetType() == DataType::Type::kReference) {
2584     locations->AddTemp(Location::RequiresRegister());
2585   }
2586 }
2587 
CreateUnsafePutAbsoluteLocations(ArenaAllocator * allocator,HInvoke * invoke)2588 static void CreateUnsafePutAbsoluteLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2589   LocationSummary* locations =
2590       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2591   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2592   locations->SetInAt(1, Location::RequiresRegister());
2593   locations->SetInAt(2, Location::RequiresRegister());
2594 }
2595 
GenUnsafePut(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2596 static void GenUnsafePut(HInvoke* invoke,
2597                          CodeGeneratorRISCV64* codegen,
2598                          std::memory_order order,
2599                          DataType::Type type) {
2600   Riscv64Assembler* assembler = codegen->GetAssembler();
2601   LocationSummary* locations = invoke->GetLocations();
2602   XRegister base = locations->InAt(1).AsRegister<XRegister>();    // Object pointer.
2603   XRegister offset = locations->InAt(2).AsRegister<XRegister>();  // Long offset.
2604   Location value = locations->InAt(3);
2605 
2606   {
2607     // We use a block to end the scratch scope before the write barrier, thus
2608     // freeing the temporary registers so they can be used in `MarkGCCard()`.
2609     ScratchRegisterScope srs(assembler);
2610     // Heap poisoning needs two scratch registers in `Store()`.
2611     XRegister address = (kPoisonHeapReferences && type == DataType::Type::kReference)
2612         ? locations->GetTemp(0).AsRegister<XRegister>()
2613         : srs.AllocateXRegister();
2614     __ Add(address, base, offset);
2615     GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
2616   }
2617 
2618   if (type == DataType::Type::kReference) {
2619     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2620     codegen->MaybeMarkGCCard(base, value.AsRegister<XRegister>(), value_can_be_null);
2621   }
2622 }
2623 
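// A conceptual C++ sketch of the card-marking write barrier reached through MaybeMarkGCCard()
// after a reference store: the card byte covering the holder object is marked dirty so the GC
// can rescan that region later. The shift, the dirty value, and the biased table base are
// assumptions for illustration; the real values live in the runtime's CardTable.
#include <cstdint>

static constexpr unsigned kCardShiftSketch = 10;   // Assumed card granularity of 1 KiB.
static constexpr uint8_t kCardDirtySketch = 0x70;  // Assumed "dirty" marker value.

void MarkCardSketch(uint8_t* biased_card_table, uintptr_t holder_object) {
  // Each card byte covers (1u << kCardShiftSketch) bytes of heap.
  biased_card_table[holder_object >> kCardShiftSketch] = kCardDirtySketch;
}
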
GenUnsafePutAbsolute(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,std::memory_order order,DataType::Type type)2624 static void GenUnsafePutAbsolute(HInvoke* invoke,
2625                                  CodeGeneratorRISCV64* codegen,
2626                                  std::memory_order order,
2627                                  DataType::Type type) {
2628   LocationSummary* locations = invoke->GetLocations();
2629   XRegister address = locations->InAt(1).AsRegister<XRegister>();
2630   Location value = locations->InAt(2);
2631 
2632   GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
2633 }
2634 
VisitUnsafePut(HInvoke * invoke)2635 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePut(HInvoke* invoke) {
2636   VisitJdkUnsafePut(invoke);
2637 }
2638 
VisitUnsafePutAbsolute(HInvoke * invoke)2639 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2640   VisitJdkUnsafePutAbsolute(invoke);
2641 }
2642 
VisitUnsafePut(HInvoke * invoke)2643 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePut(HInvoke* invoke) {
2644   VisitJdkUnsafePut(invoke);
2645 }
2646 
VisitUnsafePutAbsolute(HInvoke * invoke)2647 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2648   VisitJdkUnsafePutAbsolute(invoke);
2649 }
2650 
VisitUnsafePutOrdered(HInvoke * invoke)2651 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) {
2652   VisitJdkUnsafePutOrdered(invoke);
2653 }
2654 
VisitUnsafePutOrdered(HInvoke * invoke)2655 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) {
2656   VisitJdkUnsafePutOrdered(invoke);
2657 }
2658 
VisitUnsafePutVolatile(HInvoke * invoke)2659 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
2660   VisitJdkUnsafePutVolatile(invoke);
2661 }
2662 
VisitUnsafePutVolatile(HInvoke * invoke)2663 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
2664   VisitJdkUnsafePutVolatile(invoke);
2665 }
2666 
VisitUnsafePutObject(HInvoke * invoke)2667 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
2668   VisitJdkUnsafePutReference(invoke);
2669 }
2670 
VisitUnsafePutObject(HInvoke * invoke)2671 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
2672   VisitJdkUnsafePutReference(invoke);
2673 }
2674 
VisitUnsafePutObjectOrdered(HInvoke * invoke)2675 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2676   VisitJdkUnsafePutObjectOrdered(invoke);
2677 }
2678 
VisitUnsafePutObjectOrdered(HInvoke * invoke)2679 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2680   VisitJdkUnsafePutObjectOrdered(invoke);
2681 }
2682 
VisitUnsafePutObjectVolatile(HInvoke * invoke)2683 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2684   VisitJdkUnsafePutReferenceVolatile(invoke);
2685 }
2686 
VisitUnsafePutObjectVolatile(HInvoke * invoke)2687 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2688   VisitJdkUnsafePutReferenceVolatile(invoke);
2689 }
2690 
VisitUnsafePutLong(HInvoke * invoke)2691 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
2692   VisitJdkUnsafePutLong(invoke);
2693 }
2694 
VisitUnsafePutLong(HInvoke * invoke)2695 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
2696   VisitJdkUnsafePutLong(invoke);
2697 }
2698 
VisitUnsafePutLongOrdered(HInvoke * invoke)2699 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2700   VisitJdkUnsafePutLongOrdered(invoke);
2701 }
2702 
VisitUnsafePutLongOrdered(HInvoke * invoke)2703 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2704   VisitJdkUnsafePutLongOrdered(invoke);
2705 }
2706 
VisitUnsafePutLongVolatile(HInvoke * invoke)2707 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2708   VisitJdkUnsafePutLongVolatile(invoke);
2709 }
2710 
VisitUnsafePutLongVolatile(HInvoke * invoke)2711 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2712   VisitJdkUnsafePutLongVolatile(invoke);
2713 }
2714 
VisitUnsafePutByte(HInvoke * invoke)2715 void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
2716   VisitJdkUnsafePutByte(invoke);
2717 }
2718 
VisitUnsafePutByte(HInvoke * invoke)2719 void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
2720   VisitJdkUnsafePutByte(invoke);
2721 }
2722 
VisitJdkUnsafePut(HInvoke * invoke)2723 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
2724   CreateUnsafePutLocations(allocator_, invoke);
2725 }
2726 
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2727 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2728   CreateUnsafePutAbsoluteLocations(allocator_, invoke);
2729 }
2730 
VisitJdkUnsafePut(HInvoke * invoke)2731 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
2732   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2733 }
2734 
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2735 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2736   GenUnsafePutAbsolute(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
2737 }
2738 
VisitJdkUnsafePutOrdered(HInvoke * invoke)2739 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2740   CreateUnsafePutLocations(allocator_, invoke);
2741 }
2742 
VisitJdkUnsafePutOrdered(HInvoke * invoke)2743 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2744   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
2745 }
2746 
VisitJdkUnsafePutRelease(HInvoke * invoke)2747 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2748   CreateUnsafePutLocations(allocator_, invoke);
2749 }
2750 
VisitJdkUnsafePutRelease(HInvoke * invoke)2751 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2752   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
2753 }
2754 
VisitJdkUnsafePutVolatile(HInvoke * invoke)2755 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2756   CreateUnsafePutLocations(allocator_, invoke);
2757 }
2758 
VisitJdkUnsafePutVolatile(HInvoke * invoke)2759 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2760   GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
2761 }
2762 
VisitJdkUnsafePutReference(HInvoke * invoke)2763 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2764   CreateUnsafePutLocations(allocator_, invoke);
2765 }
2766 
VisitJdkUnsafePutReference(HInvoke * invoke)2767 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2768   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
2769 }
2770 
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2771 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2772   CreateUnsafePutLocations(allocator_, invoke);
2773 }
2774 
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2775 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2776   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
2777 }
2778 
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2779 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2780   CreateUnsafePutLocations(allocator_, invoke);
2781 }
2782 
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2783 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2784   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
2785 }
2786 
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2787 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2788   CreateUnsafePutLocations(allocator_, invoke);
2789 }
2790 
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2791 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2792   GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
2793 }
2794 
VisitJdkUnsafePutLong(HInvoke * invoke)2795 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2796   CreateUnsafePutLocations(allocator_, invoke);
2797 }
2798 
VisitJdkUnsafePutLong(HInvoke * invoke)2799 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2800   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
2801 }
2802 
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2803 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2804   CreateUnsafePutLocations(allocator_, invoke);
2805 }
2806 
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2807 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2808   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
2809 }
2810 
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2811 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2812   CreateUnsafePutLocations(allocator_, invoke);
2813 }
2814 
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2815 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2816   GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
2817 }
2818 
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2819 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2820   CreateUnsafePutLocations(allocator_, invoke);
2821 }
2822 
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2823 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2824   GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
2825 }
2826 
VisitJdkUnsafePutByte(HInvoke * invoke)2827 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2828   CreateUnsafePutLocations(allocator_, invoke);
2829 }
2830 
VisitJdkUnsafePutByte(HInvoke * invoke)2831 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2832   GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
2833 }
2834 
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)2835 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2836                                      HInvoke* invoke,
2837                                      CodeGeneratorRISCV64* codegen) {
2838   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
2839   LocationSummary* locations = new (allocator) LocationSummary(
2840       invoke,
2841       can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
2842       kIntrinsified);
2843   if (can_call && kUseBakerReadBarrier) {
2844     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2845   }
2846   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2847   locations->SetInAt(1, Location::RequiresRegister());
2848   locations->SetInAt(2, Location::RequiresRegister());
2849   locations->SetInAt(3, Location::RequiresRegister());
2850   locations->SetInAt(4, Location::RequiresRegister());
2851 
2852   locations->SetOut(Location::RequiresRegister());
2853 }
2854 
GenUnsafeCas(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,DataType::Type type)2855 static void GenUnsafeCas(HInvoke* invoke, CodeGeneratorRISCV64* codegen, DataType::Type type) {
2856   Riscv64Assembler* assembler = codegen->GetAssembler();
2857   LocationSummary* locations = invoke->GetLocations();
2858   XRegister out = locations->Out().AsRegister<XRegister>();            // Boolean result.
2859   XRegister object = locations->InAt(1).AsRegister<XRegister>();       // Object pointer.
2860   XRegister offset = locations->InAt(2).AsRegister<XRegister>();       // Long offset.
2861   XRegister expected = locations->InAt(3).AsRegister<XRegister>();     // Expected.
2862   XRegister new_value = locations->InAt(4).AsRegister<XRegister>();    // New value.
2863 
2864   // Do this before allocating scratch registers, as MaybeMarkGCCard() also uses them.
2865   if (type == DataType::Type::kReference) {
2866     // Mark card for object assuming new value is stored.
2867     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
2868     codegen->MaybeMarkGCCard(object, new_value, new_value_can_be_null);
2869   }
2870 
2871   ScratchRegisterScope srs(assembler);
2872   XRegister tmp_ptr = srs.AllocateXRegister();                         // Pointer to actual memory.
2873   XRegister old_value;                                                 // Value in memory.
2874 
2875   Riscv64Label exit_loop_label;
2876   Riscv64Label* exit_loop = &exit_loop_label;
2877   Riscv64Label* cmp_failure = &exit_loop_label;
2878 
2879   ReadBarrierCasSlowPathRISCV64* slow_path = nullptr;
2880   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
2881     // We need to store the `old_value` in a non-scratch register to make sure
2882     // the read barrier in the slow path does not clobber it.
2883     old_value = locations->GetTemp(0).AsRegister<XRegister>();  // The old value from main path.
2884     // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
2885     // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
2886     // as we do have marking entrypoints on riscv64 even for scratch registers.
2887     XRegister old_value_temp = srs.AllocateXRegister();
2888     slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
2889         invoke,
2890         std::memory_order_seq_cst,
2891         /*strong=*/ true,
2892         object,
2893         offset,
2894         expected,
2895         new_value,
2896         old_value,
2897         old_value_temp,
2898         /*store_result=*/ old_value_temp,  // Let the SC result clobber the reloaded old_value.
2899         /*update_old_value=*/ false,
2900         codegen);
2901     codegen->AddSlowPath(slow_path);
2902     exit_loop = slow_path->GetExitLabel();
2903     cmp_failure = slow_path->GetEntryLabel();
2904   } else {
2905     old_value = srs.AllocateXRegister();
2906   }
2907 
2908   __ Add(tmp_ptr, object, offset);
2909 
2910   // Pre-populate the result register with failure.
2911   __ Li(out, 0);
2912 
2913   GenerateCompareAndSet(assembler,
2914                         type,
2915                         std::memory_order_seq_cst,
2916                         /*strong=*/ true,
2917                         cmp_failure,
2918                         tmp_ptr,
2919                         new_value,
2920                         old_value,
2921                         /*mask=*/ kNoXRegister,
2922                         /*masked=*/ kNoXRegister,
2923                         /*store_result=*/ old_value,  // Let the SC result clobber the `old_value`.
2924                         expected);
2925 
2926   DCHECK_EQ(slow_path != nullptr, type == DataType::Type::kReference && codegen->EmitReadBarrier());
2927   if (slow_path != nullptr) {
2928     __ Bind(slow_path->GetSuccessExitLabel());
2929   }
2930 
2931   // Indicate success; this point is only reached when the compare-and-set succeeded.
2932   __ Li(out, 1);
2933 
2934   __ Bind(exit_loop);
2935 }
2936 
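// A sketch of the strong 32-bit compare-and-set loop that GenerateCompareAndSet() emits for
// the sequentially consistent case (register names and exact aq/rl bits are illustrative):
//
//   retry:
//     lr.w.aqrl  old, (ptr)
//     bne        old, expected, cmp_failure   // `out` keeps its pre-set 0 on failure.
//     sc.w.rl    tmp, new_value, (ptr)
//     bnez       tmp, retry
//   // Fall-through: success, and `out` is then set to 1.
//
// The Java-visible behavior matches a plain C++ strong CAS reporting success as a bool:
#include <atomic>
#include <cstdint>

bool CompareAndSetSketch(std::atomic<int32_t>& field, int32_t expected, int32_t new_value) {
  return field.compare_exchange_strong(expected, new_value, std::memory_order_seq_cst);
}
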
VisitUnsafeCASInt(HInvoke * invoke)2937 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
2938   VisitJdkUnsafeCASInt(invoke);
2939 }
2940 
VisitUnsafeCASInt(HInvoke * invoke)2941 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
2942   VisitJdkUnsafeCASInt(invoke);
2943 }
2944 
VisitUnsafeCASLong(HInvoke * invoke)2945 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
2946   VisitJdkUnsafeCASLong(invoke);
2947 }
2948 
VisitUnsafeCASLong(HInvoke * invoke)2949 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
2950   VisitJdkUnsafeCASLong(invoke);
2951 }
2952 
VisitUnsafeCASObject(HInvoke * invoke)2953 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
2954   VisitJdkUnsafeCASObject(invoke);
2955 }
2956 
VisitUnsafeCASObject(HInvoke * invoke)2957 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
2958   VisitJdkUnsafeCASObject(invoke);
2959 }
2960 
VisitJdkUnsafeCASInt(HInvoke * invoke)2961 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2962   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2963   VisitJdkUnsafeCompareAndSetInt(invoke);
2964 }
2965 
VisitJdkUnsafeCASInt(HInvoke * invoke)2966 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2967   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2968   VisitJdkUnsafeCompareAndSetInt(invoke);
2969 }
2970 
VisitJdkUnsafeCASLong(HInvoke * invoke)2971 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2972   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2973   VisitJdkUnsafeCompareAndSetLong(invoke);
2974 }
2975 
VisitJdkUnsafeCASLong(HInvoke * invoke)2976 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2977   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2978   VisitJdkUnsafeCompareAndSetLong(invoke);
2979 }
2980 
VisitJdkUnsafeCASObject(HInvoke * invoke)2981 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2982   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2983   VisitJdkUnsafeCompareAndSetReference(invoke);
2984 }
2985 
VisitJdkUnsafeCASObject(HInvoke * invoke)2986 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2987   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2988   VisitJdkUnsafeCompareAndSetReference(invoke);
2989 }
2990 
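// The "compare-and-set semantics" mentioned in the comments above means the operation reports
// success or failure as a boolean rather than returning the witnessed old value. In C++ terms
// (illustrative sketch only):
#include <atomic>
#include <cstdint>

// Compare-and-set: returns whether the swap happened (what compareAndSwapInt provides).
bool CasSetSketch(std::atomic<int32_t>& field, int32_t expected, int32_t desired) {
  return field.compare_exchange_strong(expected, desired);
}

// Compare-and-exchange: returns the value observed in memory (what compareAndExchangeInt
// would provide); `expected` is updated to the witnessed value on failure.
int32_t CasExchangeSketch(std::atomic<int32_t>& field, int32_t expected, int32_t desired) {
  field.compare_exchange_strong(expected, desired);
  return expected;
}
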
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2991 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2992   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
2993 }
2994 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2995 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2996   GenUnsafeCas(invoke, codegen_, DataType::Type::kInt32);
2997 }
2998 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2999 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
3000   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3001 }
3002 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)3003 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
3004   GenUnsafeCas(invoke, codegen_, DataType::Type::kInt64);
3005 }
3006 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3007 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3008   // The only supported read barrier implementation is the Baker-style read barriers.
3009   if (codegen_->EmitNonBakerReadBarrier()) {
3010     return;
3011   }
3012 
3013   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
3014   if (kPoisonHeapReferences) {
3015     return;
3016   }
3017 
3018   CreateUnsafeCASLocations(allocator_, invoke, codegen_);
3019   if (codegen_->EmitReadBarrier()) {
3020     DCHECK(kUseBakerReadBarrier);
3021     // We need one non-scratch temporary register for read barrier.
3022     LocationSummary* locations = invoke->GetLocations();
3023     locations->AddTemp(Location::RequiresRegister());
3024   }
3025 }
3026 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3027 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3028   GenUnsafeCas(invoke, codegen_, DataType::Type::kReference);
3029 }
3030 
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorRISCV64 * codegen)3031 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
3032                                               HInvoke* invoke,
3033                                               CodeGeneratorRISCV64* codegen) {
3034   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
3035   LocationSummary* locations = new (allocator) LocationSummary(
3036       invoke,
3037       can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
3038       kIntrinsified);
3039   if (can_call && kUseBakerReadBarrier) {
3040     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3041   }
3042   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3043   locations->SetInAt(1, Location::RequiresRegister());
3044   locations->SetInAt(2, Location::RequiresRegister());
3045   locations->SetInAt(3, Location::RequiresRegister());
3046 
3047   // Request another temporary register for methods that don't return a value.
3048   DataType::Type return_type = invoke->GetType();
3049   const bool is_void = return_type == DataType::Type::kVoid;
3050   if (is_void) {
3051     locations->AddTemp(Location::RequiresRegister());
3052   } else {
3053     locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3054   }
3055 }
3056 
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op)3057 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
3058                                   DataType::Type type,
3059                                   CodeGeneratorRISCV64* codegen,
3060                                   GetAndUpdateOp get_and_update_op) {
3061   // Currently only used for these GetAndUpdateOp values. It might be fine for other ops,
3062   // but double-check before using.
3063   DCHECK(get_and_update_op == GetAndUpdateOp::kAdd || get_and_update_op == GetAndUpdateOp::kSet);
3064 
3065   Riscv64Assembler* assembler = codegen->GetAssembler();
3066   LocationSummary* locations = invoke->GetLocations();
3067   DataType::Type return_type = invoke->GetType();
3068   const bool is_void = return_type == DataType::Type::kVoid;
3069   // We use a temporary for void methods, as we don't return the value.
3070   Location out_or_temp_loc =
3071       is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
3072   XRegister out_or_temp = out_or_temp_loc.AsRegister<XRegister>();  // Result.
3073   XRegister base = locations->InAt(1).AsRegister<XRegister>();      // Object pointer.
3074   XRegister offset = locations->InAt(2).AsRegister<XRegister>();    // Long offset.
3075   XRegister arg = locations->InAt(3).AsRegister<XRegister>();       // New value or addend.
3076 
3077   // Do this before allocating scratch registers, as MaybeMarkGCCard() also uses them.
3078   if (type == DataType::Type::kReference) {
3079     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3080     // Mark card for object as a new value shall be stored.
3081     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
3082     codegen->MaybeMarkGCCard(base, /*value=*/arg, new_value_can_be_null);
3083   }
3084 
3085   ScratchRegisterScope srs(assembler);
3086   XRegister tmp_ptr = srs.AllocateXRegister();                        // Pointer to actual memory.
3087   __ Add(tmp_ptr, base, offset);
3088   GenerateGetAndUpdate(codegen,
3089                        get_and_update_op,
3090                        (type == DataType::Type::kReference) ? DataType::Type::kInt32 : type,
3091                        std::memory_order_seq_cst,
3092                        tmp_ptr,
3093                        arg,
3094                        /*old_value=*/ out_or_temp,
3095                        /*mask=*/ kNoXRegister,
3096                        /*temp=*/ kNoXRegister);
3097 
3098   if (!is_void && type == DataType::Type::kReference) {
3099     __ ZextW(out_or_temp, out_or_temp);
3100     if (codegen->EmitReadBarrier()) {
3101       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3102       if (kUseBakerReadBarrier) {
3103         // Use RA as temp. It is clobbered in the slow path anyway.
3104         static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
3105         SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
3106             invoke, out_or_temp_loc, kBakerReadBarrierTemp);
3107         codegen->EmitBakerReadBarierMarkingCheck(
3108             rb_slow_path, out_or_temp_loc, kBakerReadBarrierTemp);
3109       } else {
3110         codegen->GenerateReadBarrierSlow(invoke,
3111                                          out_or_temp_loc,
3112                                          out_or_temp_loc,
3113                                          Location::RegisterLocation(base),
3114                                          /*offset=*/ 0u,
3115                                          /*index=*/ Location::RegisterLocation(offset));
3116       }
3117     }
3118   }
3119 }
3120 
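// A sketch of the Java-visible semantics of the get-and-update intrinsics above and the AMO
// instructions they lower to on riscv64 (for the 32-bit, no-mask case; aq/rl bits follow the
// requested memory order):
//
//   getAndAddInt  -> amoadd.w.aqrl  old, addend,    (ptr)
//   getAndSetInt  -> amoswap.w.aqrl old, new_value, (ptr)
//
// The equivalent plain C++ operations:
#include <atomic>
#include <cstdint>

int32_t GetAndAddSketch(std::atomic<int32_t>& field, int32_t addend) {
  return field.fetch_add(addend, std::memory_order_seq_cst);    // Returns the previous value.
}

int32_t GetAndSetSketch(std::atomic<int32_t>& field, int32_t new_value) {
  return field.exchange(new_value, std::memory_order_seq_cst);  // Returns the previous value.
}
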
VisitUnsafeGetAndAddInt(HInvoke * invoke)3121 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3122   VisitJdkUnsafeGetAndAddInt(invoke);
3123 }
3124 
VisitUnsafeGetAndAddInt(HInvoke * invoke)3125 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3126   VisitJdkUnsafeGetAndAddInt(invoke);
3127 }
3128 
VisitUnsafeGetAndAddLong(HInvoke * invoke)3129 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3130   VisitJdkUnsafeGetAndAddLong(invoke);
3131 }
3132 
VisitUnsafeGetAndAddLong(HInvoke * invoke)3133 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3134   VisitJdkUnsafeGetAndAddLong(invoke);
3135 }
3136 
VisitUnsafeGetAndSetInt(HInvoke * invoke)3137 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3138   VisitJdkUnsafeGetAndSetInt(invoke);
3139 }
3140 
VisitUnsafeGetAndSetInt(HInvoke * invoke)3141 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3142   VisitJdkUnsafeGetAndSetInt(invoke);
3143 }
3144 
VisitUnsafeGetAndSetLong(HInvoke * invoke)3145 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3146   VisitJdkUnsafeGetAndSetLong(invoke);
3147 }
3148 
VisitUnsafeGetAndSetLong(HInvoke * invoke)3149 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3150   VisitJdkUnsafeGetAndSetLong(invoke);
3151 }
3152 
VisitUnsafeGetAndSetObject(HInvoke * invoke)3153 void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3154   VisitJdkUnsafeGetAndSetReference(invoke);
3155 }
3156 
VisitUnsafeGetAndSetObject(HInvoke * invoke)3157 void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3158   VisitJdkUnsafeGetAndSetReference(invoke);
3159 }
3160 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)3161 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3162   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3163 }
3164 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)3165 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3166   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
3167 }
3168 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)3169 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3170   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3171 }
3172 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)3173 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3174   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
3175 }
3176 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)3177 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3178   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3179 }
3180 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)3181 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3182   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
3183 }
3184 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)3185 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3186   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3187 }
3188 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)3189 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3190   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
3191 }
3192 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)3193 void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3194   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
3195   if (kPoisonHeapReferences) {
3196     return;
3197   }
3198 
3199   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
3200 }
3201 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)3202 void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3203   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
3204 }
3205 
VisitStringCompareTo(HInvoke * invoke)3206 void IntrinsicLocationsBuilderRISCV64::VisitStringCompareTo(HInvoke* invoke) {
3207   LocationSummary* locations =
3208       new (allocator_) LocationSummary(invoke,
3209                                        invoke->InputAt(1)->CanBeNull()
3210                                            ? LocationSummary::kCallOnSlowPath
3211                                            : LocationSummary::kNoCall,
3212                                        kIntrinsified);
3213   locations->SetInAt(0, Location::RequiresRegister());
3214   locations->SetInAt(1, Location::RequiresRegister());
3215   locations->AddRegisterTemps(3);
3216   // Need a temporary register for the string compression feature.
3217   if (mirror::kUseStringCompression) {
3218     locations->AddTemp(Location::RequiresRegister());
3219   }
3220   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3221 }
3222 
VisitStringCompareTo(HInvoke * invoke)3223 void IntrinsicCodeGeneratorRISCV64::VisitStringCompareTo(HInvoke* invoke) {
3224   Riscv64Assembler* assembler = GetAssembler();
3225   DCHECK(assembler->IsExtensionEnabled(Riscv64Extension::kZbb));
3226   LocationSummary* locations = invoke->GetLocations();
3227 
3228   XRegister str = locations->InAt(0).AsRegister<XRegister>();
3229   XRegister arg = locations->InAt(1).AsRegister<XRegister>();
3230   XRegister out = locations->Out().AsRegister<XRegister>();
3231 
3232   XRegister temp0 = locations->GetTemp(0).AsRegister<XRegister>();
3233   XRegister temp1 = locations->GetTemp(1).AsRegister<XRegister>();
3234   XRegister temp2 = locations->GetTemp(2).AsRegister<XRegister>();
3235   XRegister temp3 = kNoXRegister;
3236   if (mirror::kUseStringCompression) {
3237     temp3 = locations->GetTemp(3).AsRegister<XRegister>();
3238   }
3239 
3240   Riscv64Label loop;
3241   Riscv64Label find_char_diff;
3242   Riscv64Label end;
3243   Riscv64Label different_compression;
3244 
3245   // Get offsets of count and value fields within a string object.
3246   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
3247   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
3248 
3249   // Note that the null check must have been done earlier.
3250   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
3251 
3252   // Take slow path and throw if input can be and is null.
3253   SlowPathCodeRISCV64* slow_path = nullptr;
3254   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
3255   if (can_slow_path) {
3256     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
3257     codegen_->AddSlowPath(slow_path);
3258     __ Beqz(arg, slow_path->GetEntryLabel());
3259   }
3260 
3261   // Reference equality check, return 0 if same reference.
3262   __ Sub(out, str, arg);
3263   __ Beqz(out, &end);
3264 
3265   if (mirror::kUseStringCompression) {
3266     // Load `count` fields of this and argument strings.
3267     __ Loadwu(temp3, str, count_offset);
3268     __ Loadwu(temp2, arg, count_offset);
3269     // Clean out compression flag from lengths.
3270     __ Srliw(temp0, temp3, 1u);
3271     __ Srliw(temp1, temp2, 1u);
3272   } else {
3273     // Load lengths of this and argument strings.
3274     __ Loadwu(temp0, str, count_offset);
3275     __ Loadwu(temp1, arg, count_offset);
3276   }
3277   // out = length diff.
3278   __ Subw(out, temp0, temp1);
3279 
3280   // Find the length of the shorter string
3281   __ Minu(temp0, temp0, temp1);
3282   // Shorter string is empty?
3283   __ Beqz(temp0, &end);
3284 
3285   if (mirror::kUseStringCompression) {
3286     // Extract both compression flags
3287     __ Andi(temp3, temp3, 1);
3288     __ Andi(temp2, temp2, 1);
3289     __ Bne(temp2, temp3, &different_compression);
3290   }
3291   // Store offset of string value in preparation for comparison loop.
3292   __ Li(temp1, value_offset);
3293   if (mirror::kUseStringCompression) {
3294     // For string compression, calculate the number of bytes to compare (not chars).
3295     __ Sll(temp0, temp0, temp3);
3296   }
3297 
3298   // Assertions that must hold in order to compare strings 8 bytes at a time.
3299   DCHECK_ALIGNED(value_offset, 8);
3300   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
3301 
3302   constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
3303   static_assert(char_size == 2u, "Char expected to be 2 bytes wide");
3304 
3305   ScratchRegisterScope scratch_scope(assembler);
3306   XRegister temp4 = scratch_scope.AllocateXRegister();
3307 
3308   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
3309   __ Bind(&loop);
3310   __ Add(temp4, str, temp1);
3311   __ Ld(temp4, temp4, 0);
3312   __ Add(temp2, arg, temp1);
3313   __ Ld(temp2, temp2, 0);
3314   __ Bne(temp4, temp2, &find_char_diff);
3315   __ Addi(temp1, temp1, char_size * 4);
3316   // With string compression, we have compared 8 bytes, otherwise 4 chars.
3317   __ Addi(temp0, temp0, (mirror::kUseStringCompression) ? -8 : -4);
3318   __ Bgtz(temp0, &loop);
3319   __ J(&end);
3320 
3321   // Find the single character difference.
3322   __ Bind(&find_char_diff);
3323   // Get the bit position of the first character that differs.
3324   __ Xor(temp1, temp2, temp4);
3325   __ Ctz(temp1, temp1);
3326 
3327   // If the number of chars remaining <= the index where the difference occurs (0-3), then
3328   // the difference occurs outside the remaining string data, so just return length diff (out).
3329   __ Srliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
3330   __ Ble(temp0, temp1, &end);
3331 
3332   // Extract the characters and calculate the difference.
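       // (With string compression enabled, `temp1` is now a byte index of the first difference;
       // otherwise it is a char index. The shift left below converts it back to a bit offset into
       // the loaded 64-bit words, and for uncompressed strings `Andn` clears bit 3 so that the
       // offset stays aligned to a whole 16-bit character.)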
3333   __ Slliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
3334   if (mirror::kUseStringCompression) {
3335     __ Slliw(temp3, temp3, 3u);
3336     __ Andn(temp1, temp1, temp3);
3337   }
3338   __ Srl(temp2, temp2, temp1);
3339   __ Srl(temp4, temp4, temp1);
3340   if (mirror::kUseStringCompression) {
3341     __ Li(temp0, -256);           // ~0xff
3342     __ Sllw(temp0, temp0, temp3);  // temp3 = 0 or 8, temp0 := ~0xff or ~0xffff
3343     __ Andn(temp4, temp4, temp0);  // Extract 8 or 16 bits.
3344     __ Andn(temp2, temp2, temp0);  // Extract 8 or 16 bits.
3345   } else {
3346     __ ZextH(temp4, temp4);
3347     __ ZextH(temp2, temp2);
3348   }
3349 
3350   __ Subw(out, temp4, temp2);
3351 
3352   if (mirror::kUseStringCompression) {
3353     __ J(&end);
3354     __ Bind(&different_compression);
3355 
3356     // Comparison for different compression style.
3357     constexpr size_t c_char_size = DataType::Size(DataType::Type::kInt8);
3358     static_assert(c_char_size == 1u, "Compressed char expected to be 1 byte wide");
3359 
3360     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
3361     __ Xor(temp4, str, arg);
3362     __ Addi(temp3, temp3, -1);    // -1 if str is compressed, 0 otherwise
3363     __ And(temp2, temp4, temp3);  // str^arg if str is compressed, 0 otherwise
3364     __ Xor(temp1, temp2, arg);    // str if str is compressed, arg otherwise
3365     __ Xor(temp2, temp2, str);    // arg if str is compressed, str otherwise
3366 
3367     // We want to free up temp3, which currently holds the `str` compression flag, for comparison.
3368     // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
3369     // as unsigned. This will allow `addi temp0, temp0, -2; bgtz different_compression_loop`
3370     // to serve as the loop condition.
3371     __ Sh1Add(temp0, temp0, temp3);
3372 
3373     // Adjust temp1 and temp2 from string pointers to data pointers.
3374     __ Addi(temp1, temp1, value_offset);
3375     __ Addi(temp2, temp2, value_offset);
3376 
3377     Riscv64Label different_compression_loop;
3378     Riscv64Label different_compression_diff;
3379 
3380     __ Bind(&different_compression_loop);
3381     __ Lbu(temp4, temp1, 0);
3382     __ Addiw(temp1, temp1, c_char_size);
3383     __ Lhu(temp3, temp2, 0);
3384     __ Addi(temp2, temp2, char_size);
3385     __ Sub(temp4, temp4, temp3);
3386     __ Bnez(temp4, &different_compression_diff);
3387     __ Addi(temp0, temp0, -2);
3388     __ Bgtz(temp0, &different_compression_loop);
3389     __ J(&end);
3390 
3391     // Calculate the difference.
3392     __ Bind(&different_compression_diff);
3393     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
3394                   "Expecting 0=compressed, 1=uncompressed");
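         // The loop above always computes `temp4 = compressed_char - uncompressed_char`. If `str`
         // is the uncompressed string, the sign must be flipped; this is done branchlessly below
         // with `mask = (str compressed ? 0 : -1)` and `out = (temp4 ^ mask) - mask`.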
3395     __ Andi(temp0, temp0, 1);
3396     __ Addi(temp0, temp0, -1);
3397     __ Xor(out, temp4, temp0);
3398     __ Sub(out, out, temp0);
3399   }
3400 
3401   __ Bind(&end);
3402 
3403   if (can_slow_path) {
3404     __ Bind(slow_path->GetExitLabel());
3405   }
3406 }
3407 
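     // Slow path for the VarHandle intrinsics. Besides the generic runtime call, it can emit an
     // out-of-line byte array view check: the main path branches to `GetByteArrayViewCheckLabel()`
     // when the access may be a byte array view (ByteBuffer) access, and binds
     // `GetNativeByteOrderLabel()` so that the out-of-line code can branch back to the main path
     // when no byte swap is needed.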
3408 class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
3409  public:
3410   VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
3411       : IntrinsicSlowPathRISCV64(invoke),
3412         order_(order),
3413         return_success_(false),
3414         strong_(false),
3415         get_and_update_op_(GetAndUpdateOp::kAdd) {
3416   }
3417 
3418   Riscv64Label* GetByteArrayViewCheckLabel() {
3419     return &byte_array_view_check_label_;
3420   }
3421 
3422   Riscv64Label* GetNativeByteOrderLabel() {
3423     return &native_byte_order_label_;
3424   }
3425 
3426   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3427     if (return_success) {
3428       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3429     } else {
3430       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3431     }
3432     return_success_ = return_success;
3433     strong_ = strong;
3434   }
3435 
3436   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3437     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3438     get_and_update_op_ = get_and_update_op;
3439   }
3440 
3441   void EmitNativeCode(CodeGenerator* codegen_in) override {
3442     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3443       EmitByteArrayViewCode(codegen_in);
3444     }
3445     IntrinsicSlowPathRISCV64::EmitNativeCode(codegen_in);
3446   }
3447 
3448  private:
3449   HInvoke* GetInvoke() const {
3450     return GetInstruction()->AsInvoke();
3451   }
3452 
3453   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3454     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3455   }
3456 
3457   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3458 
3459   Riscv64Label byte_array_view_check_label_;
3460   Riscv64Label native_byte_order_label_;
3461   // Shared parameter for all VarHandle intrinsics.
3462   std::memory_order order_;
3463   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3464   bool return_success_;
3465   bool strong_;
3466   // Extra argument for GenerateVarHandleGetAndUpdate().
3467   GetAndUpdateOp get_and_update_op_;
3468 };
3469 
3470 // Generate subtype check without read barriers.
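     // Roughly equivalent to the following walk over the superclass chain, where reaching a null
     // superclass defers to the slow path (a possible false negative due to the missing read
     // barrier):
     //   klass = object->klass;
     //   while (klass != type) {
     //     klass = klass->super_class;
     //     if (klass == null) goto slow_path;
     //   }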
3471 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorRISCV64* codegen,
3472                                                     SlowPathCodeRISCV64* slow_path,
3473                                                     XRegister object,
3474                                                     XRegister type,
3475                                                     bool object_can_be_null = true) {
3476   Riscv64Assembler* assembler = codegen->GetAssembler();
3477 
3478   const MemberOffset class_offset = mirror::Object::ClassOffset();
3479   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3480 
3481   Riscv64Label success;
3482   if (object_can_be_null) {
3483     __ Beqz(object, &success);
3484   }
3485 
3486   ScratchRegisterScope srs(assembler);
3487   XRegister temp = srs.AllocateXRegister();
3488 
3489   // Note: The `type` can be `TMP`. We're using "bare" local branches to enforce that they shall
3490   // not be expanded and the scratch register `TMP` shall not be clobbered if taken. Taking the
3491   // branch to the slow path can theoretically clobber `TMP` (if outside the 1 MiB range).
3492   __ Loadwu(temp, object, class_offset.Int32Value());
3493   codegen->MaybeUnpoisonHeapReference(temp);
3494   Riscv64Label loop;
3495   __ Bind(&loop);
3496   __ Beq(type, temp, &success, /*is_bare=*/ true);
3497   // We may not have another scratch register for `Loadwu()`. Use `Lwu()` directly.
3498   DCHECK(IsInt<12>(super_class_offset.Int32Value()));
3499   __ Lwu(temp, temp, super_class_offset.Int32Value());
3500   codegen->MaybeUnpoisonHeapReference(temp);
3501   __ Beqz(temp, slow_path->GetEntryLabel());
3502   __ J(&loop, /*is_bare=*/ true);
3503   __ Bind(&success);
3504 }
3505 
3506 // Check access mode and the primitive type from VarHandle.varType.
3507 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3508 // check without read barrier, so it can have false negatives which we handle in the slow path.
3509 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3510                                                         CodeGeneratorRISCV64* codegen,
3511                                                         SlowPathCodeRISCV64* slow_path,
3512                                                         DataType::Type type) {
3513   mirror::VarHandle::AccessMode access_mode =
3514       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3515   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3516 
3517   Riscv64Assembler* assembler = codegen->GetAssembler();
3518   LocationSummary* locations = invoke->GetLocations();
3519   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3520 
3521   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3522   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3523   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3524 
3525   ScratchRegisterScope srs(assembler);
3526   XRegister temp = srs.AllocateXRegister();
3527   XRegister temp2 = srs.AllocateXRegister();
3528 
3529   // Check that the operation is permitted.
3530   __ Loadw(temp, varhandle, access_mode_bit_mask_offset.Int32Value());
3531   DCHECK_LT(enum_cast<uint32_t>(access_mode), 31u);  // We cannot avoid the shift below.
3532   __ Slliw(temp, temp, 31 - enum_cast<uint32_t>(access_mode));  // Shift tested bit to sign bit.
3533   __ Bgez(temp, slow_path->GetEntryLabel());  // If not permitted, go to slow path.
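       // (For example, for `access_mode == 3` the bitmask is shifted left by 28, placing bit 3 in
       // the sign bit; `Bgez` then takes the slow path exactly when that bit was clear.)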
3534 
3535   // For primitive types, we do not need a read barrier when loading this reference, as we use
3536   // it only to load a constant field through it. For reference types, we deliberately avoid the
3537   // read barrier, letting the slow path handle the false negatives.
3538   __ Loadwu(temp, varhandle, var_type_offset.Int32Value());
3539   codegen->MaybeUnpoisonHeapReference(temp);
3540 
3541   // Check the varType.primitiveType field against the type we're trying to use.
3542   __ Loadhu(temp2, temp, primitive_type_offset.Int32Value());
3543   if (primitive_type == Primitive::kPrimNot) {
3544     static_assert(Primitive::kPrimNot == 0);
3545     __ Bnez(temp2, slow_path->GetEntryLabel());
3546   } else {
3547     __ Li(temp, enum_cast<int32_t>(primitive_type));  // `temp` can be clobbered.
3548     __ Bne(temp2, temp, slow_path->GetEntryLabel());
3549   }
3550 
3551   srs.FreeXRegister(temp2);
3552 
3553   if (type == DataType::Type::kReference) {
3554     // Check reference arguments against the varType.
3555     // False negatives due to varType being an interface or array type
3556     // or due to the missing read barrier are handled by the slow path.
3557     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3558     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3559     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3560     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3561       HInstruction* arg = invoke->InputAt(arg_index);
3562       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3563       if (!arg->IsNullConstant()) {
3564         XRegister arg_reg = locations->InAt(arg_index).AsRegister<XRegister>();
3565         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp);
3566       }
3567     }
3568   }
3569 }
3570 
3571 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3572                                               CodeGeneratorRISCV64* codegen,
3573                                               SlowPathCodeRISCV64* slow_path) {
3574   Riscv64Assembler* assembler = codegen->GetAssembler();
3575   XRegister varhandle = invoke->GetLocations()->InAt(0).AsRegister<XRegister>();
3576 
3577   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3578 
3579   ScratchRegisterScope srs(assembler);
3580   XRegister temp = srs.AllocateXRegister();
3581 
3582   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3583   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3584   __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3585   __ Bnez(temp, slow_path->GetEntryLabel());
3586 }
3587 
3588 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3589                                                  CodeGeneratorRISCV64* codegen,
3590                                                  SlowPathCodeRISCV64* slow_path) {
3591   VarHandleOptimizations optimizations(invoke);
3592   Riscv64Assembler* assembler = codegen->GetAssembler();
3593   LocationSummary* locations = invoke->GetLocations();
3594   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3595   XRegister object = locations->InAt(1).AsRegister<XRegister>();
3596 
3597   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3598   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3599 
3600   // Null-check the object.
3601   if (!optimizations.GetSkipObjectNullCheck()) {
3602     __ Beqz(object, slow_path->GetEntryLabel());
3603   }
3604 
3605   if (!optimizations.GetUseKnownImageVarHandle()) {
3606     ScratchRegisterScope srs(assembler);
3607     XRegister temp = srs.AllocateXRegister();
3608 
3609     // Check that the VarHandle references an instance field by checking that
3610     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3611     // type compatibility check with the source object's type, which will fail for null.
3612     __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
3613     // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3614     __ Bnez(temp, slow_path->GetEntryLabel());
3615 
3616     // Check that the object has the correct type.
3617     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3618     __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3619     codegen->MaybeUnpoisonHeapReference(temp);
3620     GenerateSubTypeObjectCheckNoReadBarrier(
3621         codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
3622   }
3623 }
3624 
3625 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3626                                          CodeGeneratorRISCV64* codegen,
3627                                          VarHandleSlowPathRISCV64* slow_path) {
3628   VarHandleOptimizations optimizations(invoke);
3629   Riscv64Assembler* assembler = codegen->GetAssembler();
3630   LocationSummary* locations = invoke->GetLocations();
3631   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3632   XRegister object = locations->InAt(1).AsRegister<XRegister>();
3633   XRegister index = locations->InAt(2).AsRegister<XRegister>();
3634   DataType::Type value_type =
3635       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3636   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3637 
3638   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3639   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3640   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3641   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3642   const MemberOffset class_offset = mirror::Object::ClassOffset();
3643   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3644 
3645   // Null-check the object.
3646   if (!optimizations.GetSkipObjectNullCheck()) {
3647     __ Beqz(object, slow_path->GetEntryLabel());
3648   }
3649 
3650   ScratchRegisterScope srs(assembler);
3651   XRegister temp = srs.AllocateXRegister();
3652   XRegister temp2 = srs.AllocateXRegister();
3653 
3654   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3655   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3656   // coordinateType0 shall not be null but we do not explicitly verify that.
3657   __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
3658   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3659   __ Beqz(temp, slow_path->GetEntryLabel());
3660 
3661   // Check the object's class against coordinateType0.
3662   //
3663   // This is an exact check and we defer other cases to the runtime. This includes
3664   // conversion to array of superclass references, which is valid but subsequently
3665   // requires all update operations to check that the value can indeed be stored.
3666   // We do not want to perform such extra checks in the intrinsified code.
3667   //
3668   // We do this check without read barrier, so there can be false negatives which we
3669   // defer to the slow path. There shall be no false negatives for array classes in the
3670   // boot image (including Object[] and primitive arrays) because they are non-movable.
3671   __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
3672   __ Loadwu(temp2, object, class_offset.Int32Value());
3673   __ Bne(temp, temp2, slow_path->GetEntryLabel());
3674 
3675   // Check that the coordinateType0 is an array type. We do not need a read barrier
3676   // for loading constant reference fields (or chains of them) for comparison with null,
3677   // nor for finally loading a constant primitive field (primitive type) below.
3678   codegen->MaybeUnpoisonHeapReference(temp);
3679   __ Loadwu(temp2, temp, component_type_offset.Int32Value());
3680   codegen->MaybeUnpoisonHeapReference(temp2);
3681   __ Beqz(temp2, slow_path->GetEntryLabel());
3682 
3683   // Check that the array component type matches the primitive type.
3684   __ Loadhu(temp, temp2, primitive_type_offset.Int32Value());
3685   if (primitive_type == Primitive::kPrimNot) {
3686     static_assert(Primitive::kPrimNot == 0);
3687     __ Bnez(temp, slow_path->GetEntryLabel());
3688   } else {
3689     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3690     // we shall check for a byte array view in the slow path.
3691     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3692     // so we cannot emit that if we're JITting without boot image.
3693     bool boot_image_available =
3694         codegen->GetCompilerOptions().IsBootImage() ||
3695         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3696     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3697     Riscv64Label* slow_path_label =
3698         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3699     __ Li(temp2, enum_cast<int32_t>(primitive_type));
3700     __ Bne(temp, temp2, slow_path_label);
3701   }
3702 
3703   // Check for array index out of bounds.
3704   __ Loadw(temp, object, array_length_offset.Int32Value());
3705   __ Bgeu(index, temp, slow_path->GetEntryLabel());
3706 }
3707 
3708 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3709                                               CodeGeneratorRISCV64* codegen,
3710                                               VarHandleSlowPathRISCV64* slow_path) {
3711   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3712   if (expected_coordinates_count == 0u) {
3713     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3714   } else if (expected_coordinates_count == 1u) {
3715     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3716   } else {
3717     DCHECK_EQ(expected_coordinates_count, 2u);
3718     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3719   }
3720 }
3721 
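     // Emit the VarHandle checks and return the created slow path, or nullptr when all checks can
     // be statically elided (a `VarHandle` known to be in the boot image where any remaining
     // object null check is also unnecessary or not applicable).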
3722 static VarHandleSlowPathRISCV64* GenerateVarHandleChecks(HInvoke* invoke,
3723                                                          CodeGeneratorRISCV64* codegen,
3724                                                          std::memory_order order,
3725                                                          DataType::Type type) {
3726   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3727   VarHandleOptimizations optimizations(invoke);
3728   if (optimizations.GetUseKnownImageVarHandle()) {
3729     DCHECK_NE(expected_coordinates_count, 2u);
3730     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3731       return nullptr;
3732     }
3733   }
3734 
3735   VarHandleSlowPathRISCV64* slow_path =
3736       new (codegen->GetScopedAllocator()) VarHandleSlowPathRISCV64(invoke, order);
3737   codegen->AddSlowPath(slow_path);
3738 
3739   if (!optimizations.GetUseKnownImageVarHandle()) {
3740     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3741   }
3742   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3743 
3744   return slow_path;
3745 }
3746 
3747 struct VarHandleTarget {
3748   XRegister object;  // The object holding the value to operate on.
3749   XRegister offset;  // The offset of the value to operate on.
3750 };
3751 
3752 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3753   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3754   LocationSummary* locations = invoke->GetLocations();
3755 
3756   VarHandleTarget target;
3757   // The temporary allocated for loading the offset.
3758   target.offset = locations->GetTemp(0u).AsRegister<XRegister>();
3759   // The reference to the object that holds the value to operate on.
3760   target.object = (expected_coordinates_count == 0u)
3761       ? locations->GetTemp(1u).AsRegister<XRegister>()
3762       : locations->InAt(1).AsRegister<XRegister>();
3763   return target;
3764 }
3765 
3766 static void GenerateVarHandleTarget(HInvoke* invoke,
3767                                     const VarHandleTarget& target,
3768                                     CodeGeneratorRISCV64* codegen) {
3769   Riscv64Assembler* assembler = codegen->GetAssembler();
3770   LocationSummary* locations = invoke->GetLocations();
3771   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
3772   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3773 
3774   if (expected_coordinates_count <= 1u) {
3775     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
3776       ScopedObjectAccess soa(Thread::Current());
3777       ArtField* target_field = GetBootImageVarHandleField(invoke);
3778       if (expected_coordinates_count == 0u) {
3779         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3780         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3781           uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
3782           codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
3783         } else {
3784           codegen->LoadTypeForBootImageIntrinsic(
3785               target.object,
3786               TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
3787         }
3788       }
3789       __ Li(target.offset, target_field->GetOffset().Uint32Value());
3790     } else {
3791       // For static fields, we need to fill the `target.object` with the declaring class,
3792       // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
3793       // we do not need the declaring class and can forget the `ArtField*` once we have
3794       // loaded the `target.offset`, so we use `target.offset` itself to hold the `ArtField*`.
3795       XRegister field = (expected_coordinates_count == 0) ? target.object : target.offset;
3796 
3797       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3798       const MemberOffset offset_offset = ArtField::OffsetOffset();
3799 
3800       // Load the ArtField*, the offset and, if needed, declaring class.
3801       __ Loadd(field, varhandle, art_field_offset.Int32Value());
3802       __ Loadwu(target.offset, field, offset_offset.Int32Value());
3803       if (expected_coordinates_count == 0u) {
3804         codegen->GenerateGcRootFieldLoad(
3805             invoke,
3806             Location::RegisterLocation(target.object),
3807             field,
3808             ArtField::DeclaringClassOffset().Int32Value(),
3809             codegen->GetCompilerReadBarrierOption());
3810       }
3811     }
3812   } else {
3813     DCHECK_EQ(expected_coordinates_count, 2u);
3814     DataType::Type value_type =
3815         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3816     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3817 
3818     XRegister index = locations->InAt(2).AsRegister<XRegister>();
3819     __ Li(target.offset, data_offset.Int32Value());
3820     codegen->GetInstructionVisitor()->ShNAdd(target.offset, index, target.offset, value_type);
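         // In effect: target.offset = data_offset + (index << log2(element_size)); the accessors
         // then form the element address as `target.object + target.offset`.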
3821   }
3822 }
3823 
3824 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
3825                                                        CodeGeneratorRISCV64* codegen) {
3826   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3827   DataType::Type return_type = invoke->GetType();
3828 
3829   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3830   LocationSummary* locations =
3831       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3832   locations->SetInAt(0, Location::RequiresRegister());
3833   // Require coordinates in registers. These are the object holding the value
3834   // to operate on (except for static fields) and index (for arrays and views).
3835   for (size_t i = 0; i != expected_coordinates_count; ++i) {
3836     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3837   }
3838   if (return_type != DataType::Type::kVoid) {
3839     if (DataType::IsFloatingPointType(return_type)) {
3840       locations->SetOut(Location::RequiresFpuRegister());
3841     } else {
3842       locations->SetOut(Location::RequiresRegister());
3843     }
3844   }
3845   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3846   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3847   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3848     HInstruction* arg = invoke->InputAt(arg_index);
3849     if (IsZeroBitPattern(arg)) {
3850       locations->SetInAt(arg_index, Location::ConstantLocation(arg));
3851     } else if (DataType::IsFloatingPointType(arg->GetType())) {
3852       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
3853     } else {
3854       locations->SetInAt(arg_index, Location::RequiresRegister());
3855     }
3856   }
3857 
3858   // Add a temporary for offset.
3859   if (codegen->EmitNonBakerReadBarrier() &&
3860       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
3861     // To preserve the offset value across the non-Baker read barrier slow path
3862     // for loading the declaring class, use a fixed callee-save register.
3863     constexpr int first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
3864     locations->AddTemp(Location::RegisterLocation(first_callee_save));
3865   } else {
3866     locations->AddTemp(Location::RequiresRegister());
3867   }
3868   if (expected_coordinates_count == 0u) {
3869     // Add a temporary to hold the declaring class.
3870     locations->AddTemp(Location::RequiresRegister());
3871   }
3872 
3873   return locations;
3874 }
3875 
3876 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
3877   VarHandleOptimizations optimizations(invoke);
3878   if (optimizations.GetDoNotIntrinsify()) {
3879     return;
3880   }
3881 
3882   if (codegen->EmitNonBakerReadBarrier() &&
3883       invoke->GetType() == DataType::Type::kReference &&
3884       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
3885       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
3886     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
3887     // the passed reference and reloads it from the field. This gets the memory visibility
3888     // wrong for Acquire/Volatile operations. b/173104084
3889     return;
3890   }
3891 
3892   CreateVarHandleCommonLocations(invoke, codegen);
3893 }
3894 
3895 DataType::Type IntTypeForFloatingPointType(DataType::Type fp_type) {
3896   DCHECK(DataType::IsFloatingPointType(fp_type));
3897   return (fp_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64;
3898 }
3899 
3900 static void GenerateVarHandleGet(HInvoke* invoke,
3901                                  CodeGeneratorRISCV64* codegen,
3902                                  std::memory_order order,
3903                                  bool byte_swap = false) {
3904   DataType::Type type = invoke->GetType();
3905   DCHECK_NE(type, DataType::Type::kVoid);
3906 
3907   LocationSummary* locations = invoke->GetLocations();
3908   Riscv64Assembler* assembler = codegen->GetAssembler();
3909   Location out = locations->Out();
3910 
3911   VarHandleTarget target = GetVarHandleTarget(invoke);
3912   VarHandleSlowPathRISCV64* slow_path = nullptr;
3913   if (!byte_swap) {
3914     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
3915     GenerateVarHandleTarget(invoke, target, codegen);
3916     if (slow_path != nullptr) {
3917       __ Bind(slow_path->GetNativeByteOrderLabel());
3918     }
3919   }
3920 
3921   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
3922   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
3923   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
3924 
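       // Barrier placement for the load: relaxed/opaque accesses need no fences, acquire needs a
       // LoadAny barrier after the load, and seq_cst (volatile) additionally needs an AnyAny
       // barrier before it.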
3925   if (seq_cst_barrier) {
3926     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3927   }
3928 
3929   // Load the value from the target location.
3930   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
3931     Location index = Location::RegisterLocation(target.offset);
3932     // TODO(riscv64): Revisit when we add checking if the holder is black.
3933     Location temp = Location::NoLocation();
3934     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
3935                                                        out,
3936                                                        target.object,
3937                                                        /*offset=*/ 0,
3938                                                        index,
3939                                                        temp,
3940                                                        /*needs_null_check=*/ false);
3941     DCHECK(!byte_swap);
3942   } else {
3943     ScratchRegisterScope srs(assembler);
3944     XRegister address = srs.AllocateXRegister();
3945     __ Add(address, target.object, target.offset);
3946     Location load_loc = out;
3947     DataType::Type load_type = type;
3948     if (byte_swap && DataType::IsFloatingPointType(type)) {
3949       load_loc = Location::RegisterLocation(target.offset);  // Load to the offset temporary.
3950       load_type = IntTypeForFloatingPointType(type);
3951     }
3952     codegen->GetInstructionVisitor()->Load(load_loc, address, /*offset=*/ 0, load_type);
3953     if (type == DataType::Type::kReference) {
3954       DCHECK(!byte_swap);
3955       Location object_loc = Location::RegisterLocation(target.object);
3956       Location offset_loc = Location::RegisterLocation(target.offset);
3957       codegen->MaybeGenerateReadBarrierSlow(
3958           invoke, out, out, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
3959     } else if (byte_swap) {
3960       GenerateReverseBytes(codegen, out, load_loc.AsRegister<XRegister>(), type);
3961     }
3962   }
3963 
3964   if (acquire_barrier) {
3965     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3966   }
3967 
3968   if (slow_path != nullptr) {
3969     DCHECK(!byte_swap);
3970     __ Bind(slow_path->GetExitLabel());
3971   }
3972 }
3973 
3974 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGet(HInvoke* invoke) {
3975   CreateVarHandleGetLocations(invoke, codegen_);
3976 }
3977 
3978 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGet(HInvoke* invoke) {
3979   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
3980 }
3981 
3982 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3983   CreateVarHandleGetLocations(invoke, codegen_);
3984 }
3985 
3986 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3987   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
3988 }
3989 
3990 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3991   CreateVarHandleGetLocations(invoke, codegen_);
3992 }
3993 
3994 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3995   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
3996 }
3997 
3998 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
3999   CreateVarHandleGetLocations(invoke, codegen_);
4000 }
4001 
4002 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4003   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4004 }
4005 
4006 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
4007   VarHandleOptimizations optimizations(invoke);
4008   if (optimizations.GetDoNotIntrinsify()) {
4009     return;
4010   }
4011 
4012   CreateVarHandleCommonLocations(invoke, codegen);
4013   if (kPoisonHeapReferences && invoke->GetLocations() != nullptr) {
4014     LocationSummary* locations = invoke->GetLocations();
4015     uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4016     DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4017     if (value_type == DataType::Type::kReference && !locations->InAt(value_index).IsConstant()) {
4018       locations->AddTemp(Location::RequiresRegister());
4019     }
4020   }
4021 }
4022 
4023 static void GenerateVarHandleSet(HInvoke* invoke,
4024                                  CodeGeneratorRISCV64* codegen,
4025                                  std::memory_order order,
4026                                  bool byte_swap = false) {
4027   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4028   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4029 
4030   Riscv64Assembler* assembler = codegen->GetAssembler();
4031   Location value = invoke->GetLocations()->InAt(value_index);
4032 
4033   VarHandleTarget target = GetVarHandleTarget(invoke);
4034   VarHandleSlowPathRISCV64* slow_path = nullptr;
4035   if (!byte_swap) {
4036     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4037     GenerateVarHandleTarget(invoke, target, codegen);
4038     if (slow_path != nullptr) {
4039       __ Bind(slow_path->GetNativeByteOrderLabel());
4040     }
4041   }
4042 
4043   {
4044     ScratchRegisterScope srs(assembler);
4045     // Heap poisoning needs two scratch registers in `Store()`, except for null constants.
4046     XRegister address =
4047         (kPoisonHeapReferences && value_type == DataType::Type::kReference && !value.IsConstant())
4048             ? invoke->GetLocations()->GetTemp(0).AsRegister<XRegister>()
4049             : srs.AllocateXRegister();
4050     __ Add(address, target.object, target.offset);
4051 
4052     if (byte_swap) {
4053       DCHECK(!value.IsConstant());  // Zero uses the main path as it does not need a byte swap.
4054       // The offset is no longer needed, so reuse the offset temporary for the byte-swapped value.
4055       Location new_value = Location::RegisterLocation(target.offset);
4056       if (DataType::IsFloatingPointType(value_type)) {
4057         value_type = IntTypeForFloatingPointType(value_type);
4058         codegen->MoveLocation(new_value, value, value_type);
4059         value = new_value;
4060       }
4061       GenerateReverseBytes(codegen, new_value, value.AsRegister<XRegister>(), value_type);
4062       value = new_value;
4063     }
4064 
4065     GenerateSet(codegen, order, value, address, /*offset=*/ 0, value_type);
4066   }
4067 
4068   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4069     codegen->MaybeMarkGCCard(
4070         target.object, value.AsRegister<XRegister>(), /* emit_null_check= */ true);
4071   }
4072 
4073   if (slow_path != nullptr) {
4074     DCHECK(!byte_swap);
4075     __ Bind(slow_path->GetExitLabel());
4076   }
4077 }
4078 
4079 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSet(HInvoke* invoke) {
4080   CreateVarHandleSetLocations(invoke, codegen_);
4081 }
4082 
4083 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSet(HInvoke* invoke) {
4084   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4085 }
4086 
4087 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4088   CreateVarHandleSetLocations(invoke, codegen_);
4089 }
4090 
4091 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4092   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4093 }
4094 
4095 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
4096   CreateVarHandleSetLocations(invoke, codegen_);
4097 }
4098 
4099 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
4100   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
4101 }
4102 
4103 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4104   CreateVarHandleSetLocations(invoke, codegen_);
4105 }
4106 
4107 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4108   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
4109 }
4110 
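     // Returns whether this operand needs a scratch X register before it can be used in the update
     // loop: zero constants need none, while floating-point values, sub-word values (which must be
     // shifted and masked) and byte-swapped values are first transformed in a temporary register.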
4111 static bool ScratchXRegisterNeeded(Location loc, DataType::Type type, bool byte_swap) {
4112   if (loc.IsConstant()) {
4113     DCHECK(loc.GetConstant()->IsZeroBitPattern());
4114     return false;
4115   }
4116   return DataType::IsFloatingPointType(type) || DataType::Size(type) < 4u || byte_swap;
4117 }
4118 
4119 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4120                                                             CodeGeneratorRISCV64* codegen,
4121                                                             bool return_success) {
4122   VarHandleOptimizations optimizations(invoke);
4123   if (optimizations.GetDoNotIntrinsify()) {
4124     return;
4125   }
4126 
4127   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4128   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4129   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4130   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4131 
4132   bool is_reference = (value_type == DataType::Type::kReference);
4133   if (is_reference && codegen->EmitNonBakerReadBarrier()) {
4134     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4135     // the passed reference and reloads it from the field. This breaks the read barriers
4136     // in slow path in different ways. The marked old value may not actually be a to-space
4137     // reference to the same object as `old_value`, breaking slow path assumptions. And
4138     // for CompareAndExchange, marking the old value after comparison failure may actually
4139     // return the reference to `expected`, erroneously indicating success even though we
4140     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4141     return;
4142   }
4143 
4144   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
4145   if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4146     return;
4147   }
4148 
4149   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4150   DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4151 
4152   if (codegen->EmitNonBakerReadBarrier()) {
4153     // We need callee-save registers for both the class object and offset instead of
4154     // the temporaries reserved in CreateVarHandleCommonLocations().
4155     static_assert(POPCOUNT(kRiscv64CalleeSaveRefSpills) >= 2u);
4156     uint32_t first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
4157     uint32_t second_callee_save = CTZ(kRiscv64CalleeSaveRefSpills ^ (1u << first_callee_save));
4158     if (expected_index == 1u) {  // For static fields.
4159       DCHECK_EQ(locations->GetTempCount(), 2u);
4160       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4161       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4162       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4163     } else {
4164       DCHECK_EQ(locations->GetTempCount(), 1u);
4165       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4166       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4167     }
4168   }
4169 
4170   size_t old_temp_count = locations->GetTempCount();
4171   DCHECK_EQ(old_temp_count, (expected_index == 1u) ? 2u : 1u);
4172   Location expected = locations->InAt(expected_index);
4173   Location new_value = locations->InAt(new_value_index);
4174   size_t data_size = DataType::Size(value_type);
4175   bool is_small = (data_size < 4u);
4176   bool can_byte_swap =
4177       (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
4178   bool is_fp = DataType::IsFloatingPointType(value_type);
4179   size_t temps_needed =
4180       // The offset temp is used for the `tmp_ptr`, except for the read barrier case. For read
4181       // barrier we must preserve the offset and class pointer (if any) for the slow path and
4182       // use a separate temp for `tmp_ptr` and we also need another temp for `old_value_temp`.
4183       ((is_reference && codegen->EmitReadBarrier()) ? old_temp_count + 2u : 1u) +
4184       // For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`.
4185       (is_small ? (return_success ? 2u : 3u) : 0u) +
4186       // Some cases need modified copies of `new_value` and `expected`.
4187       (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) +
4188       (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) +
4189       // We need a scratch register either for the old value or for the result of SC.
4190       // If we need to return a floating point old value, we need a temp for each.
4191       ((!return_success && is_fp) ? 2u : 1u);
4192   size_t scratch_registers_available = 2u;
4193   DCHECK_EQ(scratch_registers_available,
4194             ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
4195   if (temps_needed > old_temp_count + scratch_registers_available) {
4196     locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
4197   }
4198 }
4199 
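     // Materializes an operand in an X register for the update loop: zero constants use the Zero
     // register, floating-point values are first moved to a scratch core register, and, if
     // requested, the value is byte-swapped and, for sub-word accesses, shifted into its lane and
     // masked as needed.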
4200 static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
4201                                   Location loc,
4202                                   DataType::Type type,
4203                                   XRegister shift,
4204                                   XRegister mask,
4205                                   bool byte_swap,
4206                                   ScratchRegisterScope* srs) {
4207   DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister);
4208   DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u);
4209   if (loc.IsConstant()) {
4210     // The `shift`/`mask` and `byte_swap` are irrelevant for zero input.
4211     DCHECK(loc.GetConstant()->IsZeroBitPattern());
4212     return Zero;
4213   }
4214 
4215   Location result = loc;
4216   if (DataType::IsFloatingPointType(type)) {
4217     type = IntTypeForFloatingPointType(type);
4218     result = Location::RegisterLocation(srs->AllocateXRegister());
4219     codegen->MoveLocation(result, loc, type);
4220     loc = result;
4221   } else if (byte_swap || shift != kNoXRegister) {
4222     result = Location::RegisterLocation(srs->AllocateXRegister());
4223   }
4224   if (byte_swap) {
4225     if (type == DataType::Type::kInt16) {
4226       type = DataType::Type::kUint16;  // Do the masking as part of the byte swap.
4227     }
4228     GenerateReverseBytes(codegen, result, loc.AsRegister<XRegister>(), type);
4229     loc = result;
4230   }
4231   if (shift != kNoXRegister) {
4232     Riscv64Assembler* assembler = codegen->GetAssembler();
4233     __ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift);
4234     DCHECK_NE(type, DataType::Type::kUint8);
4235     if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
4236       __ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask);
4237     }
4238   }
4239   return result.AsRegister<XRegister>();
4240 }
4241 
4242 static void GenerateByteSwapAndExtract(CodeGeneratorRISCV64* codegen,
4243                                        Location rd,
4244                                        XRegister rs1,
4245                                        XRegister shift,
4246                                        DataType::Type type) {
4247   // Apply shift before `GenerateReverseBytes()` for small types.
4248   DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u);
4249   if (shift != kNoXRegister) {
4250     Riscv64Assembler* assembler = codegen->GetAssembler();
4251     __ Srlw(rd.AsRegister<XRegister>(), rs1, shift);
4252     rs1 = rd.AsRegister<XRegister>();
4253   }
4254   // Also handles moving to FP registers.
4255   GenerateReverseBytes(codegen, rd, rs1, type);
4256 }
4257 
4258 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4259                                                      CodeGeneratorRISCV64* codegen,
4260                                                      std::memory_order order,
4261                                                      bool return_success,
4262                                                      bool strong,
4263                                                      bool byte_swap = false) {
4264   DCHECK(return_success || strong);
4265 
4266   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4267   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4268   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4269   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4270 
4271   Riscv64Assembler* assembler = codegen->GetAssembler();
4272   LocationSummary* locations = invoke->GetLocations();
4273   Location expected = locations->InAt(expected_index);
4274   Location new_value = locations->InAt(new_value_index);
4275   Location out = locations->Out();
4276 
4277   VarHandleTarget target = GetVarHandleTarget(invoke);
4278   VarHandleSlowPathRISCV64* slow_path = nullptr;
4279   if (!byte_swap) {
4280     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4281     GenerateVarHandleTarget(invoke, target, codegen);
4282     if (slow_path != nullptr) {
4283       slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4284       __ Bind(slow_path->GetNativeByteOrderLabel());
4285     }
4286   }
4287 
4288   // This needs to be before we allocate the scratch registers, as MarkGCCard also uses them.
4289   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4290     // Mark card for object assuming new value is stored.
4291     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
4292     codegen->MaybeMarkGCCard(
4293         target.object, new_value.AsRegister<XRegister>(), new_value_can_be_null);
4294   }
4295 
4296   // Scratch registers may be needed for `new_value` and `expected`.
4297   ScratchRegisterScope srs(assembler);
4298   DCHECK_EQ(srs.AvailableXRegisters(), 2u);
4299   size_t available_scratch_registers =
4300       (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 0u : 1u) +
4301       (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 0u : 1u);
4302 
4303   // Reuse the `offset` temporary for the pointer to the target location,
4304   // except for references that need the offset for the read barrier.
4305   DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
4306   size_t next_temp = 1u;
4307   XRegister tmp_ptr = target.offset;
4308   bool is_reference = (value_type == DataType::Type::kReference);
4309   if (is_reference && codegen->EmitReadBarrier()) {
4310     // Reserve scratch registers for `tmp_ptr` and `old_value_temp`.
4311     DCHECK_EQ(available_scratch_registers, 2u);
4312     available_scratch_registers = 0u;
4313     DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4314     next_temp = expected_index == 1u ? 2u : 1u;  // Preserve the class register for static field.
4315     tmp_ptr = srs.AllocateXRegister();
4316   }
4317   __ Add(tmp_ptr, target.object, target.offset);
4318 
4319   auto get_temp = [&]() {
4320     if (available_scratch_registers != 0u) {
4321       available_scratch_registers -= 1u;
4322       return srs.AllocateXRegister();
4323     } else {
4324       XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
4325       next_temp += 1u;
4326       return temp;
4327     }
4328   };
4329 
4330   XRegister shift = kNoXRegister;
4331   XRegister mask = kNoXRegister;
4332   XRegister masked = kNoXRegister;
4333   size_t data_size = DataType::Size(value_type);
4334   bool is_small = (data_size < 4u);
4335   if (is_small) {
4336     // When returning "success" and not the old value, we shall not need the `shift` after
4337     // the raw CAS operation, so use the output register as a temporary here.
4338     shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp();
4339     mask = get_temp();
4340     masked = get_temp();
4341     // Upper bits of the shift are not used, so we do not need to clear them.
4342     __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
4343     __ Andi(tmp_ptr, tmp_ptr, -4);
4344     __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
4345     __ Sllw(mask, mask, shift);
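         // Example: for a 16-bit value at byte offset 2 within the aligned 32-bit word, the used
         // low bits of `shift` equal 16, `tmp_ptr` is rounded down to the aligned word, and
         // `mask == 0xffff << 16 == 0xffff0000`.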
4346   }
4347 
4348   // Move floating point values to scratch registers and apply shift, mask and byte swap if needed.
4349   // Note that float/double CAS uses bitwise comparison, rather than the operator==.
4350   XRegister expected_reg =
4351       PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs);
4352   XRegister new_value_reg =
4353       PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs);
4354   bool is_fp = DataType::IsFloatingPointType(value_type);
4355   DataType::Type cas_type = is_fp
4356       ? IntTypeForFloatingPointType(value_type)
4357       : (is_small ? DataType::Type::kInt32 : value_type);
4358 
4359   // Prepare registers for old value and the result of the store conditional.
4360   XRegister old_value;
4361   XRegister store_result;
4362   if (return_success) {
4363     // Use a temp for the old value.
4364     old_value = get_temp();
4365     // For strong CAS, use the `old_value` temp also for the SC result.
4366     // For weak CAS, put the SC result directly to `out`.
4367     store_result = strong ? old_value : out.AsRegister<XRegister>();
4368   } else if (is_fp) {
4369     // We need two temporary registers.
4370     old_value = get_temp();
4371     store_result = get_temp();
4372   } else {
4373     // Use the output register for the old value and a temp for the store conditional result.
4374     old_value = out.AsRegister<XRegister>();
4375     store_result = get_temp();
4376   }
4377 
4378   Riscv64Label exit_loop_label;
4379   Riscv64Label* exit_loop = &exit_loop_label;
4380   Riscv64Label* cmp_failure = &exit_loop_label;
4381 
4382   ReadBarrierCasSlowPathRISCV64* rb_slow_path = nullptr;
4383   if (is_reference && codegen->EmitReadBarrier()) {
4384     // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
4385     // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
4386     // as we do have marking entrypoints on riscv64 even for scratch registers.
4387     XRegister old_value_temp = srs.AllocateXRegister();
4388     // For strong CAS, use the `old_value_temp` also for the SC result as the reloaded old value
4389     // is no longer needed after the comparison. For weak CAS, store the SC result in the same
4390     // result register as the main path.
4391     // Note that for a strong CAS, a SC failure in the slow path can set the register to 1, so
4392     // we cannot use that register to indicate success without resetting it to 0 at the start of
4393     // the retry loop. Instead, we return to the success indicating instruction in the main path.
4394     XRegister slow_path_store_result = strong ? old_value_temp : store_result;
4395     rb_slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
4396         invoke,
4397         order,
4398         strong,
4399         target.object,
4400         target.offset,
4401         expected_reg,
4402         new_value_reg,
4403         old_value,
4404         old_value_temp,
4405         slow_path_store_result,
4406         /*update_old_value=*/ !return_success,
4407         codegen);
4408     codegen->AddSlowPath(rb_slow_path);
4409     exit_loop = rb_slow_path->GetExitLabel();
4410     cmp_failure = rb_slow_path->GetEntryLabel();
4411   }
4412 
4413   if (return_success) {
4414     // Pre-populate the output register with failure for the case when the old value
4415     // differs and we do not execute the store conditional.
4416     __ Li(out.AsRegister<XRegister>(), 0);
4417   }
4418   GenerateCompareAndSet(codegen->GetAssembler(),
4419                         cas_type,
4420                         order,
4421                         strong,
4422                         cmp_failure,
4423                         tmp_ptr,
4424                         new_value_reg,
4425                         old_value,
4426                         mask,
4427                         masked,
4428                         store_result,
4429                         expected_reg);
4430   if (return_success && strong) {
4431     if (rb_slow_path != nullptr) {
4432       // Slow path returns here on success.
4433       __ Bind(rb_slow_path->GetSuccessExitLabel());
4434     }
4435     // Load success value to the output register.
4436     // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS.
4437     __ Li(out.AsRegister<XRegister>(), 1);
4438   } else if (rb_slow_path != nullptr) {
4439     DCHECK(!rb_slow_path->GetSuccessExitLabel()->IsLinked());
4440   }
4441   __ Bind(exit_loop);
4442 
4443   if (return_success) {
4444     // Nothing to do, the result register already contains 1 on success and 0 on failure.
4445   } else if (byte_swap) {
4446     DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value)
4447         << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value;
4448     GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
4449   } else if (is_fp) {
4450     codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
4451   } else if (is_small) {
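         // `masked` holds only the bits selected by `mask`, so after shifting the old value
         // down it is already zero-extended; only signed sub-word types need explicit
         // sign-extension.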
4452     __ Srlw(old_value, masked, shift);
4453     if (value_type == DataType::Type::kInt8) {
4454       __ SextB(old_value, old_value);
4455     } else if (value_type == DataType::Type::kInt16) {
4456       __ SextH(old_value, old_value);
4457     }
4458   }
4459 
4460   if (slow_path != nullptr) {
4461     DCHECK(!byte_swap);
4462     __ Bind(slow_path->GetExitLabel());
4463   }
4464 
4465   // Check that we have allocated the right number of temps. We may need more registers
4466   // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
4467   bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u);
4468   if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
4469     // We allocate a temporary register for the class object for a static field `VarHandle` but
4470     // we do not update the `next_temp` if it's otherwise unused after the address calculation.
4471     CHECK_EQ(expected_index, 1u);
4472     CHECK_EQ(next_temp, 1u);
4473     CHECK_EQ(locations->GetTempCount(), 2u);
4474   }
4475 }
4476 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4477 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4478   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4479 }
4480 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4481 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4482   GenerateVarHandleCompareAndSetOrExchange(
4483       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
4484 }
4485 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4486 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4487   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4488 }
4489 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4490 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4491   GenerateVarHandleCompareAndSetOrExchange(
4492       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
4493 }
4494 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4495 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4496   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
4497 }
4498 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4499 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4500   GenerateVarHandleCompareAndSetOrExchange(
4501       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
4502 }
4503 
VisitVarHandleCompareAndSet(HInvoke * invoke)4504 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4505   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4506 }
4507 
VisitVarHandleCompareAndSet(HInvoke * invoke)4508 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4509   GenerateVarHandleCompareAndSetOrExchange(
4510       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
4511 }
4512 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4513 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4514   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4515 }
4516 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4517 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4518   GenerateVarHandleCompareAndSetOrExchange(
4519       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
4520 }
4521 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4522 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4523   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4524 }
4525 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4526 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4527   GenerateVarHandleCompareAndSetOrExchange(
4528       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
4529 }
4530 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4531 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4532   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4533 }
4534 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4535 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4536   GenerateVarHandleCompareAndSetOrExchange(
4537       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
4538 }
4539 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4540 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4541   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
4542 }
4543 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4544 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4545   GenerateVarHandleCompareAndSetOrExchange(
4546       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
4547 }
4548 
CreateVarHandleGetAndUpdateLocations(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op)4549 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
4550                                                  CodeGeneratorRISCV64* codegen,
4551                                                  GetAndUpdateOp get_and_update_op) {
4552   VarHandleOptimizations optimizations(invoke);
4553   if (optimizations.GetDoNotIntrinsify()) {
4554     return;
4555   }
4556 
4557   // Get the type from the shorty as the invokes may not return a value.
4558   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
4559   DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4560   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
4561   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
4562     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4563     // the passed reference and reloads it from the field, thus seeing the new value
4564     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
4565     return;
4566   }
4567 
4568   // TODO(riscv64): Fix this intrinsic for heap poisoning configuration.
4569   if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4570     return;
4571   }
4572 
4573   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4574   Location arg = locations->InAt(arg_index);
4575 
4576   bool is_fp = DataType::IsFloatingPointType(value_type);
4577   if (is_fp) {
4578     if (get_and_update_op == GetAndUpdateOp::kAdd) {
4579       // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
4580       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4581     } else {
4582       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4583     }
4584   }
4585 
4586   size_t data_size = DataType::Size(value_type);
4587   bool can_byte_swap =
4588       (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
4589   bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp);
4590   bool is_small = (data_size < 4u);
4591   bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
4592   bool is_bitwise =
4593       (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd);
4594 
4595   size_t temps_needed =
4596       // The offset temp is used for the `tmp_ptr`.
4597       1u +
4598       // For small values, we need temps for `shift` and maybe also `mask` and `temp`.
4599       (is_small ? (is_bitwise ? 1u : 3u) : 0u) +
4600       // Some cases need modified copies of `arg`.
4601       (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) +
4602       // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`.
4603       (is_fp ? 1u : 0u);
4604   if (can_use_cas) {
4605     size_t cas_temps_needed =
4606         // The offset temp is used for the `tmp_ptr`.
4607         1u +
4608         // For small values, we need a temp for `shift`.
4609         (is_small ? 1u : 0u) +
4610         // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`.
4611         3u;
4612     DCHECK_GE(cas_temps_needed, temps_needed);
4613     temps_needed = cas_temps_needed;
4614   }
4615 
4616   size_t scratch_registers_available = 2u;
4617   DCHECK_EQ(scratch_registers_available,
4618             ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
4619   size_t old_temp_count = locations->GetTempCount();
4620   DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 2u : 1u);
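       // `CreateVarHandleCommonLocations()` already added a temp for the offset (plus one that
       // holds the declaring class of a static field when `arg_index == 1u`); only the registers
       // that the existing temps and the two scratch registers cannot cover are added below.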
4621   if (temps_needed > old_temp_count + scratch_registers_available) {
4622     locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
4623   }
4624 
4625   // Request another temporary register for methods that don't return a value.
4626   // For the non-void case, we already set `out` in `CreateVarHandleCommonLocations`.
4627   DataType::Type return_type = invoke->GetType();
4628   const bool is_void = return_type == DataType::Type::kVoid;
4629   DCHECK_IMPLIES(!is_void, return_type == value_type);
4630   if (is_void) {
4631     if (DataType::IsFloatingPointType(value_type)) {
4632       locations->AddTemp(Location::RequiresFpuRegister());
4633     } else {
4634       locations->AddTemp(Location::RequiresRegister());
4635     }
4636   }
4637 }
4638 
GenerateVarHandleGetAndUpdate(HInvoke * invoke,CodeGeneratorRISCV64 * codegen,GetAndUpdateOp get_and_update_op,std::memory_order order,bool byte_swap=false)4639 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4640                                           CodeGeneratorRISCV64* codegen,
4641                                           GetAndUpdateOp get_and_update_op,
4642                                           std::memory_order order,
4643                                           bool byte_swap = false) {
4644   // Get the type from the shorty as the invokes may not return a value.
4645   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
4646   DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
4647   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
4648 
4649   Riscv64Assembler* assembler = codegen->GetAssembler();
4650   LocationSummary* locations = invoke->GetLocations();
4651   Location arg = locations->InAt(arg_index);
4652   DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
4653   DataType::Type return_type = invoke->GetType();
4654   const bool is_void = return_type == DataType::Type::kVoid;
4655   DCHECK_IMPLIES(!is_void, return_type == value_type);
4656   // We use a temporary for void methods, as we don't return the value.
4657   Location out_or_temp =
4658       is_void ? locations->GetTemp(locations->GetTempCount() - 1u) : locations->Out();
4659 
4660   VarHandleTarget target = GetVarHandleTarget(invoke);
4661   VarHandleSlowPathRISCV64* slow_path = nullptr;
4662   if (!byte_swap) {
4663     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4664     GenerateVarHandleTarget(invoke, target, codegen);
4665     if (slow_path != nullptr) {
4666       slow_path->SetGetAndUpdateOp(get_and_update_op);
4667       __ Bind(slow_path->GetNativeByteOrderLabel());
4668     }
4669   }
4670 
4671   // This must be done before the scratch register uses below, as MaybeMarkGCCard() also uses scratch registers.
4672   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
4673     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
4674     // Mark the card for the object, as a new reference value is being stored.
4675     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
4676     codegen->MaybeMarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null);
4677   }
4678 
4679   size_t data_size = DataType::Size(value_type);
4680   bool is_fp = DataType::IsFloatingPointType(value_type);
4681   bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp);
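       // There is no AMO instruction that adds in the floating-point domain or on a byte-swapped
       // value, so those cases fall back to a load/compute/compare-and-set retry loop.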
4682   bool is_small = (data_size < 4u);
4683   bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
4684   bool is_reference = (value_type == DataType::Type::kReference);
4685   DataType::Type op_type = is_fp
4686       ? IntTypeForFloatingPointType(value_type)
4687       : (is_small || is_reference ? DataType::Type::kInt32 : value_type);
4688 
4689   ScratchRegisterScope srs(assembler);
4690   DCHECK_EQ(srs.AvailableXRegisters(), 2u);
4691   size_t available_scratch_registers = use_cas
4692       // We use scratch registers differently for the CAS path.
4693       ? 0u
4694       // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation.
4695       : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u);
4696 
4697   // Reuse the `target.offset` temporary for the pointer to the target location,
4698   // except for references that need the offset for the non-Baker read barrier.
4699   DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
4700   size_t next_temp = 1u;
4701   XRegister tmp_ptr = target.offset;
4702   if (is_reference && codegen->EmitNonBakerReadBarrier()) {
4703     DCHECK_EQ(available_scratch_registers, 2u);
4704     available_scratch_registers -= 1u;
4705     tmp_ptr = srs.AllocateXRegister();
4706   }
4707   __ Add(tmp_ptr, target.object, target.offset);
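       // `tmp_ptr` now holds the absolute address of the accessed field or array element.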
4708 
4709   auto get_temp = [&]() {
4710     if (available_scratch_registers != 0u) {
4711       available_scratch_registers -= 1u;
4712       return srs.AllocateXRegister();
4713     } else {
4714       DCHECK_IMPLIES(is_void, next_temp != locations->GetTempCount() - 1u)
4715           << "The last temp is special for the void case, as it represents the out register.";
4716       XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
4717       next_temp += 1u;
4718       return temp;
4719     }
4720   };
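       // Temps are taken from the scratch register scope first and only then from the
       // `LocationSummary` temps, matching the budget computed in
       // `CreateVarHandleGetAndUpdateLocations()`.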
4721 
4722   XRegister shift = kNoXRegister;
4723   XRegister mask = kNoXRegister;
4724   XRegister prepare_mask = kNoXRegister;
4725   XRegister temp = kNoXRegister;
4726   XRegister arg_reg = kNoXRegister;
4727   if (is_small) {
4728     shift = get_temp();
4729     // Upper bits of the shift are not used, so we do not need to clear them.
4730     __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
4731     __ Andi(tmp_ptr, tmp_ptr, -4);
4732     switch (get_and_update_op) {
4733       case GetAndUpdateOp::kAdd:
4734         if (byte_swap) {
4735           // The mask is not needed in the CAS path.
4736           DCHECK(use_cas);
4737           break;
4738         }
4739         FALLTHROUGH_INTENDED;
4740       case GetAndUpdateOp::kSet:
4741         mask = get_temp();
4742         temp = get_temp();
4743         __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
4744         __ Sllw(mask, mask, shift);
4745         // The argument does not need to be masked for `GetAndUpdateOp::kAdd`;
4746         // the mask shall be applied after the ADD instruction.
4747         prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister;
4748         break;
4749       case GetAndUpdateOp::kAnd:
4750         // We need to set all other bits, so we always need a temp.
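             // Bits outside the operand are forced to 1 so that AND-ing the whole 32-bit word
             // leaves the neighboring bytes unchanged; `Rolw` (a rotate, not a shift) then moves
             // the operand into its byte lane while keeping the wrapped-around bits set to 1.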
4751         arg_reg = srs.AllocateXRegister();
4752         if (data_size == 1u) {
4753           __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff);
4754           DCHECK(!byte_swap);
4755         } else {
4756           DCHECK_EQ(data_size, 2u);
4757           __ Li(arg_reg, ~0xffff);
4758           __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg);
4759           if (byte_swap) {
4760             __ Rev8(arg_reg, arg_reg);
4761             __ Rori(arg_reg, arg_reg, 48);
4762           }
4763         }
4764         __ Rolw(arg_reg, arg_reg, shift);
4765         break;
4766       case GetAndUpdateOp::kOr:
4767       case GetAndUpdateOp::kXor:
4768         // Signed values need to be zero-extended ("truncated") but we keep `prepare_mask == kNoXRegister`.
4769         if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) {
4770           DCHECK(!byte_swap);
4771           arg_reg = srs.AllocateXRegister();
4772           __ ZextB(arg_reg, arg.AsRegister<XRegister>());
4773           __ Sllw(arg_reg, arg_reg, shift);
4774         } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) {
4775           arg_reg = srs.AllocateXRegister();
4776           __ ZextH(arg_reg, arg.AsRegister<XRegister>());
4777           __ Sllw(arg_reg, arg_reg, shift);
4778         }  // else handled by `PrepareXRegister()` below.
4779         break;
4780     }
4781   }
4782   if (arg_reg == kNoXRegister && !use_cas) {
4783     arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs);
4784   }
4785   if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) {
4786     __ Not(mask, mask);  // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`.
4787   }
4788 
4789   if (use_cas) {
4790     // Allocate scratch registers for temps that can theoretically be clobbered on retry.
4791     // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.)
4792     DCHECK_EQ(available_scratch_registers, 0u);  // Reserved for the two uses below.
4793     XRegister old_value = srs.AllocateXRegister();
4794     XRegister new_value = srs.AllocateXRegister();
4795     // Allocate other needed temporaries.
4796     XRegister reloaded_old_value = get_temp();
4797     XRegister store_result = reloaded_old_value;  // Clobber reloaded old value by store result.
4798     FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister;
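         // CAS-based retry loop: load the old value, add `arg` (in the FP domain for
         // float/double), convert the sum back to the stored representation (byte-swapped and,
         // for 16-bit values, merged back into the containing word), then attempt a strong CAS
         // until it succeeds.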
4799 
4800     Riscv64Label retry;
4801     __ Bind(&retry);
4802     codegen->GetInstructionVisitor()->Load(
4803         Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
4804     if (byte_swap) {
4805       GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
4806     } else {
4807       DCHECK(is_fp);
4808       codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
4809     }
4810     if (is_fp) {
4811       codegen->GetInstructionVisitor()->FAdd(
4812           ftmp, out_or_temp.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
4813       codegen->MoveLocation(
4814           Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
4815     } else if (arg.IsConstant()) {
4816       DCHECK(arg.GetConstant()->IsZeroBitPattern());
4817       __ Mv(new_value, out_or_temp.AsRegister<XRegister>());
4818     } else if (value_type == DataType::Type::kInt64) {
4819       __ Add(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
4820     } else {
4821       DCHECK_EQ(op_type, DataType::Type::kInt32);
4822       __ Addw(new_value, out_or_temp.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
4823     }
4824     if (byte_swap) {
4825       DataType::Type swap_type = op_type;
4826       if (is_small) {
4827         DCHECK_EQ(data_size, 2u);
4828         // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace
4829         // are present in both `old_value` and `out` but in different bits and byte order.
4830         // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
4831         // (extracting only the bits we want to update), shift and XOR with the old value.
4832         swap_type = DataType::Type::kUint16;
4833         __ Xor(new_value, new_value, out_or_temp.AsRegister<XRegister>());
4834       }
4835       GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type);
4836       if (is_small) {
4837         __ Sllw(new_value, new_value, shift);
4838         __ Xor(new_value, new_value, old_value);
4839       }
4840     }
4841     GenerateCompareAndSet(assembler,
4842                           op_type,
4843                           order,
4844                           /*strong=*/ true,
4845                           /*cmp_failure=*/ &retry,
4846                           tmp_ptr,
4847                           new_value,
4848                           /*old_value=*/ reloaded_old_value,
4849                           /*mask=*/ kNoXRegister,
4850                           /*masked=*/ kNoXRegister,
4851                           store_result,
4852                           /*expected=*/ old_value);
4853   } else {
4854     XRegister old_value = is_fp ? get_temp() : out_or_temp.AsRegister<XRegister>();
4855     GenerateGetAndUpdate(
4856         codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
4857     if (byte_swap) {
4858       DCHECK_IMPLIES(is_small, out_or_temp.AsRegister<XRegister>() == old_value)
4859           << " " << value_type << " " << out_or_temp.AsRegister<XRegister>() << "!=" << old_value;
4860       GenerateByteSwapAndExtract(codegen, out_or_temp, old_value, shift, value_type);
4861     } else if (is_fp) {
4862       codegen->MoveLocation(out_or_temp, Location::RegisterLocation(old_value), value_type);
4863     } else if (is_small) {
4864       __ Srlw(old_value, old_value, shift);
4865       DCHECK_NE(value_type, DataType::Type::kUint8);
4866       if (value_type == DataType::Type::kInt8) {
4867         __ SextB(old_value, old_value);
4868       } else if (value_type == DataType::Type::kBool) {
4869         __ ZextB(old_value, old_value);
4870       } else if (value_type == DataType::Type::kInt16) {
4871         __ SextH(old_value, old_value);
4872       } else {
4873         DCHECK_EQ(value_type, DataType::Type::kUint16);
4874         __ ZextH(old_value, old_value);
4875       }
4876     } else if (is_reference) {
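           // The 32-bit result of the exchange is sign-extended in the 64-bit register, while
           // heap references are unsigned 32-bit values, so zero-extend before the reference
           // is used.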
4877       __ ZextW(old_value, old_value);
4878       if (codegen->EmitBakerReadBarrier()) {
4879         // Use RA as temp. It is clobbered in the slow path anyway.
4880         static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
4881         SlowPathCodeRISCV64* rb_slow_path = codegen->AddGcRootBakerBarrierBarrierSlowPath(
4882             invoke, out_or_temp, kBakerReadBarrierTemp);
4883         codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_or_temp, kBakerReadBarrierTemp);
4884       } else if (codegen->EmitNonBakerReadBarrier()) {
4885         Location base_loc = Location::RegisterLocation(target.object);
4886         Location index = Location::RegisterLocation(target.offset);
4887         SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
4888             invoke, out_or_temp, out_or_temp, base_loc, /*offset=*/ 0u, index);
4889         __ J(rb_slow_path->GetEntryLabel());
4890         __ Bind(rb_slow_path->GetExitLabel());
4891       }
4892     }
4893   }
4894 
4895   if (slow_path != nullptr) {
4896     DCHECK(!byte_swap);
4897     __ Bind(slow_path->GetExitLabel());
4898   }
4899 
4900   // Check that we have allocated the right number of temps. We may need more registers
4901   // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
4902   // In the void case, we requested an extra register to mimic the `out` register.
4903   const size_t extra_temp_registers = is_void ? 1u : 0u;
4904   bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
4905   if ((!has_byte_swap || byte_swap) &&
4906       next_temp != locations->GetTempCount() - extra_temp_registers) {
4907     // We allocate a temporary register for the class object for a static field `VarHandle` but
4908     // we do not update the `next_temp` if it's otherwise unused after the address calculation.
4909     CHECK_EQ(arg_index, 1u);
4910     CHECK_EQ(next_temp, 1u);
4911     CHECK_EQ(locations->GetTempCount(), 2u + extra_temp_registers);
4912   }
4913 }
4914 
VisitVarHandleGetAndSet(HInvoke * invoke)4915 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4916   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4917 }
4918 
VisitVarHandleGetAndSet(HInvoke * invoke)4919 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4920   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
4921 }
4922 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4923 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4924   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4925 }
4926 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4927 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4928   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
4929 }
4930 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4931 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4932   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
4933 }
4934 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4935 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4936   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
4937 }
4938 
VisitVarHandleGetAndAdd(HInvoke * invoke)4939 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4940   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4941 }
4942 
VisitVarHandleGetAndAdd(HInvoke * invoke)4943 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4944   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
4945 }
4946 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4947 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4948   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4949 }
4950 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4951 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4952   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
4953 }
4954 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4955 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4956   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
4957 }
4958 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4959 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4960   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
4961 }
4962 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4963 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4964   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4965 }
4966 
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4967 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4968   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
4969 }
4970 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4971 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4972   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4973 }
4974 
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4975 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4976   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
4977 }
4978 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4979 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4980   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
4981 }
4982 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4983 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4984   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
4985 }
4986 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)4987 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4988   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
4989 }
4990 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)4991 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4992   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
4993 }
4994 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)4995 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4996   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
4997 }
4998 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)4999 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5000   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5001 }
5002 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5003 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5004   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5005 }
5006 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5007 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5008   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5009 }
5010 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5011 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5012   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5013 }
5014 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5015 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5016   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5017 }
5018 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5019 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5020   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5021 }
5022 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5023 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5024   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5025 }
5026 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5027 void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5028   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5029 }
5030 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5031 void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5032   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5033 }
5034 
EmitByteArrayViewCode(CodeGenerator * codegen_in)5035 void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5036   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5037   CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
5038   Riscv64Assembler* assembler = codegen->GetAssembler();
5039   HInvoke* invoke = GetInvoke();
5040   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5041   DataType::Type value_type =
5042       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5043   DCHECK_NE(value_type, DataType::Type::kReference);
5044   size_t size = DataType::Size(value_type);
5045   DCHECK_GT(size, 1u);
5046   LocationSummary* locations = invoke->GetLocations();
5047   XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
5048   XRegister object = locations->InAt(1).AsRegister<XRegister>();
5049   XRegister index = locations->InAt(2).AsRegister<XRegister>();
5050 
5051   MemberOffset class_offset = mirror::Object::ClassOffset();
5052   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5053   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5054   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5055 
5056   __ Bind(GetByteArrayViewCheckLabel());
5057 
5058   VarHandleTarget target = GetVarHandleTarget(invoke);
5059   {
5060     ScratchRegisterScope srs(assembler);
5061     XRegister temp = srs.AllocateXRegister();
5062     XRegister temp2 = srs.AllocateXRegister();
5063 
5064     // The main path checked that the coordinateType0 is an array class that matches
5065     // the class of the actual coordinate argument but it does not match the value type.
5066     // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5067     __ Loadwu(temp, varhandle, class_offset.Int32Value());
5068     codegen->MaybeUnpoisonHeapReference(temp);
5069     codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5070     __ Bne(temp, temp2, GetEntryLabel());
5071 
5072     // Check for array index out of bounds.
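         // Both the first byte (`index`) and the last byte (`index + size - 1u`) of the accessed
         // window must be below the array length; the unsigned comparisons also reject negative
         // indexes.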
5073     __ Loadw(temp, object, array_length_offset.Int32Value());
5074     __ Bgeu(index, temp, GetEntryLabel());
5075     __ Addi(temp2, index, size - 1u);
5076     __ Bgeu(temp2, temp, GetEntryLabel());
5077 
5078     // Construct the target.
5079     __ Addi(target.offset, index, data_offset.Int32Value());
5080 
5081     // Alignment check. For unaligned access, go to the runtime.
5082     DCHECK(IsPowerOfTwo(size));
5083     __ Andi(temp, target.offset, size - 1u);
5084     __ Bnez(temp, GetEntryLabel());
5085 
5086     // Byte order check. For native byte order return to the main path.
5087     if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5088         IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5089       // There is no reason to differentiate between native byte order and byte-swap
5090       // for setting a zero bit pattern. Just return to the main path.
5091       __ J(GetNativeByteOrderLabel());
5092       return;
5093     }
5094     __ Loadbu(temp, varhandle, native_byte_order_offset.Int32Value());
5095     __ Bnez(temp, GetNativeByteOrderLabel());
5096   }
5097 
5098   switch (access_mode_template) {
5099     case mirror::VarHandle::AccessModeTemplate::kGet:
5100       GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5101       break;
5102     case mirror::VarHandle::AccessModeTemplate::kSet:
5103       GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5104       break;
5105     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5106     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5107       GenerateVarHandleCompareAndSetOrExchange(
5108           invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5109       break;
5110     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5111       GenerateVarHandleGetAndUpdate(
5112           invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5113       break;
5114   }
5115   __ J(GetExitLabel());
5116 }
5117 
VisitThreadCurrentThread(HInvoke * invoke)5118 void IntrinsicLocationsBuilderRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
5119   LocationSummary* locations =
5120       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5121   locations->SetOut(Location::RequiresRegister());
5122 }
5123 
VisitThreadCurrentThread(HInvoke * invoke)5124 void IntrinsicCodeGeneratorRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
5125   Riscv64Assembler* assembler = GetAssembler();
5126   XRegister out = invoke->GetLocations()->Out().AsRegister<XRegister>();
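       // The managed heap is mapped in the low 4GiB, so the reference to the java.lang.Thread
       // peer can be loaded as an unsigned 32-bit value.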
5127   __ Loadwu(out, TR, Thread::PeerOffset<kRiscv64PointerSize>().Int32Value());
5128 }
5129 
VisitThreadInterrupted(HInvoke * invoke)5130 void IntrinsicLocationsBuilderRISCV64::VisitThreadInterrupted(HInvoke* invoke) {
5131   LocationSummary* locations =
5132       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5133   locations->SetOut(Location::RequiresRegister());
5134 }
5135 
VisitThreadInterrupted(HInvoke * invoke)5136 void IntrinsicCodeGeneratorRISCV64::VisitThreadInterrupted(HInvoke* invoke) {
5137   LocationSummary* locations = invoke->GetLocations();
5138   Riscv64Assembler* assembler = GetAssembler();
5139   XRegister out = locations->Out().AsRegister<XRegister>();
5140   Riscv64Label done;
5141 
5142   codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5143   __ Loadw(out, TR, Thread::InterruptedOffset<kRiscv64PointerSize>().Int32Value());
5144   __ Beqz(out, &done);
5145   __ Storew(Zero, TR, Thread::InterruptedOffset<kRiscv64PointerSize>().Int32Value());
5146   codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5147   __ Bind(&done);
5148 }
5149 
VisitReachabilityFence(HInvoke * invoke)5150 void IntrinsicLocationsBuilderRISCV64::VisitReachabilityFence(HInvoke* invoke) {
5151   LocationSummary* locations =
5152       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5153   locations->SetInAt(0, Location::Any());
5154 }
5155 
VisitReachabilityFence(HInvoke * invoke)5156 void IntrinsicCodeGeneratorRISCV64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
5157 
VisitMathFmaDouble(HInvoke * invoke)5158 void IntrinsicLocationsBuilderRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
5159   CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
5160 }
5161 
VisitMathFmaDouble(HInvoke * invoke)5162 void IntrinsicCodeGeneratorRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
5163   LocationSummary* locations = invoke->GetLocations();
5164   Riscv64Assembler* assembler = GetAssembler();
5165   FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
5166   FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
5167   FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
5168   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5169 
5170   __ FMAddD(out, n, m, a);
5171 }
5172 
VisitMathFmaFloat(HInvoke * invoke)5173 void IntrinsicLocationsBuilderRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
5174   CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
5175 }
5176 
VisitMathFmaFloat(HInvoke * invoke)5177 void IntrinsicCodeGeneratorRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
5178   LocationSummary* locations = invoke->GetLocations();
5179   Riscv64Assembler* assembler = GetAssembler();
5180   FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
5181   FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
5182   FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
5183   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5184 
5185   __ FMAddS(out, n, m, a);
5186 }
5187 
5188 
VisitMathCos(HInvoke * invoke)5189 void IntrinsicLocationsBuilderRISCV64::VisitMathCos(HInvoke* invoke) {
5190   CreateFPToFPCallLocations(allocator_, invoke);
5191 }
5192 
VisitMathCos(HInvoke * invoke)5193 void IntrinsicCodeGeneratorRISCV64::VisitMathCos(HInvoke* invoke) {
5194   codegen_->InvokeRuntime(kQuickCos, invoke, invoke->GetDexPc());
5195 }
5196 
VisitMathSin(HInvoke * invoke)5197 void IntrinsicLocationsBuilderRISCV64::VisitMathSin(HInvoke* invoke) {
5198   CreateFPToFPCallLocations(allocator_, invoke);
5199 }
5200 
VisitMathSin(HInvoke * invoke)5201 void IntrinsicCodeGeneratorRISCV64::VisitMathSin(HInvoke* invoke) {
5202   codegen_->InvokeRuntime(kQuickSin, invoke, invoke->GetDexPc());
5203 }
5204 
VisitMathAcos(HInvoke * invoke)5205 void IntrinsicLocationsBuilderRISCV64::VisitMathAcos(HInvoke* invoke) {
5206   CreateFPToFPCallLocations(allocator_, invoke);
5207 }
5208 
VisitMathAcos(HInvoke * invoke)5209 void IntrinsicCodeGeneratorRISCV64::VisitMathAcos(HInvoke* invoke) {
5210   codegen_->InvokeRuntime(kQuickAcos, invoke, invoke->GetDexPc());
5211 }
5212 
VisitMathAsin(HInvoke * invoke)5213 void IntrinsicLocationsBuilderRISCV64::VisitMathAsin(HInvoke* invoke) {
5214   CreateFPToFPCallLocations(allocator_, invoke);
5215 }
5216 
VisitMathAsin(HInvoke * invoke)5217 void IntrinsicCodeGeneratorRISCV64::VisitMathAsin(HInvoke* invoke) {
5218   codegen_->InvokeRuntime(kQuickAsin, invoke, invoke->GetDexPc());
5219 }
5220 
VisitMathAtan(HInvoke * invoke)5221 void IntrinsicLocationsBuilderRISCV64::VisitMathAtan(HInvoke* invoke) {
5222   CreateFPToFPCallLocations(allocator_, invoke);
5223 }
5224 
VisitMathAtan(HInvoke * invoke)5225 void IntrinsicCodeGeneratorRISCV64::VisitMathAtan(HInvoke* invoke) {
5226   codegen_->InvokeRuntime(kQuickAtan, invoke, invoke->GetDexPc());
5227 }
5228 
VisitMathAtan2(HInvoke * invoke)5229 void IntrinsicLocationsBuilderRISCV64::VisitMathAtan2(HInvoke* invoke) {
5230   CreateFPFPToFPCallLocations(allocator_, invoke);
5231 }
5232 
VisitMathAtan2(HInvoke * invoke)5233 void IntrinsicCodeGeneratorRISCV64::VisitMathAtan2(HInvoke* invoke) {
5234   codegen_->InvokeRuntime(kQuickAtan2, invoke, invoke->GetDexPc());
5235 }
5236 
VisitMathPow(HInvoke * invoke)5237 void IntrinsicLocationsBuilderRISCV64::VisitMathPow(HInvoke* invoke) {
5238   CreateFPFPToFPCallLocations(allocator_, invoke);
5239 }
5240 
VisitMathPow(HInvoke * invoke)5241 void IntrinsicCodeGeneratorRISCV64::VisitMathPow(HInvoke* invoke) {
5242   codegen_->InvokeRuntime(kQuickPow, invoke, invoke->GetDexPc());
5243 }
5244 
VisitMathCbrt(HInvoke * invoke)5245 void IntrinsicLocationsBuilderRISCV64::VisitMathCbrt(HInvoke* invoke) {
5246   CreateFPToFPCallLocations(allocator_, invoke);
5247 }
5248 
VisitMathCbrt(HInvoke * invoke)5249 void IntrinsicCodeGeneratorRISCV64::VisitMathCbrt(HInvoke* invoke) {
5250   codegen_->InvokeRuntime(kQuickCbrt, invoke, invoke->GetDexPc());
5251 }
5252 
VisitMathCosh(HInvoke * invoke)5253 void IntrinsicLocationsBuilderRISCV64::VisitMathCosh(HInvoke* invoke) {
5254   CreateFPToFPCallLocations(allocator_, invoke);
5255 }
5256 
VisitMathCosh(HInvoke * invoke)5257 void IntrinsicCodeGeneratorRISCV64::VisitMathCosh(HInvoke* invoke) {
5258   codegen_->InvokeRuntime(kQuickCosh, invoke, invoke->GetDexPc());
5259 }
5260 
VisitMathExp(HInvoke * invoke)5261 void IntrinsicLocationsBuilderRISCV64::VisitMathExp(HInvoke* invoke) {
5262   CreateFPToFPCallLocations(allocator_, invoke);
5263 }
5264 
VisitMathExp(HInvoke * invoke)5265 void IntrinsicCodeGeneratorRISCV64::VisitMathExp(HInvoke* invoke) {
5266   codegen_->InvokeRuntime(kQuickExp, invoke, invoke->GetDexPc());
5267 }
5268 
VisitMathExpm1(HInvoke * invoke)5269 void IntrinsicLocationsBuilderRISCV64::VisitMathExpm1(HInvoke* invoke) {
5270   CreateFPToFPCallLocations(allocator_, invoke);
5271 }
5272 
VisitMathExpm1(HInvoke * invoke)5273 void IntrinsicCodeGeneratorRISCV64::VisitMathExpm1(HInvoke* invoke) {
5274   codegen_->InvokeRuntime(kQuickExpm1, invoke, invoke->GetDexPc());
5275 }
5276 
VisitMathHypot(HInvoke * invoke)5277 void IntrinsicLocationsBuilderRISCV64::VisitMathHypot(HInvoke* invoke) {
5278   CreateFPFPToFPCallLocations(allocator_, invoke);
5279 }
5280 
VisitMathHypot(HInvoke * invoke)5281 void IntrinsicCodeGeneratorRISCV64::VisitMathHypot(HInvoke* invoke) {
5282   codegen_->InvokeRuntime(kQuickHypot, invoke, invoke->GetDexPc());
5283 }
5284 
VisitMathLog(HInvoke * invoke)5285 void IntrinsicLocationsBuilderRISCV64::VisitMathLog(HInvoke* invoke) {
5286   CreateFPToFPCallLocations(allocator_, invoke);
5287 }
5288 
VisitMathLog(HInvoke * invoke)5289 void IntrinsicCodeGeneratorRISCV64::VisitMathLog(HInvoke* invoke) {
5290   codegen_->InvokeRuntime(kQuickLog, invoke, invoke->GetDexPc());
5291 }
5292 
VisitMathLog10(HInvoke * invoke)5293 void IntrinsicLocationsBuilderRISCV64::VisitMathLog10(HInvoke* invoke) {
5294   CreateFPToFPCallLocations(allocator_, invoke);
5295 }
5296 
VisitMathLog10(HInvoke * invoke)5297 void IntrinsicCodeGeneratorRISCV64::VisitMathLog10(HInvoke* invoke) {
5298   codegen_->InvokeRuntime(kQuickLog10, invoke, invoke->GetDexPc());
5299 }
5300 
VisitMathNextAfter(HInvoke * invoke)5301 void IntrinsicLocationsBuilderRISCV64::VisitMathNextAfter(HInvoke* invoke) {
5302   CreateFPFPToFPCallLocations(allocator_, invoke);
5303 }
5304 
VisitMathNextAfter(HInvoke * invoke)5305 void IntrinsicCodeGeneratorRISCV64::VisitMathNextAfter(HInvoke* invoke) {
5306   codegen_->InvokeRuntime(kQuickNextAfter, invoke, invoke->GetDexPc());
5307 }
5308 
VisitMathSinh(HInvoke * invoke)5309 void IntrinsicLocationsBuilderRISCV64::VisitMathSinh(HInvoke* invoke) {
5310   CreateFPToFPCallLocations(allocator_, invoke);
5311 }
5312 
VisitMathSinh(HInvoke * invoke)5313 void IntrinsicCodeGeneratorRISCV64::VisitMathSinh(HInvoke* invoke) {
5314   codegen_->InvokeRuntime(kQuickSinh, invoke, invoke->GetDexPc());
5315 }
5316 
VisitMathTan(HInvoke * invoke)5317 void IntrinsicLocationsBuilderRISCV64::VisitMathTan(HInvoke* invoke) {
5318   CreateFPToFPCallLocations(allocator_, invoke);
5319 }
5320 
VisitMathTan(HInvoke * invoke)5321 void IntrinsicCodeGeneratorRISCV64::VisitMathTan(HInvoke* invoke) {
5322   codegen_->InvokeRuntime(kQuickTan, invoke, invoke->GetDexPc());
5323 }
5324 
VisitMathTanh(HInvoke * invoke)5325 void IntrinsicLocationsBuilderRISCV64::VisitMathTanh(HInvoke* invoke) {
5326   CreateFPToFPCallLocations(allocator_, invoke);
5327 }
5328 
VisitMathTanh(HInvoke * invoke)5329 void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) {
5330   codegen_->InvokeRuntime(kQuickTanh, invoke, invoke->GetDexPc());
5331 }
5332 
VisitMathSqrt(HInvoke * invoke)5333 void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) {
5334   CreateFPToFPLocations(allocator_, invoke, Location::kNoOutputOverlap);
5335 }
5336 
VisitMathSqrt(HInvoke * invoke)5337 void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) {
5338   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
5339   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
5340 
5341   LocationSummary* locations = invoke->GetLocations();
5342   Riscv64Assembler* assembler = GetAssembler();
5343   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5344   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5345 
5346   __ FSqrtD(out, in);
5347 }
5348 
GenDoubleRound(Riscv64Assembler * assembler,HInvoke * invoke,FPRoundingMode mode)5349 static void GenDoubleRound(Riscv64Assembler* assembler, HInvoke* invoke, FPRoundingMode mode) {
5350   LocationSummary* locations = invoke->GetLocations();
5351   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5352   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5353   ScratchRegisterScope srs(assembler);
5354   XRegister tmp = srs.AllocateXRegister();
5355   FRegister ftmp = srs.AllocateFRegister();
5356   Riscv64Label done;
5357 
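       // A double with a magnitude of at least 2^52 has no fractional bits, so only smaller
       // values are actually rounded, by converting to int64 and back with the requested
       // rounding mode; NaN inputs also take the early exit below and are returned unchanged.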
5358   // Load 2^52
5359   __ LoadConst64(tmp, 0x4330000000000000L);
5360   __ FMvDX(ftmp, tmp);
5361   __ FAbsD(out, in);
5362   __ FLtD(tmp, out, ftmp);
5363 
5364   // Copy the input to the output; if |input| >= 2^52 or the input is NaN, the rounding below is skipped.
5365   __ FMvD(out, in);
5366   __ Beqz(tmp, &done);
5367 
5368   // Convert with rounding mode
5369   __ FCvtLD(tmp, in, mode);
5370   __ FCvtDL(ftmp, tmp, mode);
5371 
5372   // Copy the sign bit from the input, e.g. to preserve -0.0 for results that round to zero.
5373   __ FSgnjD(out, ftmp, in);
5374   __ Bind(&done);
5375 }
5376 
VisitMathFloor(HInvoke * invoke)5377 void IntrinsicLocationsBuilderRISCV64::VisitMathFloor(HInvoke* invoke) {
5378   CreateFPToFPLocations(allocator_, invoke);
5379 }
5380 
VisitMathFloor(HInvoke * invoke)5381 void IntrinsicCodeGeneratorRISCV64::VisitMathFloor(HInvoke* invoke) {
5382   GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRDN);
5383 }
5384 
VisitMathCeil(HInvoke * invoke)5385 void IntrinsicLocationsBuilderRISCV64::VisitMathCeil(HInvoke* invoke) {
5386   CreateFPToFPLocations(allocator_, invoke);
5387 }
5388 
VisitMathCeil(HInvoke * invoke)5389 void IntrinsicCodeGeneratorRISCV64::VisitMathCeil(HInvoke* invoke) {
5390   GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRUP);
5391 }
5392 
VisitMathRint(HInvoke * invoke)5393 void IntrinsicLocationsBuilderRISCV64::VisitMathRint(HInvoke* invoke) {
5394   CreateFPToFPLocations(allocator_, invoke);
5395 }
5396 
VisitMathRint(HInvoke * invoke)5397 void IntrinsicCodeGeneratorRISCV64::VisitMathRint(HInvoke* invoke) {
5398   GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRNE);
5399 }
5400 
GenMathRound(CodeGeneratorRISCV64 * codegen,HInvoke * invoke,DataType::Type type)5401 void GenMathRound(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5402   Riscv64Assembler* assembler = codegen->GetAssembler();
5403   LocationSummary* locations = invoke->GetLocations();
5404   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5405   XRegister out = locations->Out().AsRegister<XRegister>();
5406   ScratchRegisterScope srs(assembler);
5407   FRegister ftmp = srs.AllocateFRegister();
5408   Riscv64Label done;
5409 
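       // Math.round(x) is implemented as floor(x + 0.5): both the addition and the conversion
       // round towards negative infinity, so the addition itself cannot push a sum just below
       // an integer up to the next integer; NaN inputs return 0 via the check below.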
5410   // Check NaN
5411   codegen->GetInstructionVisitor()->FClass(out, in, type);
5412   __ Slti(out, out, kFClassNaNMinValue);
5413   __ Beqz(out, &done);
5414 
5415   if (type == DataType::Type::kFloat64) {
5416     // Add 0.5 (0x3fe0000000000000), rounding down (towards negative infinity).
5417     __ LoadConst64(out, 0x3fe0000000000000L);
5418     __ FMvDX(ftmp, out);
5419     __ FAddD(ftmp, ftmp, in, FPRoundingMode::kRDN);
5420 
5421     // Convert to managed `long`, rounding down (towards negative infinity).
5422     __ FCvtLD(out, ftmp, FPRoundingMode::kRDN);
5423   } else {
5424     // Add 0.5 (0x3f000000), rounding down (towards negative infinity).
5425     __ LoadConst32(out, 0x3f000000);
5426     __ FMvWX(ftmp, out);
5427     __ FAddS(ftmp, ftmp, in, FPRoundingMode::kRDN);
5428 
5429     // Convert to managed `int`, rounding down (towards negative infinity).
5430     __ FCvtWS(out, ftmp, FPRoundingMode::kRDN);
5431   }
5432 
5433   __ Bind(&done);
5434 }
5435 
VisitMathRoundDouble(HInvoke * invoke)5436 void IntrinsicLocationsBuilderRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
5437   CreateFPToIntLocations(allocator_, invoke);
5438 }
5439 
VisitMathRoundDouble(HInvoke * invoke)5440 void IntrinsicCodeGeneratorRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
5441   GenMathRound(codegen_, invoke, DataType::Type::kFloat64);
5442 }
5443 
VisitMathRoundFloat(HInvoke * invoke)5444 void IntrinsicLocationsBuilderRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
5445   CreateFPToIntLocations(allocator_, invoke);
5446 }
5447 
VisitMathRoundFloat(HInvoke * invoke)5448 void IntrinsicCodeGeneratorRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
5449   GenMathRound(codegen_, invoke, DataType::Type::kFloat32);
5450 }
5451 
VisitMathMultiplyHigh(HInvoke * invoke)5452 void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
5453   LocationSummary* locations =
5454       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5455   locations->SetInAt(0, Location::RequiresRegister());
5456   locations->SetInAt(1, Location::RequiresRegister());
5457   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5458 }
5459 
VisitMathMultiplyHigh(HInvoke * invoke)5460 void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
5461   LocationSummary* locations = invoke->GetLocations();
5462   Riscv64Assembler* assembler = GetAssembler();
5463   DCHECK(invoke->GetType() == DataType::Type::kInt64);
5464 
5465   XRegister x = locations->InAt(0).AsRegister<XRegister>();
5466   XRegister y = locations->InAt(1).AsRegister<XRegister>();
5467   XRegister out = locations->Out().AsRegister<XRegister>();
5468 
5469   // Get the high 64 bits of the 128-bit product.
5470   __ Mulh(out, x, y);
5471 }
5472 
VisitStringGetCharsNoCheck(HInvoke * invoke)5473 void IntrinsicLocationsBuilderRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
5474   LocationSummary* locations =
5475       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5476 
5477   locations->SetInAt(0, Location::RequiresRegister());
5478   locations->SetInAt(1, Location::RequiresRegister());
5479   locations->SetInAt(2, Location::RequiresRegister());
5480   locations->SetInAt(3, Location::RequiresRegister());
5481   locations->SetInAt(4, Location::RequiresRegister());
5482 
5483   locations->AddRegisterTemps(3);
5484 }
5485 
VisitStringGetCharsNoCheck(HInvoke * invoke)5486 void IntrinsicCodeGeneratorRISCV64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
5487   Riscv64Assembler* assembler = GetAssembler();
5488   LocationSummary* locations = invoke->GetLocations();
5489 
5490   // In Java, a char occupies 2 bytes.
5491   constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
5492   static_assert(char_size == 2u);
5493 
5494   // Location of data in the destination char array buffer.
5495   const uint32_t array_data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
5496 
5497   // Location of char array data in the source string.
5498   const uint32_t string_value_offset = mirror::String::ValueOffset().Uint32Value();
5499 
5500   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
5501 
5502   // The source string.
5503   XRegister source_string_object = locations->InAt(0).AsRegister<XRegister>();
5504   // Index of the first character.
5505   XRegister source_begin_index = locations->InAt(1).AsRegister<XRegister>();
5506   // Index that immediately follows the last character.
5507   XRegister source_end_index = locations->InAt(2).AsRegister<XRegister>();
5508   // The destination array.
5509   XRegister destination_array_object = locations->InAt(3).AsRegister<XRegister>();
5510   // The start offset in the destination array.
5511   XRegister destination_begin_offset = locations->InAt(4).AsRegister<XRegister>();
5512 
5513   XRegister source_ptr = locations->GetTemp(0).AsRegister<XRegister>();
5514   XRegister destination_ptr = locations->GetTemp(1).AsRegister<XRegister>();
5515   XRegister number_of_chars = locations->GetTemp(2).AsRegister<XRegister>();
5516 
5517   ScratchRegisterScope temps(assembler);
5518   XRegister tmp = temps.AllocateXRegister();
5519 
5520   Riscv64Label done;
5521 
5522   // Calculate the number of characters to copy (number_of_chars = srcEnd - srcBegin).
5523   __ Subw(number_of_chars, source_end_index, source_begin_index);
5524 
5525   // If there is nothing to copy, exit.
5526   __ Beqz(number_of_chars, &done);
5527 
5528   // Prepare the destination pointer for the copy:
5529   // 1. Start from the address where the destination array's data begins
5530   //    (destination_array_object + array_data_offset), i.e. the base of
5531   //    the char[] payload.
5532   __ Addi(destination_ptr, destination_array_object, array_data_offset);
5533   // 2. Add the start offset within the destination data. Since a Java char
5534   //    occupies 2 bytes, destination_begin_offset must first be scaled by 2
5535   //    (destination_begin_offset * 2, which equals destination_begin_offset << 1);
5536   //    Sh1Add performs the shift and the add in a single instruction.
5537   __ Sh1Add(destination_ptr, destination_begin_offset, destination_ptr);
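  // destination_ptr now equals
  //   destination_array_object + array_data_offset + 2 * destination_begin_offset,
  // i.e. the address of dst[dstBegin].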
5538 
5539   // Prepare the source pointer for the copy:
5540   // 1. Start from the address where the source string's character data
5541   //    begins (source_string_object + string_value_offset).
5542   // The remaining adjustment (adding the scaled start index) is performed
5543   // later, because the scaling factor depends on whether the string is
5544   // compressed.
5545   __ Addi(source_ptr, source_string_object, string_value_offset);
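  // source_ptr now holds source_string_object + string_value_offset, i.e. the
  // address of the first stored character; the begin-index adjustment is applied
  // below once the element size (1 or 2 bytes) is known.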
5546 
5547   // The string may be compressed: a more compact representation in which
5548   // every character occupies one byte instead of two.
5549   Riscv64Label compressed_string_preloop;
5550 
5551   // Whether the string is compressed is recorded in the field that stores
5552   // the string's length: when string compression is enabled, the least
5553   // significant bit of the count field acts as the compression flag
5554   // (0 means compressed, 1 means uncompressed).
5555   if (mirror::kUseStringCompression) {
5556     // Location of count in string.
5557     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5558     // String's length.
5559     __ Loadwu(tmp, source_string_object, count_offset);
5560 
5561   // Check the compression flag: if it is clear, the string is compressed,
5562   // so branch to "compressed_string_preloop".
5563     __ Andi(tmp, tmp, 0x1);
5564     __ Beqz(tmp, &compressed_string_preloop);
5565   }
5566 
5567   // Uncompressed case: finish preparing the source pointer the same way as
5568   // the destination pointer, scaling the start index by 2 before adding it.
5569   __ Sh1Add(source_ptr, source_begin_index, source_ptr);
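  // source_ptr now points at the first character to copy, i.e. value[srcBegin]
  // (source_begin_index is scaled by 2 because each character is 2 bytes here).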
5570 
5571   // If the string is not compressed, perform a plain copy:
5572   // copy 4 characters (8 bytes) at a time; once fewer than 4 characters
5573   // remain, move to the "remainder_loop" and copy the remaining characters
5574   // one character (2 bytes) at a time.
5575   // Note: unaligned addresses are acceptable here, so no additional code is
5576   // needed to align them.
5577   Riscv64Label main_loop;
5578   Riscv64Label remainder_loop;
5579 
5580   // If there are fewer than 4 characters to begin with,
5581   // go straight to the remainder loop.
5582   __ Addi(tmp, number_of_chars, -4);
5583   __ Bltz(tmp, &remainder_loop);
5584 
5585   // Otherwise, keep the biased counter (number_of_chars - 4) and continue.
5586   __ Mv(number_of_chars, tmp);
5587 
5588   // Main loop. Loads and stores 4 16-bit Java characters at a time.
5589   __ Bind(&main_loop);
5590 
5591   __ Loadd(tmp, source_ptr, 0);
5592   __ Addi(source_ptr, source_ptr, char_size * 4);
5593   __ Stored(tmp, destination_ptr, 0);
5594   __ Addi(destination_ptr, destination_ptr, char_size * 4);
5595 
5596   __ Addi(number_of_chars, number_of_chars, -4);
5597 
5598   __ Bgez(number_of_chars, &main_loop);
5599 
5600   // Undo the -4 bias to recover the number of remaining characters (0..3).
5601   __ Addi(number_of_chars, number_of_chars, 4);
5602   __ Beqz(number_of_chars, &done);
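  // Worked example: for 10 characters the biased counter starts at 6, the main
  // loop runs twice (6 -> 2 -> -2) and copies 8 characters, and after adding 4
  // back, 2 characters are left for the remainder loop below.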
5603 
5604   // Remainder loop: handles the "fewer than 4 characters" case and the tail
5605   // left by the main loop; loads and stores one 16-bit Java character at a time.
5606   __ Bind(&remainder_loop);
5607 
5608   __ Loadhu(tmp, source_ptr, 0);
5609   __ Addi(source_ptr, source_ptr, char_size);
5610 
5611   __ Storeh(tmp, destination_ptr, 0);
5612   __ Addi(destination_ptr, destination_ptr, char_size);
5613 
5614   __ Addi(number_of_chars, number_of_chars, -1);
5615   __ Bgtz(number_of_chars, &remainder_loop);
5616 
5617   Riscv64Label compressed_string_loop;
5618   if (mirror::kUseStringCompression) {
5619     __ J(&done);
5620 
5621     // Copy path for compressed strings (see above): every character in the
5622     // source string occupies only one byte instead of two.
5623     constexpr size_t compressed_char_size = DataType::Size(DataType::Type::kInt8);
5624     static_assert(compressed_char_size == 1u);
5625 
5626     __ Bind(&compressed_string_preloop);
5627 
5628     // Finish preparing the source pointer: as for the destination, add the
5629     // start index, but each source character occupies only one byte here
5630     // (it is widened to two bytes when it is stored to the destination),
5631     // so source_begin_index is added without scaling
5632     // (hence Add is used rather than Sh1Add).
5633     __ Add(source_ptr, source_ptr, source_begin_index);
5634 
5635     // Copy loop for compressed strings: widen one 8-bit character to a 16-bit one per iteration.
5636     __ Bind(&compressed_string_loop);
5637 
5638     __ Loadbu(tmp, source_ptr, 0);
5639     __ Addi(source_ptr, source_ptr, compressed_char_size);
5640     __ Storeh(tmp, destination_ptr, 0);
5641     __ Addi(destination_ptr, destination_ptr, char_size);
5642 
5643     __ Addi(number_of_chars, number_of_chars, -1);
5644     __ Bgtz(number_of_chars, &compressed_string_loop);
5645   }
5646 
5647   __ Bind(&done);
5648 }
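
// For reference, the copy implemented above is equivalent to the following
// sketch (hypothetical helper, not part of ART; the 4-characters-at-a-time
// unrolling and the compressed-string path, which reads uint8_t source
// elements, are omitted):
//
//   void GetCharsNoCheckReference(const uint16_t* src, int32_t src_begin,
//                                 int32_t src_end, uint16_t* dst, int32_t dst_begin) {
//     for (int32_t i = src_begin; i != src_end; ++i) {
//       dst[dst_begin++] = src[i];  // one 16-bit Java char per iteration
//     }
//   }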
5649 
5650 void GenMathSignum(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5651   LocationSummary* locations = invoke->GetLocations();
5652   DCHECK(locations->InAt(0).Equals(locations->Out()));
5653   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
5654   Riscv64Assembler* assembler = codegen->GetAssembler();
5655   ScratchRegisterScope srs(assembler);
5656   XRegister tmp = srs.AllocateXRegister();
5657   FRegister ftmp = srs.AllocateFRegister();
5658   Riscv64Label done;
5659 
5660   if (type == DataType::Type::kFloat64) {
5661     // 0x3FF0000000000000L = 1.0
5662     __ Li(tmp, 0x3FF0000000000000L);
5663     __ FMvDX(ftmp, tmp);
5664     __ FClassD(tmp, in);
5665   } else {
5666     // 0x3f800000 = 1.0f
5667     __ Li(tmp, 0x3F800000);
5668     __ FMvWX(ftmp, tmp);
5669     __ FClassS(tmp, in);
5670   }
5671 
5672   __ Andi(tmp, tmp, kPositiveZero | kNegativeZero | kSignalingNaN | kQuietNaN);
5673   __ Bnez(tmp, &done);
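  // If the input is any kind of zero or NaN, the bit test above is non-zero:
  // skip the sign copy and return the input unchanged (the input register is
  // also the output, per the SameAsFirstInput location constraint).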
5674 
5675   if (type == DataType::Type::kFloat64) {
5676     __ FSgnjD(in, ftmp, in);
5677   } else {
5678     __ FSgnjS(in, ftmp, in);
5679   }
5680 
5681   __ Bind(&done);
5682 }
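
// For reference, Math.signum returns the argument itself for NaN and +/-0.0 and
// otherwise returns 1.0 with the sign of the argument, which is what the
// FClass/FSgnj sequence above computes. A minimal sketch of the same semantics
// (hypothetical helper using <cmath>, not part of this file):
//
//   double SignumReference(double x) {
//     if (std::isnan(x) || x == 0.0) return x;  // NaN and +/-0.0 pass through
//     return std::copysign(1.0, x);             // +/-1.0 with the sign of x
//   }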
5683 
5684 void IntrinsicLocationsBuilderRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
5685   LocationSummary* locations =
5686       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5687   locations->SetInAt(0, Location::RequiresFpuRegister());
5688   locations->SetOut(Location::SameAsFirstInput());
5689 }
5690 
5691 void IntrinsicCodeGeneratorRISCV64::VisitMathSignumDouble(HInvoke* invoke) {
5692   GenMathSignum(codegen_, invoke, DataType::Type::kFloat64);
5693 }
5694 
5695 void IntrinsicLocationsBuilderRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
5696   LocationSummary* locations =
5697       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
5698   locations->SetInAt(0, Location::RequiresFpuRegister());
5699   locations->SetOut(Location::SameAsFirstInput());
5700 }
5701 
5702 void IntrinsicCodeGeneratorRISCV64::VisitMathSignumFloat(HInvoke* invoke) {
5703   GenMathSignum(codegen_, invoke, DataType::Type::kFloat32);
5704 }
5705 
5706 void GenMathCopySign(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
5707   Riscv64Assembler* assembler = codegen->GetAssembler();
5708   LocationSummary* locations = invoke->GetLocations();
5709   FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
5710   FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
5711   FRegister out = locations->Out().AsFpuRegister<FRegister>();
5712 
5713   if (type == DataType::Type::kFloat64) {
5714     __ FSgnjD(out, in0, in1);
5715   } else {
5716     __ FSgnjS(out, in0, in1);
5717   }
5718 }
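
// FSgnjD/FSgnjS produce a value with the magnitude of the first source operand
// and the sign bit of the second, which matches Math.copySign(magnitude, sign),
// so a single instruction suffices for this intrinsic.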
5719 
5720 void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
5721   CreateFPFPToFPCallLocations(allocator_, invoke);
5722 }
5723 
5724 void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignDouble(HInvoke* invoke) {
5725   GenMathCopySign(codegen_, invoke, DataType::Type::kFloat64);
5726 }
5727 
5728 void IntrinsicLocationsBuilderRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
5729   CreateFPFPToFPCallLocations(allocator_, invoke);
5730 }
5731 
5732 void IntrinsicCodeGeneratorRISCV64::VisitMathCopySignFloat(HInvoke* invoke) {
5733   GenMathCopySign(codegen_, invoke, DataType::Type::kFloat32);
5734 }
5735 
5736 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
5737 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
5738 #undef MARK_UNIMPLEMENTED
5739 
5740 UNREACHABLE_INTRINSICS(RISCV64)
5741 
5742 }  // namespace riscv64
5743 }  // namespace art
5744