/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "mirror/var_handle.h"
#include "optimizing/data_type.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
#include "well_known_classes.h"

namespace art HIDDEN {

namespace x86 {

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

#define __ assembler->

static void GenArrayAddress(X86Assembler* assembler,
                            Register dest,
                            Register base,
                            Location pos,
                            DataType::Type type,
                            uint32_t data_offset) {
  if (pos.IsConstant()) {
    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
  } else {
    const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
    __ leal(dest, Address(base, pos.AsRegister<Register>(), scale_factor, data_offset));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitBakerReadBarrier());
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    X86Assembler* assembler = x86_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
    Location length = locations->InAt(4);

    const DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
    Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
    Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
    Register value = locations->GetTemp(3).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
    GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);

    NearLabel loop;
    __ Bind(&loop);
    __ movl(value, Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(value);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(value);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(value);
    __ movl(Address(dst_curr_addr, 0), value);
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
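    // The 64-bit double is split into the output register pair: copy it to an
    // XMM temp, extract the low 32 bits with movd, shift the temp right by 32,
    // and extract the high 32 bits.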
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
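    // Rebuild the 64-bit value in an XMM register: load each 32-bit half into
    // its own temp, then interleave the low doublewords (punpckldq) so that
    // temp1 holds high:low.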
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
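      // bswap reverses all four bytes; the arithmetic shift then moves the two
      // meaningful bytes into the low half and sign-extends the 16-bit result.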
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
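  // Byte-reversing a 64-bit value swaps the two 32-bit halves and byte-reverses
  // each half.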
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
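  // The SSE4.1 rounding-control immediate selects the mode: 0 rounds to nearest
  // (rint), 1 rounds toward -infinity (floor), 2 rounds toward +infinity (ceil).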
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(
          static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(1, Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenLowestOneBit(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            bool is_long,
                            HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Location out_loc = locations->Out();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      if (is_long) {
        __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
        __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
      } else {
        __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
      }
      return;
    }
    // Nonzero value.
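    // For a nonzero value the result is 1 << CTZ(value); compute the shift
    // amount here and materialize the constant directly.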
    value = is_long ? CTZ(static_cast<uint64_t>(value))
                    : CTZ(static_cast<uint32_t>(value));
    if (is_long) {
      if (value >= 32) {
        int shift = value - 32;
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
      } else {
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
      }
    } else {
      codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
    }
    return;
  }
  // Handle the non-constant case.
  if (is_long) {
    DCHECK(src.IsRegisterPair());
    Register src_lo = src.AsRegisterPairLow<Register>();
    Register src_hi = src.AsRegisterPairHigh<Register>();

    Register out_lo = out_loc.AsRegisterPairLow<Register>();
    Register out_hi = out_loc.AsRegisterPairHigh<Register>();

    __ movl(out_lo, src_lo);
    __ movl(out_hi, src_hi);

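    // Negate the 64-bit value across the register pair (negate the low word,
    // propagate the borrow into the high word, then negate it), and AND the
    // result with the original value to isolate the lowest set bit (x & -x).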
    __ negl(out_lo);
    __ adcl(out_hi, Immediate(0));
    __ negl(out_hi);

    __ andl(out_lo, src_lo);
    __ andl(out_hi, src_hi);
  } else {
    if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
      Register out = out_loc.AsRegister<Register>();
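      // BLSI computes src & -src, isolating the lowest set bit in one instruction.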
      __ blsi(out, src.AsRegister<Register>());
    } else {
      Register out = out_loc.AsRegister<Register>();
      // Do tmp & -tmp
      if (src.IsRegister()) {
        __ movl(out, src.AsRegister<Register>());
      } else {
        DCHECK(src.IsStackSlot());
        __ movl(out, Address(ESP, src.GetStackIndex()));
      }
      __ negl(out);

      if (src.IsRegister()) {
        __ andl(out, src.AsRegister<Register>());
      } else {
        __ andl(out, Address(ESP, src.GetStackIndex()));
      }
    }
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
}
void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
}

void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (invoke->GetBlock()->GetGraph()->GetAllocator())
      LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVS{B,W,L}, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

template <typename LhsType>
static void EmitCmplJLess(X86Assembler* assembler,
                          LhsType lhs,
                          Location rhs,
                          Label* label) {
  static_assert(std::is_same_v<LhsType, Register> || std::is_same_v<LhsType, Address>);
  if (rhs.IsConstant()) {
    int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
    __ cmpl(lhs, Immediate(rhs_constant));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
  __ j(kLess, label);
}

static void CheckSystemArrayCopyPosition(X86Assembler* assembler,
                                         Register array,
                                         Location pos,
                                         Location length,
                                         SlowPathCode* slow_path,
                                         Register temp,
                                         bool length_is_array_length,
                                         bool position_sign_checked) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_array_length) {
        // Check that length(array) >= length.
        EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
      }
    } else {
      // Calculate length(array) - pos.
      // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
      // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
      __ movl(temp, Address(array, length_offset));
      __ subl(temp, Immediate(pos_const));

      // Check that (length(array) - pos) >= length.
      EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
    }
  } else if (length_is_array_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    if (!position_sign_checked) {
      __ testl(pos_reg, pos_reg);
      __ j(kLess, slow_path->GetEntryLabel());
    }

    // Calculate length(array) - pos.
    // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
    // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
    __ movl(temp, Address(array, length_offset));
    __ subl(temp, pos_reg);

    // Check that (length(array) - pos) >= length.
    EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
  }
}

static void SystemArrayCopyPrimitive(HInvoke* invoke,
                                     X86Assembler* assembler,
                                     CodeGeneratorX86* codegen,
                                     DataType::Type type) {
  LocationSummary* locations = invoke->GetLocations();
  Register src = locations->InAt(0).AsRegister<Register>();
  Location src_pos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSB/W/L.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckSystemArrayCopyPosition(assembler,
                               src,
                               src_pos,
                               Location::RegisterLocation(count),
                               slow_path,
                               src_base,
                               /*length_is_array_length=*/ false,
                               /*position_sign_checked=*/ false);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckSystemArrayCopyPosition(assembler,
                               dest,
                               dest_pos,
                               Location::RegisterLocation(count),
                               slow_path,
                               src_base,
                               /*length_is_array_length=*/ false,
                               /*position_sign_checked=*/ false);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t data_size = DataType::Size(type);
  const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();

  GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
  GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);

  // Do the move.
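  // rep movs{b,w,l} copies ECX elements of the element size from [ESI] to
  // [EDI], advancing both pointers as it goes.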
  switch (type) {
    case DataType::Type::kInt8:
      __ rep_movsb();
      break;
    case DataType::Type::kUint16:
      __ rep_movsw();
      break;
    case DataType::Type::kInt32:
      __ rep_movsl();
      break;
    default:
      LOG(FATAL) << "Unexpected data type for intrinsic";
  }
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // Also, because we use the loaded class references only to compare them, we
    // don't need to unpoison them.
    // /* HeapReference<Class> */ ecx = str->klass_
    __ movl(ecx, Address(str, class_offset));
    // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(ecx, Address(str, count_offset));
  // Check if lengths and compression flags are equal, return false if they're not.
  // Two identical strings will always have same compression style since
  // compression style is decided on alloc.
  __ cmpl(ecx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if strings are empty. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  __ jecxz(&return_true);

  if (mirror::kUseStringCompression) {
    NearLabel string_uncompressed;
    // Extract length and differentiate between both compressed or both uncompressed.
    // Different compression style is cut above.
    __ shrl(ecx, Immediate(1));
    __ j(kCarrySet, &string_uncompressed);
    // Divide string length by 2, rounding up, and continue as if uncompressed.
    __ addl(ecx, Immediate(1));
    __ shrl(ecx, Immediate(1));
    __ Bind(&string_uncompressed);
  }
  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
  __ leal(esi, Address(str, value_offset));
  __ leal(edi, Address(arg, value_offset));

  // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
  // divisible by 2.
  __ addl(ecx, Immediate(1));
  __ shrl(ecx, Immediate(1));

  // Assertions that must hold in order to compare strings 2 characters (uncompressed)
  // or 4 characters (compressed) at a time.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");

  // Loop to compare strings two characters at a time starting at the beginning of the string.
  __ repe_cmpsl();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(esi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(esi, esi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(EDI));
  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber EAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(EAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber EDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses ECX as the counter.
  locations->AddTemp(Location::RegisterLocation(ECX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
  if (mirror::kUseStringCompression) {
    // Need another temporary to be able to save unflagged string length.
    locations->AddTemp(Location::RequiresRegister());
  }
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86Assembler* assembler,
                                  CodeGeneratorX86* codegen,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register string_obj = locations->InAt(0).AsRegister<Register>();
  Register search_value = locations->InAt(1).AsRegister<Register>();
  Register counter = locations->GetTemp(0).AsRegister<Register>();
  Register string_length = locations->GetTemp(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();
  // Only used when string compression feature is on.
  Register string_length_flagged;

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj, EDI);
  DCHECK_EQ(search_value, EAX);
  DCHECK_EQ(counter, ECX);
  DCHECK_EQ(out, EDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
  SlowPathCode* slow_path = nullptr;
  HInstruction* code_point = invoke->InputAt(1);
  if (code_point->IsIntConstant()) {
    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else if (code_point->GetType() != DataType::Type::kUint16) {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load the count field of the string containing the length and compression flag.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a zero-length check. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  // TODO: Support jecxz.
  NearLabel not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (mirror::kUseStringCompression) {
    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
    __ movl(string_length_flagged, string_length);
    // Extract the length and shift out the least significant bit used as compression flag.
    __ shrl(string_length, Immediate(1));
  }

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addl(string_obj, Immediate(value_offset));
  } else {
    Register start_index = locations->InAt(2).AsRegister<Register>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmovl(kGreater, counter, start_index);

    if (mirror::kUseStringCompression) {
      NearLabel modify_counter, offset_uncompressed_label;
      __ testl(string_length_flagged, Immediate(1));
      __ j(kNotZero, &offset_uncompressed_label);
      // Move to the start of the string: string_obj + value_offset + start_index.
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
      __ jmp(&modify_counter);

      // Move to the start of the string: string_obj + value_offset + 2 * start_index.
      __ Bind(&offset_uncompressed_label);
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

      // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
      // compare.
      __ Bind(&modify_counter);
    } else {
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
    }
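    // counter = string_length - start_index, computed as string_length + (-start_index).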
1270 __ negl(counter);
1271 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1272 }
1273
1274 if (mirror::kUseStringCompression) {
1275 NearLabel uncompressed_string_comparison;
1276 NearLabel comparison_done;
1277 __ testl(string_length_flagged, Immediate(1));
1278 __ j(kNotZero, &uncompressed_string_comparison);
1279
1280 // Check if EAX (search_value) is ASCII.
1281 __ cmpl(search_value, Immediate(127));
1282 __ j(kGreater, ¬_found_label);
1283 // Comparing byte-per-byte.
1284 __ repne_scasb();
1285 __ jmp(&comparison_done);
1286
1287 // Everything is set up for repne scasw:
1288 // * Comparison address in EDI.
1289 // * Counter in ECX.
1290 __ Bind(&uncompressed_string_comparison);
1291 __ repne_scasw();
1292 __ Bind(&comparison_done);
1293 } else {
1294 __ repne_scasw();
1295 }
1296 // Did we find a match?
1297 __ j(kNotEqual, ¬_found_label);
1298
1299 // Yes, we matched. Compute the index of the result.
1300 __ subl(string_length, counter);
1301 __ leal(out, Address(string_length, -1));
1302
1303 NearLabel done;
1304 __ jmp(&done);
1305
1306 // Failed to match; return -1.
1307 __ Bind(¬_found_label);
1308 __ movl(out, Immediate(-1));
1309
1310 // And join up at the end.
1311 __ Bind(&done);
1312 if (slow_path != nullptr) {
1313 __ Bind(slow_path->GetExitLabel());
1314 }
1315 }
1316
VisitStringIndexOf(HInvoke * invoke)1317 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1318 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1319 }
1320
VisitStringIndexOf(HInvoke * invoke)1321 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1322 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1323 }
1324
VisitStringIndexOfAfter(HInvoke * invoke)1325 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1326 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1327 }
1328
VisitStringIndexOfAfter(HInvoke * invoke)1329 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1330 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1331 }
1332
VisitStringNewStringFromBytes(HInvoke * invoke)1333 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1334 LocationSummary* locations = new (allocator_) LocationSummary(
1335 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1336 InvokeRuntimeCallingConvention calling_convention;
1337 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1338 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1339 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1340 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1341 locations->SetOut(Location::RegisterLocation(EAX));
1342 }
1343
VisitStringNewStringFromBytes(HInvoke * invoke)1344 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1345 X86Assembler* assembler = GetAssembler();
1346 LocationSummary* locations = invoke->GetLocations();
1347
1348 Register byte_array = locations->InAt(0).AsRegister<Register>();
1349 __ testl(byte_array, byte_array);
1350 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1351 codegen_->AddSlowPath(slow_path);
1352 __ j(kEqual, slow_path->GetEntryLabel());
1353
1354 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1355 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1356 __ Bind(slow_path->GetExitLabel());
1357 }
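
// For reference, the library entry point allocated here is roughly
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount);
// the exact signature comes from libcore and may differ between releases.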
1358
VisitStringNewStringFromChars(HInvoke * invoke)1359 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1360 LocationSummary* locations =
1361 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1362 InvokeRuntimeCallingConvention calling_convention;
1363 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1364 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1365 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1366 locations->SetOut(Location::RegisterLocation(EAX));
1367 }
1368
VisitStringNewStringFromChars(HInvoke * invoke)1369 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1370 // No need to emit code checking whether `locations->InAt(2)` is a null
1371 // pointer, as callers of the native method
1372 //
1373 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1374 //
1375 // all include a null check on `data` before calling that method.
1376 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1377 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1378 }
1379
VisitStringNewStringFromString(HInvoke * invoke)1380 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1381 LocationSummary* locations = new (allocator_) LocationSummary(
1382 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1383 InvokeRuntimeCallingConvention calling_convention;
1384 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1385 locations->SetOut(Location::RegisterLocation(EAX));
1386 }
1387
VisitStringNewStringFromString(HInvoke * invoke)1388 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1389 X86Assembler* assembler = GetAssembler();
1390 LocationSummary* locations = invoke->GetLocations();
1391
1392 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1393 __ testl(string_to_copy, string_to_copy);
1394 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1395 codegen_->AddSlowPath(slow_path);
1396 __ j(kEqual, slow_path->GetEntryLabel());
1397
1398 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1399 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1400 __ Bind(slow_path->GetExitLabel());
1401 }
1402
VisitStringGetCharsNoCheck(HInvoke * invoke)1403 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1404 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1405 LocationSummary* locations =
1406 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1407 locations->SetInAt(0, Location::RequiresRegister());
1408 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1409 // Place srcEnd in ECX to save a move below.
1410 locations->SetInAt(2, Location::RegisterLocation(ECX));
1411 locations->SetInAt(3, Location::RequiresRegister());
1412 locations->SetInAt(4, Location::RequiresRegister());
1413
1414 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1415 // We don't have enough registers to also reserve ECX as a temp, so it is saved and restored manually below.
1416 locations->AddTemp(Location::RegisterLocation(ESI));
1417 locations->AddTemp(Location::RegisterLocation(EDI));
1418 }
1419
VisitStringGetCharsNoCheck(HInvoke * invoke)1420 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1421 X86Assembler* assembler = GetAssembler();
1422 LocationSummary* locations = invoke->GetLocations();
1423
1424 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1425 // Location of data in char array buffer.
1426 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1427 // Location of char array data in string.
1428 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1429
1430 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1431 Register obj = locations->InAt(0).AsRegister<Register>();
1432 Location srcBegin = locations->InAt(1);
1433 int srcBegin_value =
1434 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1435 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1436 Register dst = locations->InAt(3).AsRegister<Register>();
1437 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1438
1439 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1440 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1441 DCHECK_EQ(char_size, 2u);
1442
1443 // Compute the number of chars (words) to move.
1444 // Save ECX, since we don't know if it will be used later.
1445 __ pushl(ECX);
1446 int stack_adjust = kX86WordSize;
1447 __ cfi().AdjustCFAOffset(stack_adjust);
1448 DCHECK_EQ(srcEnd, ECX);
1449 if (srcBegin.IsConstant()) {
1450 __ subl(ECX, Immediate(srcBegin_value));
1451 } else {
1452 DCHECK(srcBegin.IsRegister());
1453 __ subl(ECX, srcBegin.AsRegister<Register>());
1454 }
1455
1456 NearLabel done;
1457 if (mirror::kUseStringCompression) {
1458 // Location of count in string
1459 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1460 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1461 DCHECK_EQ(c_char_size, 1u);
1462 __ pushl(EAX);
1463 __ cfi().AdjustCFAOffset(stack_adjust);
1464
1465 NearLabel copy_loop, copy_uncompressed;
1466 __ testl(Address(obj, count_offset), Immediate(1));
1467 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1468 "Expecting 0=compressed, 1=uncompressed");
1469 __ j(kNotZero, &copy_uncompressed);
1470 // Compute the address of the source string by adding the number of chars from
1471 // the source beginning to the value offset of a string.
1472 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1473
1474 // Start the loop to copy String's value to Array of Char.
1475 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1476 __ Bind(&copy_loop);
1477 __ jecxz(&done);
1478 // Use EAX temporary (convert byte from ESI to word).
1479 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1480 __ movzxb(EAX, Address(ESI, 0));
1481 __ movw(Address(EDI, 0), EAX);
1482 __ leal(EDI, Address(EDI, char_size));
1483 __ leal(ESI, Address(ESI, c_char_size));
1484 // TODO: Add support for LOOP to X86Assembler.
1485 __ subl(ECX, Immediate(1));
1486 __ jmp(&copy_loop);
1487 __ Bind(&copy_uncompressed);
1488 }
1489
1490 // Do the copy for uncompressed string.
1491 // Compute the address of the destination buffer.
1492 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1493 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1494 __ rep_movsw();
1495
1496 __ Bind(&done);
1497 if (mirror::kUseStringCompression) {
1498 // Restore EAX.
1499 __ popl(EAX);
1500 __ cfi().AdjustCFAOffset(-stack_adjust);
1501 }
1502 // Restore ECX.
1503 __ popl(ECX);
1504 __ cfi().AdjustCFAOffset(-stack_adjust);
1505 }
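
// Java-level effect of the code above (illustrative only):
//
//   char[] dst = new char[4];
//   "compiler".getChars(2, 6, dst, 0);   // dst == {'m', 'p', 'i', 'l'}
//
// For a compressed (Latin-1) source string the byte-at-a-time loop widens each byte
// to a char; otherwise a single REP MOVSW copies srcEnd - srcBegin chars.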
1506
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1507 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1508 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1509 Location out_loc = locations->Out();
1510 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1511 // to avoid a SIGBUS.
1512 switch (size) {
1513 case DataType::Type::kInt8:
1514 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1515 break;
1516 case DataType::Type::kInt16:
1517 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1518 break;
1519 case DataType::Type::kInt32:
1520 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1521 break;
1522 case DataType::Type::kInt64:
1523 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1524 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1525 break;
1526 default:
1527 LOG(FATAL) << "Type not recognized for peek: " << size;
1528 UNREACHABLE();
1529 }
1530 }
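
// These peeks back libcore.io.Memory; a rough usage sketch (illustrative only, names
// as in libcore):
//
//   long addr = ...;                    // raw native address passed as a Java long
//   byte b = Memory.peekByte(addr);
//   int  i = Memory.peekIntNative(addr);
//
// Only the low word of the 64-bit address is dereferenced, since this is 32-bit x86.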
1531
VisitMemoryPeekByte(HInvoke * invoke)1532 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1533 CreateLongToIntLocations(allocator_, invoke);
1534 }
1535
VisitMemoryPeekByte(HInvoke * invoke)1536 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1537 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1538 }
1539
VisitMemoryPeekIntNative(HInvoke * invoke)1540 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1541 CreateLongToIntLocations(allocator_, invoke);
1542 }
1543
VisitMemoryPeekIntNative(HInvoke * invoke)1544 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1545 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1546 }
1547
VisitMemoryPeekLongNative(HInvoke * invoke)1548 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1549 CreateLongToLongLocations(allocator_, invoke);
1550 }
1551
VisitMemoryPeekLongNative(HInvoke * invoke)1552 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1553 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1554 }
1555
VisitMemoryPeekShortNative(HInvoke * invoke)1556 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1557 CreateLongToIntLocations(allocator_, invoke);
1558 }
1559
VisitMemoryPeekShortNative(HInvoke * invoke)1560 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1561 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1562 }
1563
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1564 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1565 DataType::Type size,
1566 HInvoke* invoke) {
1567 LocationSummary* locations =
1568 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1569 locations->SetInAt(0, Location::RequiresRegister());
1570 HInstruction* value = invoke->InputAt(1);
1571 if (size == DataType::Type::kInt8) {
1572 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1573 } else {
1574 locations->SetInAt(1, Location::RegisterOrConstant(value));
1575 }
1576 }
1577
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1578 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1579 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1580 Location value_loc = locations->InAt(1);
1581 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1582 // to avoid a SIGBUS.
1583 switch (size) {
1584 case DataType::Type::kInt8:
1585 if (value_loc.IsConstant()) {
1586 __ movb(Address(address, 0),
1587 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1588 } else {
1589 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1590 }
1591 break;
1592 case DataType::Type::kInt16:
1593 if (value_loc.IsConstant()) {
1594 __ movw(Address(address, 0),
1595 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1596 } else {
1597 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1598 }
1599 break;
1600 case DataType::Type::kInt32:
1601 if (value_loc.IsConstant()) {
1602 __ movl(Address(address, 0),
1603 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1604 } else {
1605 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1606 }
1607 break;
1608 case DataType::Type::kInt64:
1609 if (value_loc.IsConstant()) {
1610 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1611 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1612 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1613 } else {
1614 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1615 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1616 }
1617 break;
1618 default:
1619 LOG(FATAL) << "Type not recognized for poke: " << size;
1620 UNREACHABLE();
1621 }
1622 }
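
// Matching store-side sketch (illustrative only; names as in libcore.io.Memory):
//
//   Memory.pokeByte(addr, (byte) 0x2a);
//   Memory.pokeLongNative(addr, 42L);   // emitted as two 32-bit stores above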
1623
VisitMemoryPokeByte(HInvoke * invoke)1624 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1625 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1626 }
1627
VisitMemoryPokeByte(HInvoke * invoke)1628 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1629 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1630 }
1631
VisitMemoryPokeIntNative(HInvoke * invoke)1632 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1633 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1634 }
1635
VisitMemoryPokeIntNative(HInvoke * invoke)1636 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1637 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1638 }
1639
VisitMemoryPokeLongNative(HInvoke * invoke)1640 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1641 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1642 }
1643
VisitMemoryPokeLongNative(HInvoke * invoke)1644 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1645 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1646 }
1647
VisitMemoryPokeShortNative(HInvoke * invoke)1648 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1649 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1650 }
1651
VisitMemoryPokeShortNative(HInvoke * invoke)1652 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1653 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1654 }
1655
VisitThreadCurrentThread(HInvoke * invoke)1656 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1657 LocationSummary* locations =
1658 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1659 locations->SetOut(Location::RequiresRegister());
1660 }
1661
VisitThreadCurrentThread(HInvoke * invoke)1662 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1663 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1664 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1665 }
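
// The whole intrinsic is a single fs-relative load: the current thread's Java peer
// (its java.lang.Thread object) is cached in thread-local storage, so
// Thread.currentThread() needs no runtime call here.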
1666
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1667 static void GenUnsafeGet(HInvoke* invoke,
1668 DataType::Type type,
1669 bool is_volatile,
1670 CodeGeneratorX86* codegen) {
1671 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1672 LocationSummary* locations = invoke->GetLocations();
1673 Location base_loc = locations->InAt(1);
1674 Register base = base_loc.AsRegister<Register>();
1675 Location offset_loc = locations->InAt(2);
1676 Register offset = offset_loc.AsRegisterPairLow<Register>();
1677 Location output_loc = locations->Out();
1678
1679 switch (type) {
1680 case DataType::Type::kInt8: {
1681 Register output = output_loc.AsRegister<Register>();
1682 __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1683 break;
1684 }
1685
1686 case DataType::Type::kInt32: {
1687 Register output = output_loc.AsRegister<Register>();
1688 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1689 break;
1690 }
1691
1692 case DataType::Type::kReference: {
1693 Register output = output_loc.AsRegister<Register>();
1694 if (codegen->EmitReadBarrier()) {
1695 if (kUseBakerReadBarrier) {
1696 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1697 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1698 invoke, output_loc, base, src, /* needs_null_check= */ false);
1699 } else {
1700 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1701 codegen->GenerateReadBarrierSlow(
1702 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1703 }
1704 } else {
1705 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1706 __ MaybeUnpoisonHeapReference(output);
1707 }
1708 break;
1709 }
1710
1711 case DataType::Type::kInt64: {
1712 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1713 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1714 if (is_volatile) {
1715 // Need to use a XMM to read atomically.
1716 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1717 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1718 __ movd(output_lo, temp);
1719 __ psrlq(temp, Immediate(32));
1720 __ movd(output_hi, temp);
1721 } else {
1722 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1723 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1724 }
1725 }
1726 break;
1727
1728 default:
1729 LOG(FATAL) << "Unsupported op size " << type;
1730 UNREACHABLE();
1731 }
1732 }
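
// Rough Java-level shape of the accesses compiled above (illustrative; the offset
// would come from Unsafe.objectFieldOffset or an array base/scale in real code):
//
//   int    v = unsafe.getInt(obj, offset);           // kInt32 case
//   long   l = unsafe.getLongVolatile(obj, offset);  // kInt64 + is_volatile: XMM load
//   Object o = unsafe.getObject(obj, offset);        // kReference: may emit a read barrier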
1733
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1734 static void GenUnsafeGetAbsolute(HInvoke* invoke,
1735 DataType::Type type,
1736 bool is_volatile,
1737 CodeGeneratorX86* codegen) {
1738 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1739 LocationSummary* locations = invoke->GetLocations();
1740 Register address = locations->InAt(1).AsRegisterPairLow<Register>();
1741 Address address_offset(address, 0);
1742 Location output_loc = locations->Out();
1743
1744 switch (type) {
1745 case DataType::Type::kInt8: {
1746 Register output = output_loc.AsRegister<Register>();
1747 __ movsxb(output, address_offset);
1748 break;
1749 }
1750
1751 case DataType::Type::kInt32: {
1752 Register output = output_loc.AsRegister<Register>();
1753 __ movl(output, address_offset);
1754 break;
1755 }
1756
1757 case DataType::Type::kInt64: {
1758 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1759 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1760 if (is_volatile) {
1761 // Need to use a XMM to read atomically.
1762 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1763 __ movsd(temp, address_offset);
1764 __ movd(output_lo, temp);
1765 __ psrlq(temp, Immediate(32));
1766 __ movd(output_hi, temp);
1767 } else {
1768 Address address_hi(address, 4);
1769 __ movl(output_lo, address_offset);
1770 __ movl(output_hi, address_hi);
1771 }
1772 }
1773 break;
1774
1775 default:
1776 LOG(FATAL) << "Unsupported op size " << type;
1777 UNREACHABLE();
1778 }
1779 }
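
// The "absolute" flavour reads from a raw address rather than object + offset,
// roughly the address-only Unsafe overloads such as getInt(long address)
// (illustrative; exact names depend on the Unsafe variant in use). Only the low word
// of the address register pair is dereferenced on 32-bit x86.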
1780
CreateIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1781 static void CreateIntIntToIntLocations(ArenaAllocator* allocator,
1782 HInvoke* invoke,
1783 DataType::Type type,
1784 bool is_volatile) {
1785 LocationSummary* locations =
1786 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1787 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1788 locations->SetInAt(1, Location::RequiresRegister());
1789 if (type == DataType::Type::kInt64) {
1790 if (is_volatile) {
1791 // Need to use XMM to read volatile.
1792 locations->AddTemp(Location::RequiresFpuRegister());
1793 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1794 } else {
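      // The two 32-bit loads may clobber an address register between them, so the
      // output pair must not share registers with the inputs (kOutputOverlap).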
1795 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1796 }
1797 } else {
1798 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1799 }
1800 }
1801
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen,DataType::Type type,bool is_volatile)1802 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1803 HInvoke* invoke,
1804 CodeGeneratorX86* codegen,
1805 DataType::Type type,
1806 bool is_volatile) {
1807 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1808 LocationSummary* locations =
1809 new (allocator) LocationSummary(invoke,
1810 can_call
1811 ? LocationSummary::kCallOnSlowPath
1812 : LocationSummary::kNoCall,
1813 kIntrinsified);
1814 if (can_call && kUseBakerReadBarrier) {
1815 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1816 }
1817 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1818 locations->SetInAt(1, Location::RequiresRegister());
1819 locations->SetInAt(2, Location::RequiresRegister());
1820 if (type == DataType::Type::kInt64) {
1821 if (is_volatile) {
1822 // Need to use XMM to read volatile.
1823 locations->AddTemp(Location::RequiresFpuRegister());
1824 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1825 } else {
1826 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1827 }
1828 } else {
1829 locations->SetOut(Location::RequiresRegister(),
1830 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1831 }
1832 }
1833
VisitUnsafeGet(HInvoke * invoke)1834 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1835 VisitJdkUnsafeGet(invoke);
1836 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1837 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1838 VisitJdkUnsafeGetAbsolute(invoke);
1839 }
VisitUnsafeGetVolatile(HInvoke * invoke)1840 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1841 VisitJdkUnsafeGetVolatile(invoke);
1842 }
VisitUnsafeGetLong(HInvoke * invoke)1843 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1844 VisitJdkUnsafeGetLong(invoke);
1845 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1846 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1847 VisitJdkUnsafeGetLongVolatile(invoke);
1848 }
VisitUnsafeGetObject(HInvoke * invoke)1849 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1850 VisitJdkUnsafeGetReference(invoke);
1851 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1852 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1853 VisitJdkUnsafeGetReferenceVolatile(invoke);
1854 }
VisitUnsafeGetByte(HInvoke * invoke)1855 void IntrinsicLocationsBuilderX86::VisitUnsafeGetByte(HInvoke* invoke) {
1856 VisitJdkUnsafeGetByte(invoke);
1857 }
1858
VisitUnsafeGet(HInvoke * invoke)1859 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1860 VisitJdkUnsafeGet(invoke);
1861 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1862 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1863 VisitJdkUnsafeGetAbsolute(invoke);
1864 }
VisitUnsafeGetVolatile(HInvoke * invoke)1865 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1866 VisitJdkUnsafeGetVolatile(invoke);
1867 }
VisitUnsafeGetLong(HInvoke * invoke)1868 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1869 VisitJdkUnsafeGetLong(invoke);
1870 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1871 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1872 VisitJdkUnsafeGetLongVolatile(invoke);
1873 }
VisitUnsafeGetObject(HInvoke * invoke)1874 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1875 VisitJdkUnsafeGetReference(invoke);
1876 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1877 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1878 VisitJdkUnsafeGetReferenceVolatile(invoke);
1879 }
VisitUnsafeGetByte(HInvoke * invoke)1880 void IntrinsicCodeGeneratorX86::VisitUnsafeGetByte(HInvoke* invoke) {
1881 VisitJdkUnsafeGetByte(invoke);
1882 }
1883
VisitJdkUnsafeGet(HInvoke * invoke)1884 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1885 CreateIntIntIntToIntLocations(
1886 allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ false);
1887 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1888 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1889 CreateIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/false);
1890 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1891 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1892 CreateIntIntIntToIntLocations(
1893 allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1894 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1895 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1896 CreateIntIntIntToIntLocations(
1897 allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1898 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1899 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1900 CreateIntIntIntToIntLocations(
1901 allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ false);
1902 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1903 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1904 CreateIntIntIntToIntLocations(
1905 allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1906 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1907 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1908 CreateIntIntIntToIntLocations(
1909 allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1910 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1911 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1912 CreateIntIntIntToIntLocations(
1913 allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ false);
1914 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1915 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1916 CreateIntIntIntToIntLocations(
1917 allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1918 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1919 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1920 CreateIntIntIntToIntLocations(
1921 allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1922 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1923 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1924 CreateIntIntIntToIntLocations(
1925 allocator_, invoke, codegen_, DataType::Type::kInt8, /*is_volatile=*/ false);
1926 }
1927
VisitJdkUnsafeGet(HInvoke * invoke)1928 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1929 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1930 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1931 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1932 GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1933 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1934 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1935 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1936 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1937 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1938 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1939 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1940 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1941 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1942 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1943 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1944 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1945 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1946 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1947 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1948 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1949 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1950 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1951 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1952 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1953 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1954 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1955 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1956 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1957 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1958 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1959 GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
1960 }
1961
CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1962 static void CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1963 DataType::Type type,
1964 HInvoke* invoke,
1965 bool is_volatile) {
1966 LocationSummary* locations =
1967 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1968 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1969 locations->SetInAt(1, Location::RequiresRegister());
1970 if (type == DataType::Type::kInt8 || type == DataType::Type::kUint8) {
1971 // Ensure the value is in a byte register
1972 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, invoke->InputAt(2)));
1973 } else {
1974 locations->SetInAt(2, Location::RequiresRegister());
1975 }
1976 if (type == DataType::Type::kInt64 && is_volatile) {
1977 locations->AddTemp(Location::RequiresFpuRegister());
1978 locations->AddTemp(Location::RequiresFpuRegister());
1979 }
1980 }
1981
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1982 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1983 DataType::Type type,
1984 HInvoke* invoke,
1985 bool is_volatile) {
1986 LocationSummary* locations =
1987 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1988 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1989 locations->SetInAt(1, Location::RequiresRegister());
1990 locations->SetInAt(2, Location::RequiresRegister());
1991 if (type == DataType::Type::kInt8 || type == DataType::Type::kUint8) {
1992 // Ensure the value is in a byte register
1993 locations->SetInAt(3, Location::ByteRegisterOrConstant(EAX, invoke->InputAt(3)));
1994 } else {
1995 locations->SetInAt(3, Location::RequiresRegister());
1996 }
1997 if (type == DataType::Type::kReference) {
1998 // Need temp registers for card-marking.
1999 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2000 // Need a byte register for card marking; reserve ECX.
2001 locations->AddTemp(Location::RegisterLocation(ECX));
2002 } else if (type == DataType::Type::kInt64 && is_volatile) {
2003 locations->AddTemp(Location::RequiresFpuRegister());
2004 locations->AddTemp(Location::RequiresFpuRegister());
2005 }
2006 }
2007
VisitUnsafePut(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
2009 VisitJdkUnsafePut(invoke);
2010 }
VisitUnsafePutAbsolute(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86::VisitUnsafePutAbsolute(HInvoke* invoke) {
2012 VisitJdkUnsafePutAbsolute(invoke);
2013 }
VisitUnsafePutOrdered(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2015 VisitJdkUnsafePutOrdered(invoke);
2016 }
VisitUnsafePutVolatile(HInvoke * invoke)2017 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2018 VisitJdkUnsafePutVolatile(invoke);
2019 }
VisitUnsafePutObject(HInvoke * invoke)2020 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
2021 VisitJdkUnsafePutReference(invoke);
2022 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2023 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2024 VisitJdkUnsafePutObjectOrdered(invoke);
2025 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2026 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2027 VisitJdkUnsafePutReferenceVolatile(invoke);
2028 }
VisitUnsafePutLong(HInvoke * invoke)2029 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
2030 VisitJdkUnsafePutLong(invoke);
2031 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2032 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2033 VisitJdkUnsafePutLongOrdered(invoke);
2034 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2035 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2036 VisitJdkUnsafePutLongVolatile(invoke);
2037 }
VisitUnsafePutByte(HInvoke * invoke)2038 void IntrinsicLocationsBuilderX86::VisitUnsafePutByte(HInvoke* invoke) {
2039 VisitJdkUnsafePutByte(invoke);
2040 }
2041
VisitJdkUnsafePut(HInvoke * invoke)2042 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
2043 CreateIntIntIntIntToVoidPlusTempsLocations(
2044 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
2045 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2046 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2047 CreateIntIntIntToVoidPlusTempsLocations(
2048 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
2049 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2050 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2051 CreateIntIntIntIntToVoidPlusTempsLocations(
2052 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
2053 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2054 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2055 CreateIntIntIntIntToVoidPlusTempsLocations(
2056 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
2057 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2058 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2059 CreateIntIntIntIntToVoidPlusTempsLocations(
2060 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
2061 }
VisitJdkUnsafePutReference(HInvoke * invoke)2062 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
2063 CreateIntIntIntIntToVoidPlusTempsLocations(
2064 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
2065 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2066 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2067 CreateIntIntIntIntToVoidPlusTempsLocations(
2068 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
2069 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2070 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2071 CreateIntIntIntIntToVoidPlusTempsLocations(
2072 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
2073 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2074 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2075 CreateIntIntIntIntToVoidPlusTempsLocations(
2076 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
2077 }
VisitJdkUnsafePutLong(HInvoke * invoke)2078 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2079 CreateIntIntIntIntToVoidPlusTempsLocations(
2080 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
2081 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2082 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2083 CreateIntIntIntIntToVoidPlusTempsLocations(
2084 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
2085 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2086 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2087 CreateIntIntIntIntToVoidPlusTempsLocations(
2088 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
2089 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2090 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2091 CreateIntIntIntIntToVoidPlusTempsLocations(
2092 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
2093 }
VisitJdkUnsafePutByte(HInvoke * invoke)2094 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
2095 CreateIntIntIntIntToVoidPlusTempsLocations(
2096 allocator_, DataType::Type::kInt8, invoke, /*is_volatile=*/ false);
2097 }
2098
2099 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2100 // memory model.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)2101 static void GenUnsafePut(LocationSummary* locations,
2102 DataType::Type type,
2103 bool is_volatile,
2104 CodeGeneratorX86* codegen) {
2105 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2106 Register base = locations->InAt(1).AsRegister<Register>();
2107 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2108 Location value_loc = locations->InAt(3);
2109
2110 if (type == DataType::Type::kInt64) {
2111 Register value_lo = value_loc.AsRegisterPairLow<Register>();
2112 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2113 if (is_volatile) {
2114 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2115 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2116 __ movd(temp1, value_lo);
2117 __ movd(temp2, value_hi);
2118 __ punpckldq(temp1, temp2);
2119 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2120 } else {
2121 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2122 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2123 }
2124 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2125 Register temp = locations->GetTemp(0).AsRegister<Register>();
2126 __ movl(temp, value_loc.AsRegister<Register>());
2127 __ PoisonHeapReference(temp);
2128 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2129 } else if (type == DataType::Type::kInt32 || type == DataType::Type::kReference) {
2130 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2131 } else {
2132 CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2133 if (value_loc.IsRegister()) {
2134 __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<ByteRegister>());
2135 } else {
2136 __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0),
2137 Immediate(CodeGenerator::GetInt8ValueOf(value_loc.GetConstant())));
2138 }
2139 }
2140
2141 if (is_volatile) {
2142 codegen->MemoryFence();
2143 }
2144
2145 if (type == DataType::Type::kReference) {
2146 bool value_can_be_null = true; // TODO: Worth finding out this information?
2147 codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2148 locations->GetTemp(1).AsRegister<Register>(),
2149 base,
2150 value_loc.AsRegister<Register>(),
2151 value_can_be_null);
2152 }
2153 }
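
// Illustrative Java-level stores handled above (offset from Unsafe.objectFieldOffset
// in real code; examples only):
//
//   unsafe.putInt(obj, offset, 42);
//   unsafe.putLongVolatile(obj, offset, 42L);  // XMM store + MemoryFence()
//   unsafe.putObject(obj, offset, value);      // also marks the GC card for `obj`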
2154
2155 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2156 // memory model.
GenUnsafePutAbsolute(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)2157 static void GenUnsafePutAbsolute(LocationSummary* locations,
2158 DataType::Type type,
2159 bool is_volatile,
2160 CodeGeneratorX86* codegen) {
2161 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2162 Register address = locations->InAt(1).AsRegisterPairLow<Register>();
2163 Address address_offset(address, 0);
2164 Location value_loc = locations->InAt(2);
2165
2166 if (type == DataType::Type::kInt64) {
2167 Register value_lo = value_loc.AsRegisterPairLow<Register>();
2168 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2169 if (is_volatile) {
2170 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2171 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2172 __ movd(temp1, value_lo);
2173 __ movd(temp2, value_hi);
2174 __ punpckldq(temp1, temp2);
2175 __ movsd(address_offset, temp1);
2176 } else {
2177 __ movl(address_offset, value_lo);
2178 __ movl(Address(address, 4), value_hi);
2179 }
2180 } else if (type == DataType::Type::kInt32) {
2181 __ movl(address_offset, value_loc.AsRegister<Register>());
2182 } else {
2183 CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePutAbsolute data type";
2184 if (value_loc.IsRegister()) {
2185 __ movb(address_offset, value_loc.AsRegister<ByteRegister>());
2186 } else {
2187 __ movb(address_offset,
2188 Immediate(CodeGenerator::GetInt8ValueOf(value_loc.GetConstant())));
2189 }
2190 }
2191
2192 if (is_volatile) {
2193 codegen->MemoryFence();
2194 }
2195 }
2196
VisitUnsafePut(HInvoke * invoke)2197 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2198 VisitJdkUnsafePut(invoke);
2199 }
VisitUnsafePutAbsolute(HInvoke * invoke)2200 void IntrinsicCodeGeneratorX86::VisitUnsafePutAbsolute(HInvoke* invoke) {
2201 VisitJdkUnsafePutAbsolute(invoke);
2202 }
VisitUnsafePutOrdered(HInvoke * invoke)2203 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2204 VisitJdkUnsafePutOrdered(invoke);
2205 }
VisitUnsafePutVolatile(HInvoke * invoke)2206 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2207 VisitJdkUnsafePutVolatile(invoke);
2208 }
VisitUnsafePutObject(HInvoke * invoke)2209 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2210 VisitJdkUnsafePutReference(invoke);
2211 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2212 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2213 VisitJdkUnsafePutObjectOrdered(invoke);
2214 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2215 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2216 VisitJdkUnsafePutReferenceVolatile(invoke);
2217 }
VisitUnsafePutLong(HInvoke * invoke)2218 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2219 VisitJdkUnsafePutLong(invoke);
2220 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2221 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2222 VisitJdkUnsafePutLongOrdered(invoke);
2223 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2224 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2225 VisitJdkUnsafePutLongVolatile(invoke);
2226 }
VisitUnsafePutByte(HInvoke * invoke)2227 void IntrinsicCodeGeneratorX86::VisitUnsafePutByte(HInvoke* invoke) {
2228 VisitJdkUnsafePutByte(invoke);
2229 }
2230
VisitJdkUnsafePut(HInvoke * invoke)2231 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
2232 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2233 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2234 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2235 GenUnsafePutAbsolute(
2236 invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/false, codegen_);
2237 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2238 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2239 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2240 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2241 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2242 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2243 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2244 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2245 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2246 }
VisitJdkUnsafePutReference(HInvoke * invoke)2247 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
2248 GenUnsafePut(
2249 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2250 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2251 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2252 GenUnsafePut(
2253 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2254 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2255 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2256 GenUnsafePut(
2257 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2258 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2259 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2260 GenUnsafePut(
2261 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2262 }
VisitJdkUnsafePutLong(HInvoke * invoke)2263 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2264 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2265 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2266 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2267 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2268 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2269 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2270 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2271 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2272 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2273 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2274 }
VisitJdkUnsafePutByte(HInvoke * invoke)2275 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
2276 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
2277 }
2278
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,DataType::Type type,HInvoke * invoke)2279 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2280 CodeGeneratorX86* codegen,
2281 DataType::Type type,
2282 HInvoke* invoke) {
2283 const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2284 LocationSummary* locations =
2285 new (allocator) LocationSummary(invoke,
2286 can_call
2287 ? LocationSummary::kCallOnSlowPath
2288 : LocationSummary::kNoCall,
2289 kIntrinsified);
2290 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2291 locations->SetInAt(1, Location::RequiresRegister());
2292 // Offset is a long, but in 32 bit mode, we only need the low word.
2293 // Can we update the invoke here to remove a TypeConvert to Long?
2294 locations->SetInAt(2, Location::RequiresRegister());
2295 // Expected value must be in EAX or EDX:EAX.
2296 // For long, new value must be in ECX:EBX.
2297 if (type == DataType::Type::kInt64) {
2298 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2299 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2300 } else {
2301 locations->SetInAt(3, Location::RegisterLocation(EAX));
2302 locations->SetInAt(4, Location::RequiresRegister());
2303 }
2304
2305 // Force a byte register for the output.
2306 locations->SetOut(Location::RegisterLocation(EAX));
2307 if (type == DataType::Type::kReference) {
2308 // Need temporary registers for card-marking, and possibly for
2309 // (Baker) read barrier.
2310 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2311 // Need a byte register for marking.
2312 locations->AddTemp(Location::RegisterLocation(ECX));
2313 }
2314 }
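
// The fixed registers above mirror the hardware contract of CMPXCHG/CMPXCHG8B: the
// expected value and the result implicitly live in EAX (EDX:EAX for the 8-byte form),
// and CMPXCHG8B takes its new value in ECX:EBX.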
2315
VisitUnsafeCASInt(HInvoke * invoke)2316 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2317 VisitJdkUnsafeCASInt(invoke);
2318 }
2319
VisitUnsafeCASLong(HInvoke * invoke)2320 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2321 VisitJdkUnsafeCASLong(invoke);
2322 }
2323
VisitUnsafeCASObject(HInvoke * invoke)2324 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2325 VisitJdkUnsafeCASObject(invoke);
2326 }
2327
VisitJdkUnsafeCASInt(HInvoke * invoke)2328 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2329 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2330 VisitJdkUnsafeCompareAndSetInt(invoke);
2331 }
2332
VisitJdkUnsafeCASLong(HInvoke * invoke)2333 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2334 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2335 VisitJdkUnsafeCompareAndSetLong(invoke);
2336 }
2337
VisitJdkUnsafeCASObject(HInvoke * invoke)2338 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2339 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2340 VisitJdkUnsafeCompareAndSetReference(invoke);
2341 }
2342
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2343 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2344 CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt32, invoke);
2345 }
2346
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2347 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2348 CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt64, invoke);
2349 }
2350
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2351 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2352 // The only supported read barrier implementation is the Baker-style read barriers.
2353 if (codegen_->EmitNonBakerReadBarrier()) {
2354 return;
2355 }
2356
2357 CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kReference, invoke);
2358 }
2359
GenPrimitiveLockedCmpxchg(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp=Register::kNoRegister)2360 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
2361 CodeGeneratorX86* codegen,
2362 Location expected_value,
2363 Location new_value,
2364 Register base,
2365 Register offset,
2366 // Only necessary for floating point
2367 Register temp = Register::kNoRegister) {
2368 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2369
2370 if (DataType::Kind(type) == DataType::Type::kInt32) {
2371 DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
2372 }
2373
2374 // The address of the field within the holding object.
2375 Address field_addr(base, offset, TIMES_1, 0);
2376
2377 switch (type) {
2378 case DataType::Type::kBool:
2379 case DataType::Type::kInt8:
2380 __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
2381 break;
2382 case DataType::Type::kInt16:
2383 case DataType::Type::kUint16:
2384 __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
2385 break;
2386 case DataType::Type::kInt32:
2387 __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
2388 break;
2389 case DataType::Type::kFloat32: {
2390 // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
2391 DCHECK_NE(temp, EAX);
2392 // EAX is both an input and an output for cmpxchg
2393 codegen->Move32(Location::RegisterLocation(EAX), expected_value);
2394 codegen->Move32(Location::RegisterLocation(temp), new_value);
2395 __ LockCmpxchgl(field_addr, temp);
2396 break;
2397 }
2398 case DataType::Type::kInt64:
2399 // Ensure the expected value is in EAX:EDX and that the new
2400 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2401 DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2402 DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2403 DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2404 DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2405 __ LockCmpxchg8b(field_addr);
2406 break;
2407 default:
2408 LOG(FATAL) << "Unexpected CAS type " << type;
2409 }
2410 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2411 // don't need scheduling barriers at this time.
2412 }
2413
GenPrimitiveCAS(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Location out,Register temp=Register::kNoRegister,bool is_cmpxchg=false)2414 static void GenPrimitiveCAS(DataType::Type type,
2415 CodeGeneratorX86* codegen,
2416 Location expected_value,
2417 Location new_value,
2418 Register base,
2419 Register offset,
2420 Location out,
2421 // Only necessary for floating point
2422 Register temp = Register::kNoRegister,
2423 bool is_cmpxchg = false) {
2424 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2425
2426 if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2427 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2428 }
2429
2430 GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2431
2432 if (is_cmpxchg) {
2433 // Sign-extend, zero-extend or move the result if necessary
2434 switch (type) {
2435 case DataType::Type::kBool:
2436 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2437 break;
2438 case DataType::Type::kInt8:
2439 __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2440 break;
2441 case DataType::Type::kInt16:
2442 __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2443 break;
2444 case DataType::Type::kUint16:
2445 __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2446 break;
2447 case DataType::Type::kFloat32:
2448 __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2449 break;
2450 default:
2451 // Nothing to do
2452 break;
2453 }
2454 } else {
2455 // Convert ZF into the Boolean result.
2456 __ setb(kZero, out.AsRegister<Register>());
2457 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2458 }
2459 }
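// For example, a kInt32 compare-and-set (`is_cmpxchg == false`) produced by the helpers
// above boils down to roughly this sequence, with the expected value pre-loaded into EAX:
//   lock cmpxchgl new_value, [base + offset]
//   sete out        // out = 1 iff the exchange happened (ZF set)
//   movzxb out, out // widen the byte to a 32-bit boolean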
2460
2461 static void GenReferenceCAS(HInvoke* invoke,
2462 CodeGeneratorX86* codegen,
2463 Location expected_value,
2464 Location new_value,
2465 Register base,
2466 Register offset,
2467 Register temp,
2468 Register temp2,
2469 bool is_cmpxchg = false) {
2470 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2471 LocationSummary* locations = invoke->GetLocations();
2472 Location out = locations->Out();
2473
2474 // The address of the field within the holding object.
2475 Address field_addr(base, offset, TIMES_1, 0);
2476
2477 Register value = new_value.AsRegister<Register>();
2478 Register expected = expected_value.AsRegister<Register>();
2479 DCHECK_EQ(expected, EAX);
2480 DCHECK_NE(temp, temp2);
2481
2482 if (codegen->EmitBakerReadBarrier()) {
2483 // Need to make sure the reference stored in the field is a to-space
2484 // one before attempting the CAS or the CAS could fail incorrectly.
2485 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2486 invoke,
2487 // Unused, used only as a "temporary" within the read barrier.
2488 Location::RegisterLocation(temp),
2489 base,
2490 field_addr,
2491 /* needs_null_check= */ false,
2492 /* always_update_field= */ true,
2493 &temp2);
2494 }
2495 bool base_equals_value = (base == value);
2496 if (kPoisonHeapReferences) {
2497 if (base_equals_value) {
2498 // If `base` and `value` are the same register location, move
2499 // `value` to a temporary register. This way, poisoning
2500 // `value` won't invalidate `base`.
2501 value = temp;
2502 __ movl(value, base);
2503 }
2504
2505 // Check that the register allocator did not assign the location
2506 // of `expected` (EAX) to `value` nor to `base`, so that heap
2507 // poisoning (when enabled) works as intended below.
2508 // - If `value` were equal to `expected`, both references would
2509 // be poisoned twice, meaning they would not be poisoned at
2510 // all, as heap poisoning uses address negation.
2511 // - If `base` were equal to `expected`, poisoning `expected`
2512 // would invalidate `base`.
2513 DCHECK_NE(value, expected);
2514 DCHECK_NE(base, expected);
2515 __ PoisonHeapReference(expected);
2516 __ PoisonHeapReference(value);
2517 }
2518 __ LockCmpxchgl(field_addr, value);
2519
2520 // LOCK CMPXCHG has full barrier semantics, and we don't need
2521 // scheduling barriers at this time.
2522
2523 if (is_cmpxchg) {
2524 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2525 __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2526 } else {
2527 // Convert ZF into the Boolean result.
2528 __ setb(kZero, out.AsRegister<Register>());
2529 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2530 }
2531
2532 // Mark card for object if the new value is stored.
2533 bool value_can_be_null = true; // TODO: Worth finding out this information?
2534 NearLabel skip_mark_gc_card;
2535 __ j(kNotZero, &skip_mark_gc_card);
2536 codegen->MaybeMarkGCCard(temp, temp2, base, value, value_can_be_null);
2537 __ Bind(&skip_mark_gc_card);
2538
2539 // If heap poisoning is enabled, we need to unpoison the values
2540 // that were poisoned earlier.
2541 if (kPoisonHeapReferences) {
2542 if (base_equals_value) {
2543 // `value` has been moved to a temporary register, no need to
2544 // unpoison it.
2545 } else {
2546 // Ensure `value` is different from `out`, so that unpoisoning
2547 // the former does not invalidate the latter.
2548 DCHECK_NE(value, out.AsRegister<Register>());
2549 __ UnpoisonHeapReference(value);
2550 }
2551 }
2552 // Do not unpoison the reference contained in register
2553 // `expected`, as it is the same as register `out` (EAX).
2554 }
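// A note on the poisoning argument above: heap reference poisoning negates the 32-bit
// reference bits, i.e. poison(x) == -x, so applying it twice to the same register is a
// no-op (-(-x) == x). For example, poison(0x12345678) == 0xedcba988 and poisoning that
// value again yields 0x12345678. This is why the DCHECKs insist that `value` and `base`
// are distinct from `expected` before the two PoisonHeapReference calls.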
2555
2556 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2557 LocationSummary* locations = invoke->GetLocations();
2558
2559 Register base = locations->InAt(1).AsRegister<Register>();
2560 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2561 Location expected_value = locations->InAt(3);
2562 Location new_value = locations->InAt(4);
2563 Location out = locations->Out();
2564 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2565
2566 if (type == DataType::Type::kReference) {
2567 // The only read barrier implementation supporting the
2568 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2569 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2570
2571 Register temp = locations->GetTemp(0).AsRegister<Register>();
2572 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2573 GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2574 } else {
2575 DCHECK(!DataType::IsFloatingPointType(type));
2576 GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2577 }
2578 }
2579
2580 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2581 VisitJdkUnsafeCASInt(invoke);
2582 }
2583
2584 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2585 VisitJdkUnsafeCASLong(invoke);
2586 }
2587
2588 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2589 // The only read barrier implementation supporting the
2590 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2591 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2592
2593 GenCAS(DataType::Type::kReference, invoke, codegen_);
2594 }
2595
2596 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2597 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2598 VisitJdkUnsafeCompareAndSetInt(invoke);
2599 }
2600
2601 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2602 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2603 VisitJdkUnsafeCompareAndSetLong(invoke);
2604 }
2605
2606 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2607 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2608 VisitJdkUnsafeCompareAndSetReference(invoke);
2609 }
2610
2611 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2612 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2613 }
2614
2615 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2616 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2617 }
2618
2619 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2620 // The only supported read barrier implementation is the Baker-style read barriers.
2621 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2622
2623 GenCAS(DataType::Type::kReference, invoke, codegen_);
2624 }
2625
2626 // Note: Unlike other architectures, where the corresponding enum is shared with the
2627 // `VarHandle` implementation, x86 currently uses it only for `Unsafe`.
2628 enum class GetAndUpdateOp {
2629 kSet,
2630 kAdd,
2631 };
2632
2633 void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2634 HInvoke* invoke,
2635 CodeGeneratorX86* codegen,
2636 DataType::Type type,
2637 GetAndUpdateOp get_and_unsafe_op) {
2638 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2639 LocationSummary* locations =
2640 new (allocator) LocationSummary(invoke,
2641 can_call
2642 ? LocationSummary::kCallOnSlowPath
2643 : LocationSummary::kNoCall,
2644 kIntrinsified);
2645 if (can_call && kUseBakerReadBarrier) {
2646 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2647 }
2648 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2649 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2650 if (type == DataType::Type::kInt64) {
2651 // Explicitly allocate all registers.
2652 locations->SetInAt(1, Location::RegisterLocation(EBP));
2653 if (get_and_unsafe_op == GetAndUpdateOp::kAdd) {
2654 locations->AddTemp(Location::RegisterLocation(EBP)); // We shall clobber EBP.
2655 locations->SetInAt(2, Location::Any()); // Offset shall be on the stack.
2656 locations->SetInAt(3, Location::RegisterPairLocation(ESI, EDI));
2657 locations->AddTemp(Location::RegisterLocation(EBX));
2658 locations->AddTemp(Location::RegisterLocation(ECX));
2659 } else {
2660 locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI));
2661 locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX));
2662 }
2663 if (is_void) {
2664 locations->AddTemp(Location::RegisterLocation(EAX));
2665 locations->AddTemp(Location::RegisterLocation(EDX));
2666 } else {
2667 locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
2668 }
2669 } else {
2670 locations->SetInAt(1, Location::RequiresRegister());
2671 locations->SetInAt(2, Location::RequiresRegister());
2672 // Use the same register for both the output and the new value or addend
2673 // to take advantage of XCHG or XADD. Arbitrarily pick EAX.
2674 locations->SetInAt(3, Location::RegisterLocation(EAX));
2675     // Only set the `out` register if it is needed. In the void case we can still use EAX in
2676     // the same manner, since it is marked as a temp register.
2677 if (is_void) {
2678 locations->AddTemp(Location::RegisterLocation(EAX));
2679 } else {
2680 locations->SetOut(Location::RegisterLocation(EAX));
2681 }
2682 }
2683 }
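// For reference, the explicit register assignment above for the 64-bit getAndAdd case is:
//   in1 (object)  -> EBP (also added as a temp, since it gets clobbered with the address),
//   in2 (offset)  -> any location, expected to end up on the stack,
//   in3 (addend)  -> ESI:EDI,
//   temps         -> EBX:ECX (the new value for LOCK CMPXCHG8B),
//   out           -> EAX (low) / EDX (high), the old value required by LOCK CMPXCHG8B;
//   void invokes receive EAX and EDX as temps instead of an output.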
2684
2685 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2686 VisitJdkUnsafeGetAndAddInt(invoke);
2687 }
2688
2689 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2690 VisitJdkUnsafeGetAndAddLong(invoke);
2691 }
2692
2693 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2694 VisitJdkUnsafeGetAndSetInt(invoke);
2695 }
2696
2697 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2698 VisitJdkUnsafeGetAndSetLong(invoke);
2699 }
2700
2701 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2702 VisitJdkUnsafeGetAndSetReference(invoke);
2703 }
2704
2705 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2706 CreateUnsafeGetAndUpdateLocations(
2707 allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
2708 }
2709
2710 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2711 CreateUnsafeGetAndUpdateLocations(
2712 allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
2713 }
2714
2715 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2716 CreateUnsafeGetAndUpdateLocations(
2717 allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
2718 }
2719
2720 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2721 CreateUnsafeGetAndUpdateLocations(
2722 allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
2723 }
2724
2725 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2726 // The only supported read barrier implementation is the Baker-style read barriers.
2727 if (codegen_->EmitNonBakerReadBarrier()) {
2728 return;
2729 }
2730
2731 CreateUnsafeGetAndUpdateLocations(
2732 allocator_, invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
2733 LocationSummary* locations = invoke->GetLocations();
2734 locations->AddTemp(Location::RequiresRegister());
2735 locations->AddTemp(Location::RegisterLocation(ECX)); // Byte register for `MarkGCCard()`.
2736 }
2737
2738 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2739 DataType::Type type,
2740 CodeGeneratorX86* codegen,
2741 GetAndUpdateOp get_and_update_op) {
2742 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2743 LocationSummary* locations = invoke->GetLocations();
2744
2745 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2746   // For void methods we request specific registers to use as temps, since we don't return
2747   // the value.
2748 Location out_or_temp =
2749 is_void ? (type == DataType::Type::kInt64 ? Location::RegisterPairLocation(EAX, EDX) :
2750 Location::RegisterLocation(EAX)) :
2751 locations->Out();
2752 Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
2753 Location offset = locations->InAt(2); // Long offset.
2754 Location arg = locations->InAt(3); // New value or addend.
2755
2756 if (type == DataType::Type::kInt32) {
2757 DCHECK(out_or_temp.Equals(arg));
2758 Register out_reg = out_or_temp.AsRegister<Register>();
2759 Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2760 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2761 __ LockXaddl(field_address, out_reg);
2762 } else {
2763 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2764 __ xchgl(out_reg, field_address);
2765 }
2766 } else if (type == DataType::Type::kInt64) {
2767 // Prepare the field address. Ignore the high 32 bits of the `offset`.
2768 Address field_address_low(kNoRegister, 0), field_address_high(kNoRegister, 0);
2769 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2770 DCHECK(offset.IsDoubleStackSlot());
2771 __ addl(base, Address(ESP, offset.GetStackIndex())); // Clobbers `base`.
2772 DCHECK(Location::RegisterLocation(base).Equals(locations->GetTemp(0)));
2773 field_address_low = Address(base, 0);
2774 field_address_high = Address(base, 4);
2775 } else {
2776 field_address_low = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2777 field_address_high = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 4);
2778 }
2779 // Load the old value to EDX:EAX and use LOCK CMPXCHG8B to set the new value.
2780 NearLabel loop;
2781 __ Bind(&loop);
2782 __ movl(EAX, field_address_low);
2783 __ movl(EDX, field_address_high);
2784 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2785 DCHECK(Location::RegisterPairLocation(ESI, EDI).Equals(arg));
2786 __ movl(EBX, EAX);
2787 __ movl(ECX, EDX);
2788 __ addl(EBX, ESI);
2789 __ adcl(ECX, EDI);
2790 } else {
2791 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2792 DCHECK(Location::RegisterPairLocation(EBX, ECX).Equals(arg));
2793 }
2794 __ LockCmpxchg8b(field_address_low);
2795 __ j(kNotEqual, &loop); // Repeat on failure.
2796 } else {
2797 DCHECK_EQ(type, DataType::Type::kReference);
2798 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2799 Register out_reg = out_or_temp.AsRegister<Register>();
2800 Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2801 Register temp1 = locations->GetTemp(0).AsRegister<Register>();
2802 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2803
2804 if (codegen->EmitReadBarrier()) {
2805 DCHECK(kUseBakerReadBarrier);
2806 // Ensure that the field contains a to-space reference.
2807 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2808 invoke,
2809 Location::RegisterLocation(temp2),
2810 base,
2811 field_address,
2812 /*needs_null_check=*/ false,
2813 /*always_update_field=*/ true,
2814 &temp1);
2815 }
2816
2817 // Mark card for object as a new value shall be stored.
2818 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
2819 DCHECK_EQ(temp2, ECX); // Byte register for `MarkGCCard()`.
2820 codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_reg, new_value_can_be_null);
2821
2822 if (kPoisonHeapReferences) {
2823 // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2824 // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2825 __ movl(temp1, out_reg);
2826 __ PoisonHeapReference(temp1);
2827 __ xchgl(temp1, field_address);
2828 if (!is_void) {
2829 __ UnpoisonHeapReference(temp1);
2830 __ movl(out_reg, temp1);
2831 }
2832 } else {
2833 __ xchgl(out_reg, field_address);
2834 }
2835 }
2836 }
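// For illustration, the 64-bit getAndAdd path above expands to a CAS loop roughly like:
//   retry:
//     EDX:EAX = *field;             // load the old value
//     ECX:EBX = EDX:EAX + EDI:ESI;  // old value plus addend (addl/adcl)
//     lock cmpxchg8b *field;        // store ECX:EBX if *field still equals EDX:EAX
//     if (!ZF) goto retry;          // another thread updated the field, try again
//   // EDX:EAX now holds the value observed just before the addition.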
2837
2838 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2839 VisitJdkUnsafeGetAndAddInt(invoke);
2840 }
2841
2842 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2843 VisitJdkUnsafeGetAndAddLong(invoke);
2844 }
2845
2846 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2847 VisitJdkUnsafeGetAndSetInt(invoke);
2848 }
2849
2850 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2851 VisitJdkUnsafeGetAndSetLong(invoke);
2852 }
2853
2854 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2855 VisitJdkUnsafeGetAndSetReference(invoke);
2856 }
2857
2858 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2859 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2860 }
2861
2862 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2863 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2864 }
2865
2866 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2867 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2868 }
2869
2870 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2871 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2872 }
2873
2874 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2875 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2876 }
2877
2878 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2879 LocationSummary* locations =
2880 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2881 locations->SetInAt(0, Location::RequiresRegister());
2882 locations->SetOut(Location::SameAsFirstInput());
2883 locations->AddTemp(Location::RequiresRegister());
2884 }
2885
2886 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2887 X86Assembler* assembler) {
2888 Immediate imm_shift(shift);
2889 Immediate imm_mask(mask);
2890 __ movl(temp, reg);
2891 __ shrl(reg, imm_shift);
2892 __ andl(temp, imm_mask);
2893 __ andl(reg, imm_mask);
2894 __ shll(temp, imm_shift);
2895 __ orl(reg, temp);
2896 }
2897
2898 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2899 X86Assembler* assembler = GetAssembler();
2900 LocationSummary* locations = invoke->GetLocations();
2901
2902 Register reg = locations->InAt(0).AsRegister<Register>();
2903 Register temp = locations->GetTemp(0).AsRegister<Register>();
2904
2905 /*
2906    * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
2907    * swapping bits to reverse the bits in a number x. Using bswap saves instructions
2908    * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2909 * x = bswap x
2910 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2911 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2912 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2913 */
2914 __ bswapl(reg);
2915 SwapBits(reg, temp, 1, 0x55555555, assembler);
2916 SwapBits(reg, temp, 2, 0x33333333, assembler);
2917 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2918 }
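// Worked example of the sequence above, reversing x = 0x00000001:
//   bswapl:              0x01000000
//   SwapBits(shift 1):   0x02000000
//   SwapBits(shift 2):   0x08000000
//   SwapBits(shift 4):   0x80000000 == Integer.reverse(1)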
2919
2920 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2921 LocationSummary* locations =
2922 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2923 locations->SetInAt(0, Location::RequiresRegister());
2924 locations->SetOut(Location::SameAsFirstInput());
2925 locations->AddTemp(Location::RequiresRegister());
2926 }
2927
2928 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2929 X86Assembler* assembler = GetAssembler();
2930 LocationSummary* locations = invoke->GetLocations();
2931
2932 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2933 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2934 Register temp = locations->GetTemp(0).AsRegister<Register>();
2935
2936 // We want to swap high/low, then bswap each one, and then do the same
2937 // as a 32 bit reverse.
2938 // Exchange high and low.
2939 __ movl(temp, reg_low);
2940 __ movl(reg_low, reg_high);
2941 __ movl(reg_high, temp);
2942
2943 // bit-reverse low
2944 __ bswapl(reg_low);
2945 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2946 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2947 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2948
2949 // bit-reverse high
2950 __ bswapl(reg_high);
2951 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2952 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2953 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2954 }
2955
2956 static void CreateBitCountLocations(
2957 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2958 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2959 // Do nothing if there is no popcnt support. This results in generating
2960 // a call for the intrinsic rather than direct code.
2961 return;
2962 }
2963 LocationSummary* locations =
2964 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2965 if (is_long) {
2966 locations->AddTemp(Location::RequiresRegister());
2967 }
2968 locations->SetInAt(0, Location::Any());
2969 locations->SetOut(Location::RequiresRegister());
2970 }
2971
2972 static void GenBitCount(X86Assembler* assembler,
2973 CodeGeneratorX86* codegen,
2974 HInvoke* invoke, bool is_long) {
2975 LocationSummary* locations = invoke->GetLocations();
2976 Location src = locations->InAt(0);
2977 Register out = locations->Out().AsRegister<Register>();
2978
2979 if (invoke->InputAt(0)->IsConstant()) {
2980 // Evaluate this at compile time.
2981 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2982 int32_t result = is_long
2983 ? POPCOUNT(static_cast<uint64_t>(value))
2984 : POPCOUNT(static_cast<uint32_t>(value));
2985 codegen->Load32BitValue(out, result);
2986 return;
2987 }
2988
2989 // Handle the non-constant cases.
2990 if (!is_long) {
2991 if (src.IsRegister()) {
2992 __ popcntl(out, src.AsRegister<Register>());
2993 } else {
2994 DCHECK(src.IsStackSlot());
2995 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2996 }
2997 } else {
2998 // The 64-bit case needs to worry about two parts.
2999 Register temp = locations->GetTemp(0).AsRegister<Register>();
3000 if (src.IsRegisterPair()) {
3001 __ popcntl(temp, src.AsRegisterPairLow<Register>());
3002 __ popcntl(out, src.AsRegisterPairHigh<Register>());
3003 } else {
3004 DCHECK(src.IsDoubleStackSlot());
3005 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
3006 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
3007 }
3008 __ addl(out, temp);
3009 }
3010 }
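// The 64-bit case above relies on bitCount(x) == bitCount(low word) + bitCount(high word).
// For example, x = 0x0000000380000000: popcnt(0x80000000) = 1 and popcnt(0x00000003) = 2,
// so Long.bitCount(x) = 3.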
3011
3012 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
3013 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
3014 }
3015
3016 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
3017 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3018 }
3019
3020 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
3021 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
3022 }
3023
3024 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
3025 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3026 }
3027
3028 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
3029 LocationSummary* locations =
3030 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3031 if (is_long) {
3032 locations->SetInAt(0, Location::RequiresRegister());
3033 } else {
3034 locations->SetInAt(0, Location::Any());
3035 }
3036 locations->SetOut(Location::RequiresRegister());
3037 }
3038
3039 static void GenLeadingZeros(X86Assembler* assembler,
3040 CodeGeneratorX86* codegen,
3041 HInvoke* invoke, bool is_long) {
3042 LocationSummary* locations = invoke->GetLocations();
3043 Location src = locations->InAt(0);
3044 Register out = locations->Out().AsRegister<Register>();
3045
3046 if (invoke->InputAt(0)->IsConstant()) {
3047 // Evaluate this at compile time.
3048 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3049 if (value == 0) {
3050 value = is_long ? 64 : 32;
3051 } else {
3052 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3053 }
3054 codegen->Load32BitValue(out, value);
3055 return;
3056 }
3057
3058 // Handle the non-constant cases.
3059 if (!is_long) {
3060 if (src.IsRegister()) {
3061 __ bsrl(out, src.AsRegister<Register>());
3062 } else {
3063 DCHECK(src.IsStackSlot());
3064 __ bsrl(out, Address(ESP, src.GetStackIndex()));
3065 }
3066
3067 // BSR sets ZF if the input was zero, and the output is undefined.
3068 NearLabel all_zeroes, done;
3069 __ j(kEqual, &all_zeroes);
3070
3071 // Correct the result from BSR to get the final CLZ result.
3072 __ xorl(out, Immediate(31));
3073 __ jmp(&done);
3074
3075 // Fix the zero case with the expected result.
3076 __ Bind(&all_zeroes);
3077 __ movl(out, Immediate(32));
3078
3079 __ Bind(&done);
3080 return;
3081 }
3082
3083 // 64 bit case needs to worry about both parts of the register.
3084 DCHECK(src.IsRegisterPair());
3085 Register src_lo = src.AsRegisterPairLow<Register>();
3086 Register src_hi = src.AsRegisterPairHigh<Register>();
3087 NearLabel handle_low, done, all_zeroes;
3088
3089 // Is the high word zero?
3090 __ testl(src_hi, src_hi);
3091 __ j(kEqual, &handle_low);
3092
3093 // High word is not zero. We know that the BSR result is defined in this case.
3094 __ bsrl(out, src_hi);
3095
3096 // Correct the result from BSR to get the final CLZ result.
3097 __ xorl(out, Immediate(31));
3098 __ jmp(&done);
3099
3100 // High word was zero. We have to compute the low word count and add 32.
3101 __ Bind(&handle_low);
3102 __ bsrl(out, src_lo);
3103 __ j(kEqual, &all_zeroes);
3104
3105 // We had a valid result. Use an XOR to both correct the result and add 32.
3106 __ xorl(out, Immediate(63));
3107 __ jmp(&done);
3108
3109 // All zero case.
3110 __ Bind(&all_zeroes);
3111 __ movl(out, Immediate(64));
3112
3113 __ Bind(&done);
3114 }
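// Recap of the BSR-to-CLZ conversion used above: BSR yields the index of the highest set
// bit (0..31), so CLZ(x) = 31 - BSR(x), which equals BSR(x) XOR 31 because the index fits
// in 5 bits. For the 64-bit "high word is zero" path, XOR with 63 also folds in the "+ 32":
// e.g. x = 0x0000000000008000 gives BSR(low) = 15 and 15 ^ 63 = 48, which matches
// Long.numberOfLeadingZeros(x).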
3115
3116 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3117 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
3118 }
3119
3120 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3121 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3122 }
3123
3124 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3125 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
3126 }
3127
3128 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3129 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3130 }
3131
3132 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
3133 LocationSummary* locations =
3134 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3135 if (is_long) {
3136 locations->SetInAt(0, Location::RequiresRegister());
3137 } else {
3138 locations->SetInAt(0, Location::Any());
3139 }
3140 locations->SetOut(Location::RequiresRegister());
3141 }
3142
3143 static void GenTrailingZeros(X86Assembler* assembler,
3144 CodeGeneratorX86* codegen,
3145 HInvoke* invoke, bool is_long) {
3146 LocationSummary* locations = invoke->GetLocations();
3147 Location src = locations->InAt(0);
3148 Register out = locations->Out().AsRegister<Register>();
3149
3150 if (invoke->InputAt(0)->IsConstant()) {
3151 // Evaluate this at compile time.
3152 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3153 if (value == 0) {
3154 value = is_long ? 64 : 32;
3155 } else {
3156 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3157 }
3158 codegen->Load32BitValue(out, value);
3159 return;
3160 }
3161
3162 // Handle the non-constant cases.
3163 if (!is_long) {
3164 if (src.IsRegister()) {
3165 __ bsfl(out, src.AsRegister<Register>());
3166 } else {
3167 DCHECK(src.IsStackSlot());
3168 __ bsfl(out, Address(ESP, src.GetStackIndex()));
3169 }
3170
3171 // BSF sets ZF if the input was zero, and the output is undefined.
3172 NearLabel done;
3173 __ j(kNotEqual, &done);
3174
3175 // Fix the zero case with the expected result.
3176 __ movl(out, Immediate(32));
3177
3178 __ Bind(&done);
3179 return;
3180 }
3181
3182 // 64 bit case needs to worry about both parts of the register.
3183 DCHECK(src.IsRegisterPair());
3184 Register src_lo = src.AsRegisterPairLow<Register>();
3185 Register src_hi = src.AsRegisterPairHigh<Register>();
3186 NearLabel done, all_zeroes;
3187
3188 // If the low word is zero, then ZF will be set. If not, we have the answer.
3189 __ bsfl(out, src_lo);
3190 __ j(kNotEqual, &done);
3191
3192 // Low word was zero. We have to compute the high word count and add 32.
3193 __ bsfl(out, src_hi);
3194 __ j(kEqual, &all_zeroes);
3195
3196 // We had a valid result. Add 32 to account for the low word being zero.
3197 __ addl(out, Immediate(32));
3198 __ jmp(&done);
3199
3200 // All zero case.
3201 __ Bind(&all_zeroes);
3202 __ movl(out, Immediate(64));
3203
3204 __ Bind(&done);
3205 }
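// Recap of the BSF usage above: BSF yields the index of the lowest set bit, which is the
// trailing zero count directly. For the 64-bit case with a zero low word, the result is
// BSF(high) + 32: e.g. x = 0x0000001000000000 gives BSF(high = 0x00000010) = 4, so
// Long.numberOfTrailingZeros(x) = 36.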
3206
3207 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3208 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
3209 }
3210
3211 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3212 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3213 }
3214
3215 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3216 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
3217 }
3218
3219 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3220 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3221 }
3222
3223 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
3224 return instruction->InputAt(input0) == instruction->InputAt(input1);
3225 }
3226
3227 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
3228 // The only read barrier implementation supporting the
3229 // SystemArrayCopy intrinsic is the Baker-style read barriers.
3230 if (codegen_->EmitNonBakerReadBarrier()) {
3231 return;
3232 }
3233
3234 constexpr int32_t kLengthThreshold = -1; // No cut-off - handle large arrays in intrinsic code.
3235 constexpr size_t kInitialNumTemps = 0u; // We shall allocate temps explicitly.
3236 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
3237 invoke, kLengthThreshold, kInitialNumTemps);
3238 if (locations != nullptr) {
3239 // Add temporaries. We will use REP MOVSL, so we need fixed registers.
3240 DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
3241 locations->AddTemp(Location::RegisterLocation(ESI));
3242 locations->AddTemp(Location::RegisterLocation(EDI));
3243 locations->AddTemp(Location::RegisterLocation(ECX)); // Byte reg also used for write barrier.
3244
3245 static constexpr size_t kSrc = 0;
3246 static constexpr size_t kSrcPos = 1;
3247 static constexpr size_t kDest = 2;
3248 static constexpr size_t kDestPos = 3;
3249 static constexpr size_t kLength = 4;
3250
3251 if (!locations->InAt(kLength).IsConstant()) {
3252 // We may not have enough registers for all inputs and temps, so put the
3253 // non-const length explicitly to the same register as one of the temps.
3254 locations->SetInAt(kLength, Location::RegisterLocation(ECX));
3255 }
3256
3257 if (codegen_->EmitBakerReadBarrier()) {
3258 // We need an additional temp in the slow path for holding the reference.
3259 if (locations->InAt(kSrcPos).IsConstant() ||
3260 locations->InAt(kDestPos).IsConstant() ||
3261 IsSameInput(invoke, kSrc, kDest) ||
3262 IsSameInput(invoke, kSrcPos, kDestPos)) {
3263 // We can allocate another temp register.
3264 locations->AddTemp(Location::RequiresRegister());
3265 } else {
3266 // Use the same fixed register for the non-const `src_pos` and the additional temp.
3267 // The `src_pos` is no longer needed when we reach the slow path.
3268 locations->SetInAt(kSrcPos, Location::RegisterLocation(EDX));
3269 locations->AddTemp(Location::RegisterLocation(EDX));
3270 }
3271 }
3272 }
3273 }
3274
3275 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
3276 // The only read barrier implementation supporting the
3277 // SystemArrayCopy intrinsic is the Baker-style read barriers.
3278 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3279
3280 X86Assembler* assembler = GetAssembler();
3281 LocationSummary* locations = invoke->GetLocations();
3282
3283 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3284 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3285 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3286 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3287 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3288
3289 Register src = locations->InAt(0).AsRegister<Register>();
3290 Location src_pos = locations->InAt(1);
3291 Register dest = locations->InAt(2).AsRegister<Register>();
3292 Location dest_pos = locations->InAt(3);
3293 Location length = locations->InAt(4);
3294 Location temp1_loc = locations->GetTemp(0);
3295 Register temp1 = temp1_loc.AsRegister<Register>();
3296 Location temp2_loc = locations->GetTemp(1);
3297 Register temp2 = temp2_loc.AsRegister<Register>();
3298
3299 SlowPathCode* intrinsic_slow_path =
3300 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3301 codegen_->AddSlowPath(intrinsic_slow_path);
3302
3303 NearLabel conditions_on_positions_validated;
3304 SystemArrayCopyOptimizations optimizations(invoke);
3305
3306   // If source and destination are the same, we go to the slow path if the data would have to
3307   // be copied forward to a higher position. We do not need this check if the positions are the same.
3308 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
3309 if (src_pos.IsConstant()) {
3310 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
3311 if (dest_pos.IsConstant()) {
3312 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3313 if (optimizations.GetDestinationIsSource()) {
3314 // Checked when building locations.
3315 DCHECK_GE(src_pos_constant, dest_pos_constant);
3316 } else if (src_pos_constant < dest_pos_constant) {
3317 __ cmpl(src, dest);
3318 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3319 }
3320 } else {
3321 if (!optimizations.GetDestinationIsSource()) {
3322 __ cmpl(src, dest);
3323 __ j(kNotEqual, &conditions_on_positions_validated);
3324 }
3325 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
3326 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
3327 }
3328 } else {
3329 if (!optimizations.GetDestinationIsSource()) {
3330 __ cmpl(src, dest);
3331 __ j(kNotEqual, &conditions_on_positions_validated);
3332 }
3333 Register src_pos_reg = src_pos.AsRegister<Register>();
3334 EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
3335 }
3336 }
3337
3338 __ Bind(&conditions_on_positions_validated);
3339
3340 if (!optimizations.GetSourceIsNotNull()) {
3341 // Bail out if the source is null.
3342 __ testl(src, src);
3343 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3344 }
3345
3346 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3347 // Bail out if the destination is null.
3348 __ testl(dest, dest);
3349 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3350 }
3351
3352 // If the length is negative, bail out.
3353 // We have already checked in the LocationsBuilder for the constant case.
3354 if (!length.IsConstant() &&
3355 !optimizations.GetCountIsSourceLength() &&
3356 !optimizations.GetCountIsDestinationLength()) {
3357 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3358 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3359 }
3360
3361 // Validity checks: source.
3362 CheckSystemArrayCopyPosition(assembler,
3363 src,
3364 src_pos,
3365 length,
3366 intrinsic_slow_path,
3367 temp1,
3368 optimizations.GetCountIsSourceLength(),
3369 /*position_sign_checked=*/ false);
3370
3371 // Validity checks: dest.
3372 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
3373 CheckSystemArrayCopyPosition(assembler,
3374 dest,
3375 dest_pos,
3376 length,
3377 intrinsic_slow_path,
3378 temp1,
3379 optimizations.GetCountIsDestinationLength(),
3380 dest_position_sign_checked);
3381
3382 auto check_non_primitive_array_class = [&](Register klass, Register temp) {
3383 // No read barrier is needed for reading a chain of constant references for comparing
3384 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3385 // /* HeapReference<Class> */ temp = klass->component_type_
3386 __ movl(temp, Address(klass, component_offset));
3387 __ MaybeUnpoisonHeapReference(temp);
3388 // Check that the component type is not null.
3389 __ testl(temp, temp);
3390 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3391 // Check that the component type is not a primitive.
3392 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
3393 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3394 };
3395
3396 if (!optimizations.GetDoesNotNeedTypeCheck()) {
3397 // Check whether all elements of the source array are assignable to the component
3398 // type of the destination array. We do two checks: the classes are the same,
3399 // or the destination is Object[]. If none of these checks succeed, we go to the
3400 // slow path.
3401
3402 if (codegen_->EmitBakerReadBarrier()) {
3403 // /* HeapReference<Class> */ temp1 = dest->klass_
3404 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3405 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
3406 // Register `temp1` is not trashed by the read barrier emitted
3407 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3408 // method produces a call to a ReadBarrierMarkRegX entry point,
3409 // which saves all potentially live registers, including
3410       // temporaries such as `temp1`.
3411 // /* HeapReference<Class> */ temp2 = src->klass_
3412 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3413 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
3414 } else {
3415 // /* HeapReference<Class> */ temp1 = dest->klass_
3416 __ movl(temp1, Address(dest, class_offset));
3417 __ MaybeUnpoisonHeapReference(temp1);
3418 // /* HeapReference<Class> */ temp2 = src->klass_
3419 __ movl(temp2, Address(src, class_offset));
3420 __ MaybeUnpoisonHeapReference(temp2);
3421 }
3422
3423 __ cmpl(temp1, temp2);
3424 if (optimizations.GetDestinationIsTypedObjectArray()) {
3425 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3426 NearLabel do_copy;
3427 // For class match, we can skip the source type check regardless of the optimization flag.
3428 __ j(kEqual, &do_copy);
3429 // No read barrier is needed for reading a chain of constant references
3430 // for comparing with null, see `ReadBarrierOption`.
3431 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3432 __ movl(temp1, Address(temp1, component_offset));
3433 __ MaybeUnpoisonHeapReference(temp1);
3434 // No need to unpoison the following heap reference load, as
3435 // we're comparing against null.
3436 __ cmpl(Address(temp1, super_offset), Immediate(0));
3437 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3438 // Bail out if the source is not a non primitive array.
3439 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3440 check_non_primitive_array_class(temp2, temp2);
3441 }
3442 __ Bind(&do_copy);
3443 } else {
3444 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
3445 // For class match, we can skip the array type check completely if at least one of source
3446 // and destination is known to be a non primitive array, otherwise one check is enough.
3447 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3448 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
3449 !optimizations.GetSourceIsNonPrimitiveArray()) {
3450 check_non_primitive_array_class(temp2, temp2);
3451 }
3452 }
3453 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3454 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3455 // Bail out if the source is not a non primitive array.
3456 // No read barrier is needed for reading a chain of constant references for comparing
3457 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3458 // /* HeapReference<Class> */ temp1 = src->klass_
3459 __ movl(temp1, Address(src, class_offset));
3460 __ MaybeUnpoisonHeapReference(temp1);
3461 check_non_primitive_array_class(temp1, temp1);
3462 }
3463
3464 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3465     // Zero constant length: no need to emit the loop code at all.
3466 } else {
3467 const DataType::Type type = DataType::Type::kReference;
3468 const size_t data_size = DataType::Size(type);
3469 const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
3470
3471 // Don't enter copy loop if `length == 0`.
3472 NearLabel skip_copy_and_write_barrier;
3473 if (!length.IsConstant()) {
3474 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3475 __ j(kEqual, &skip_copy_and_write_barrier);
3476 }
3477
3478 // Compute the base source address in `temp1`.
3479 GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
3480 // Compute the base destination address in `temp2`.
3481 GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
3482
3483 SlowPathCode* read_barrier_slow_path = nullptr;
3484 if (codegen_->EmitBakerReadBarrier()) {
3485 // SystemArrayCopy implementation for Baker read barriers (see
3486 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3487 //
3488 // if (src_ptr != end_ptr) {
3489 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
3490 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3491 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3492 // if (is_gray) {
3493 // // Slow-path copy.
3494 // for (size_t i = 0; i != length; ++i) {
3495 // dest_array[dest_pos + i] =
3496 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3497 // }
3498 // } else {
3499 // // Fast-path copy.
3500 // do {
3501 // *dest_ptr++ = *src_ptr++;
3502 // } while (src_ptr != end_ptr)
3503 // }
3504 // }
3505
3506 // Given the numeric representation, it's enough to check the low bit of the rb_state.
3507 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3508 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3509 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3510 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3511 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
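      // Illustration only, with an assumed kReadBarrierStateShift of 28 (see lock_word.h for
      // the real value): gray_byte_position would be 3, gray_bit_position 4 and test_value 0x10,
      // so the check below would read `testb $0x10, monitor_offset + 3(src)`.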
3512
3513 // if (rb_state == ReadBarrier::GrayState())
3514 // goto slow_path;
3515 // At this point, just do the "if" and make sure that flags are preserved until the branch.
3516 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3517
3518 // Load fence to prevent load-load reordering.
3519 // Note that this is a no-op, thanks to the x86 memory model.
3520 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3521
3522 // Slow path used to copy array when `src` is gray.
3523 read_barrier_slow_path =
3524 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3525 codegen_->AddSlowPath(read_barrier_slow_path);
3526
3527 // We have done the "if" of the gray bit check above, now branch based on the flags.
3528 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3529 }
3530
3531 Register temp3 = locations->GetTemp(2).AsRegister<Register>();
3532 if (length.IsConstant()) {
3533 __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
3534 } else {
3535 DCHECK_EQ(temp3, length.AsRegister<Register>());
3536 }
3537
3538 // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
3539 DCHECK_EQ(temp1, ESI);
3540 DCHECK_EQ(temp2, EDI);
3541 DCHECK_EQ(temp3, ECX);
3542 __ rep_movsl();
3543
3544 if (read_barrier_slow_path != nullptr) {
3545 DCHECK(codegen_->EmitBakerReadBarrier());
3546 __ Bind(read_barrier_slow_path->GetExitLabel());
3547 }
3548
3549 // We only need one card marking on the destination array.
3550 codegen_->MarkGCCard(temp1, temp3, dest);
3551
3552 __ Bind(&skip_copy_and_write_barrier);
3553 }
3554
3555 __ Bind(intrinsic_slow_path->GetExitLabel());
3556 }
3557
3558 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3559 LocationSummary* locations = invoke->GetLocations();
3560 if (locations != nullptr) {
3561 HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3562 // Note: The base method address is not present yet when this is called from the
3563 // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3564 if (invoke_static_or_direct->HasSpecialInput()) {
3565 DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3566 ->IsX86ComputeBaseMethodAddress());
3567 locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3568 Location::RequiresRegister());
3569 }
3570 }
3571 }
3572
3573 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
3574 void IntrinsicLocationsBuilderX86::Visit##name##ValueOf(HInvoke* invoke) { \
3575 InvokeRuntimeCallingConvention calling_convention; \
3576 IntrinsicVisitor::ComputeValueOfLocations( \
3577 invoke, \
3578 codegen_, \
3579 low, \
3580 (high) - (low) + 1, \
3581 Location::RegisterLocation(EAX), \
3582 Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
3583 RequestBaseMethodAddressInRegister(invoke); \
3584 } \
3585 void IntrinsicCodeGeneratorX86::Visit##name##ValueOf(HInvoke* invoke) { \
3586 IntrinsicVisitor::ValueOfInfo info = \
3587 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
3588 codegen_->GetCompilerOptions(), \
3589 WellKnownClasses::java_lang_##name##_value, \
3590 low, \
3591 (high) - (low) + 1, \
3592 start_index); \
3593 HandleValueOf(invoke, info, type); \
3594 }
3595 BOXED_TYPES(VISIT_INTRINSIC)
3596 #undef VISIT_INTRINSIC
3597
3598 void IntrinsicCodeGeneratorX86::HandleValueOf(HInvoke* invoke,
3599 const IntrinsicVisitor::ValueOfInfo& info,
3600 DataType::Type type) {
3601 DCHECK(invoke->IsInvokeStaticOrDirect());
3602 LocationSummary* locations = invoke->GetLocations();
3603 X86Assembler* assembler = GetAssembler();
3604
3605 Register out = locations->Out().AsRegister<Register>();
3606 auto allocate_instance = [&]() {
3607 DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3608 codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3609 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3610 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3611 };
3612 if (invoke->InputAt(0)->IsIntConstant()) {
3613 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3614 if (static_cast<uint32_t>(value - info.low) < info.length) {
3615 // Just embed the object in the code.
3616 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3617 codegen_->LoadBootImageAddress(
3618 out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3619 } else {
3620 DCHECK(locations->CanCall());
3621 // Allocate and initialize a new j.l.Integer.
3622 // TODO: If we JIT, we could allocate the object now, and store it in the
3623 // JIT object table.
3624 allocate_instance();
3625 codegen_->MoveToMemory(type,
3626 Location::ConstantLocation(invoke->InputAt(0)->AsIntConstant()),
3627 out,
3628 /* dst_index= */ Register::kNoRegister,
3629 /* dst_scale= */ TIMES_1,
3630 /* dst_disp= */ info.value_offset);
3631 }
3632 } else {
3633 DCHECK(locations->CanCall());
3634 Register in = locations->InAt(0).AsRegister<Register>();
3635 // Check bounds of our cache.
3636 __ leal(out, Address(in, -info.low));
3637 __ cmpl(out, Immediate(info.length));
3638 NearLabel allocate, done;
3639 __ j(kAboveEqual, &allocate);
3640 // If the value is within the bounds, load the object directly from the array.
3641 constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3642 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3643 "Check heap reference size.");
3644 if (codegen_->GetCompilerOptions().IsBootImage()) {
3645 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3646 size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3647 HX86ComputeBaseMethodAddress* method_address =
3648 invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3649 DCHECK(method_address != nullptr);
3650 Register method_address_reg =
3651 invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3652 __ movl(out,
3653 Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3654 codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3655 } else {
3656 // Note: We're about to clobber the index in `out`, so we need to use `in` and
3657 // adjust the offset accordingly.
3658 uint32_t mid_array_boot_image_offset =
3659 info.array_data_boot_image_reference - info.low * kElementSize;
3660 codegen_->LoadBootImageAddress(
3661 out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3662 DCHECK_NE(out, in);
3663 __ movl(out, Address(out, in, TIMES_4, 0));
3664 }
3665 __ MaybeUnpoisonHeapReference(out);
3666 __ jmp(&done);
3667 __ Bind(&allocate);
3668 // Otherwise allocate and initialize a new object.
3669 allocate_instance();
3670 codegen_->MoveToMemory(type,
3671 Location::RegisterLocation(in),
3672 out,
3673 /* dst_index= */ Register::kNoRegister,
3674 /* dst_scale= */ TIMES_1,
3675 /* dst_disp= */ info.value_offset);
3676 __ Bind(&done);
3677 }
3678 }
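// A note on the bounds check above: `leal out, [in - info.low]` followed by an unsigned
// compare against `info.length` tests `info.low <= in <= info.high` with two instructions.
// Assuming the typical Integer cache range of -128..127 (info.low = -128, info.length = 256),
// in = 42 yields 42 - (-128) = 170 < 256, so the cached boxed object is loaded instead of
// allocating a new one.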
3679
3680 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3681 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3682 RequestBaseMethodAddressInRegister(invoke);
3683 }
3684
3685 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3686 X86Assembler* assembler = GetAssembler();
3687 LocationSummary* locations = invoke->GetLocations();
3688
3689 Location obj = locations->InAt(0);
3690 Location out = locations->Out();
3691
3692 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3693 codegen_->AddSlowPath(slow_path);
3694
3695 if (codegen_->EmitReadBarrier()) {
3696 // Check self->GetWeakRefAccessEnabled().
3697 ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3698 __ fs()->cmpl(Address::Absolute(offset),
3699 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3700 __ j(kNotEqual, slow_path->GetEntryLabel());
3701 }
3702
3703 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3704 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3705 invoke->AsInvokeStaticOrDirect());
3706
3707 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3708 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3709 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3710 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3711 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3712 __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3713 Immediate(0));
3714 __ j(kNotEqual, slow_path->GetEntryLabel());
3715
3716 // Load the value from the field.
3717 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3718 if (codegen_->EmitBakerReadBarrier()) {
3719 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3720 out,
3721 obj.AsRegister<Register>(),
3722 referent_offset,
3723 /*needs_null_check=*/ true);
3724 // Note that the fence is a no-op, thanks to the x86 memory model.
3725 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3726 } else {
3727 __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3728 codegen_->MaybeRecordImplicitNullCheck(invoke);
3729 // Note that the fence is a no-op, thanks to the x86 memory model.
3730 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3731 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3732 }
3733 __ Bind(slow_path->GetExitLabel());
3734 }
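// A loose pseudo-code sketch of the fast path emitted above (helper and field names are
// illustrative, not actual runtime entry points):
//
//   if (read_barriers && self->weak_ref_access_enabled != kVisiblyEnabled) goto slow_path;
//   if (Reference.disableIntrinsic || Reference.slowPathEnabled) goto slow_path;  // one 16-bit cmp
//   out = obj->referent;   // volatile load; the LoadAny fence is a no-op on x86
//   // plus a (Baker) read barrier on `out` when read barriers are in use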
3735
VisitReferenceRefersTo(HInvoke * invoke)3736 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3737 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3738 }
3739
VisitReferenceRefersTo(HInvoke * invoke)3740 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3741 X86Assembler* assembler = GetAssembler();
3742 LocationSummary* locations = invoke->GetLocations();
3743
3744 Register obj = locations->InAt(0).AsRegister<Register>();
3745 Register other = locations->InAt(1).AsRegister<Register>();
3746 Register out = locations->Out().AsRegister<Register>();
3747
3748 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3749 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3750
3751 __ movl(out, Address(obj, referent_offset));
3752 codegen_->MaybeRecordImplicitNullCheck(invoke);
3753 __ MaybeUnpoisonHeapReference(out);
3754 // Note that the fence is a no-op, thanks to the x86 memory model.
3755 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3756
3757 NearLabel end, return_true, return_false;
3758 __ cmpl(out, other);
3759
3760 if (codegen_->EmitReadBarrier()) {
3761 DCHECK(kUseBakerReadBarrier);
3762
3763 __ j(kEqual, &return_true);
3764
3765 // Check if the loaded reference is null.
3766 __ testl(out, out);
3767 __ j(kZero, &return_false);
3768
3769 // For correct memory visibility, we need a barrier before loading the lock word
3770 // but we already have the barrier emitted for volatile load above which is sufficient.
3771
3772 // Load the lockword and check if it is a forwarding address.
3773 static_assert(LockWord::kStateShift == 30u);
3774 static_assert(LockWord::kStateForwardingAddress == 3u);
3775 __ movl(out, Address(out, monitor_offset));
3776 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3777 __ j(kBelow, &return_false);
3778
3779 // Extract the forwarding address and compare with `other`.
3780 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3781 __ cmpl(out, other);
3782 }
3783
3784 __ j(kNotEqual, &return_false);
3785
3786 // Return true and exit the function.
3787 __ Bind(&return_true);
3788 __ movl(out, Immediate(1));
3789 __ jmp(&end);
3790
3791 // Return false and exit the function.
3792 __ Bind(&return_false);
3793 __ xorl(out, out);
3794 __ Bind(&end);
3795 }
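// A small sketch of the lock-word check above, assuming the 32-bit layout implied by the
// static_asserts (state in bits 31-30, forwarding-address state == 3):
//
//   uint32_t lw = referent->monitor_;
//   if (lw < 0xc0000000u) return false;                       // state != kStateForwardingAddress
//   uint32_t fwd = lw << LockWord::kForwardingAddressShift;   // recover the to-space address
//   return fwd == other;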
3796
VisitThreadInterrupted(HInvoke * invoke)3797 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3798 LocationSummary* locations =
3799 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3800 locations->SetOut(Location::RequiresRegister());
3801 }
3802
VisitThreadInterrupted(HInvoke * invoke)3803 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3804 X86Assembler* assembler = GetAssembler();
3805 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3806 Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3807 NearLabel done;
3808 __ fs()->movl(out, address);
3809 __ testl(out, out);
3810 __ j(kEqual, &done);
3811 __ fs()->movl(address, Immediate(0));
3812 codegen_->MemoryFence();
3813 __ Bind(&done);
3814 }
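// Roughly equivalent pseudo-code for the sequence above (the field name is illustrative):
//
//   int32_t out = self->interrupted_;   // %fs-relative load
//   if (out != 0) {
//     self->interrupted_ = 0;           // clear the flag
//     MemoryFence();                    // keep the clear ordered with later accesses
//   }
//   return out;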
3815
VisitReachabilityFence(HInvoke * invoke)3816 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3817 LocationSummary* locations =
3818 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3819 locations->SetInAt(0, Location::Any());
3820 }
3821
VisitReachabilityFence(HInvoke * invoke)3822 void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3823
VisitIntegerDivideUnsigned(HInvoke * invoke)3824 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3825 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3826 LocationSummary::kCallOnSlowPath,
3827 kIntrinsified);
3828 locations->SetInAt(0, Location::RegisterLocation(EAX));
3829 locations->SetInAt(1, Location::RequiresRegister());
3830 locations->SetOut(Location::SameAsFirstInput());
3831 // Intel uses edx:eax as the dividend.
3832 locations->AddTemp(Location::RegisterLocation(EDX));
3833 }
3834
VisitIntegerDivideUnsigned(HInvoke * invoke)3835 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3836 X86Assembler* assembler = GetAssembler();
3837 LocationSummary* locations = invoke->GetLocations();
3838 Location out = locations->Out();
3839 Location first = locations->InAt(0);
3840 Location second = locations->InAt(1);
3841 Register edx = locations->GetTemp(0).AsRegister<Register>();
3842 Register second_reg = second.AsRegister<Register>();
3843
3844 DCHECK_EQ(EAX, first.AsRegister<Register>());
3845 DCHECK_EQ(EAX, out.AsRegister<Register>());
3846 DCHECK_EQ(EDX, edx);
3847
3848 // Check if divisor is zero, bail to managed implementation to handle.
3849 __ testl(second_reg, second_reg);
3850 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3851 codegen_->AddSlowPath(slow_path);
3852 __ j(kEqual, slow_path->GetEntryLabel());
3853
3854 __ xorl(edx, edx);
3855 __ divl(second_reg);
3856
3857 __ Bind(slow_path->GetExitLabel());
3858 }
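// The emitted sequence is roughly:
//
//   test  second, second
//   jz    <slow path>        ; divisor == 0: bail to the managed implementation
//   xor   edx, edx           ; zero-extend the dividend into edx:eax
//   div   second             ; unsigned divide; the quotient lands in eax (== out)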
3859
HasVarHandleIntrinsicImplementation(HInvoke * invoke)3860 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3861 VarHandleOptimizations optimizations(invoke);
3862 if (optimizations.GetDoNotIntrinsify()) {
3863 return false;
3864 }
3865
3866 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3867 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
3868 if (expected_coordinates_count > 1u) {
3869 // Only static and instance field VarHandles are supported now.
3870 // TODO: add support for arrays and views.
3871 return false;
3872 }
3873
3874 return true;
3875 }
3876
GenerateVarHandleAccessModeCheck(Register varhandle_object,mirror::VarHandle::AccessMode access_mode,SlowPathCode * slow_path,X86Assembler * assembler)3877 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3878 mirror::VarHandle::AccessMode access_mode,
3879 SlowPathCode* slow_path,
3880 X86Assembler* assembler) {
3881 const uint32_t access_modes_bitmask_offset =
3882 mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3883 const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3884
3885 // If the access mode is not supported, bail to the runtime implementation to handle it.
3886 __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3887 __ j(kZero, slow_path->GetEntryLabel());
3888 }
3889
GenerateVarHandleStaticFieldCheck(Register varhandle_object,SlowPathCode * slow_path,X86Assembler * assembler)3890 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3891 SlowPathCode* slow_path,
3892 X86Assembler* assembler) {
3893 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3894
3895 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3896 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3897 __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3898 __ j(kNotEqual, slow_path->GetEntryLabel());
3899 }
3900
GenerateSubTypeObjectCheck(Register object,Register temp,Address type_address,SlowPathCode * slow_path,X86Assembler * assembler,bool object_can_be_null=true)3901 static void GenerateSubTypeObjectCheck(Register object,
3902 Register temp,
3903 Address type_address,
3904 SlowPathCode* slow_path,
3905 X86Assembler* assembler,
3906 bool object_can_be_null = true) {
3907 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3908 const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3909 NearLabel check_type_compatibility, type_matched;
3910
3911 // If the object is null, there is no need to check the type
3912 if (object_can_be_null) {
3913 __ testl(object, object);
3914 __ j(kZero, &type_matched);
3915 }
3916
3917 // Do not unpoison for in-memory comparison.
3918 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3919 __ movl(temp, Address(object, class_offset));
3920 __ Bind(&check_type_compatibility);
3921 __ cmpl(temp, type_address);
3922 __ j(kEqual, &type_matched);
3923 // Load the super class.
3924 __ MaybeUnpoisonHeapReference(temp);
3925 __ movl(temp, Address(temp, super_class_offset));
3926 // If the super class is null, we reached the root of the hierarchy without a match.
3927 // We let the slow path handle uncovered cases (e.g. interfaces).
3928 __ testl(temp, temp);
3929 __ j(kEqual, slow_path->GetEntryLabel());
3930 __ jmp(&check_type_compatibility);
3931 __ Bind(&type_matched);
3932 }
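// In pseudo-code, the loop emitted above is approximately:
//
//   if (object_can_be_null && object == nullptr) return;   // null is trivially compatible
//   klass = object->klass_;                                // possibly poisoned
//   while (klass != *type_address) {                       // compare poisoned values directly
//     klass = unpoison(klass)->super_class_;
//     if (klass == nullptr) goto slow_path;                // no match (or an interface type)
//   }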
3933
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3934 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3935 Register temp,
3936 SlowPathCode* slow_path,
3937 X86Assembler* assembler) {
3938 VarHandleOptimizations optimizations(invoke);
3939 LocationSummary* locations = invoke->GetLocations();
3940 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3941 Register object = locations->InAt(1).AsRegister<Register>();
3942
3943 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3944 const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3945
3946 // Check that the VarHandle references an instance field by checking that
3947 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3948 // type compatibility check with the source object's type, which will fail for null.
3949 __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3950 __ j(kNotEqual, slow_path->GetEntryLabel());
3951
3952 // Check if the object is null
3953 if (!optimizations.GetSkipObjectNullCheck()) {
3954 __ testl(object, object);
3955 __ j(kZero, slow_path->GetEntryLabel());
3956 }
3957
3958 // Check the object's class against coordinateType0.
3959 GenerateSubTypeObjectCheck(object,
3960 temp,
3961 Address(varhandle_object, coordtype0_offset),
3962 slow_path,
3963 assembler,
3964 /* object_can_be_null= */ false);
3965 }
3966
GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,Register temp,DataType::Type type,SlowPathCode * slow_path,X86Assembler * assembler)3967 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3968 Register temp,
3969 DataType::Type type,
3970 SlowPathCode* slow_path,
3971 X86Assembler* assembler) {
3972 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3973 const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3974 const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3975
3976 // We do not need a read barrier here: the reference is loaded only to read a constant field
3977 // through it.
3978 __ movl(temp, Address(varhandle_object, var_type_offset));
3979 __ MaybeUnpoisonHeapReference(temp);
3980 __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3981 __ j(kNotEqual, slow_path->GetEntryLabel());
3982 }
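// In pseudo-code, the check above is roughly (member names follow the offsets used above):
//
//   if (varhandle->var_type_->primitive_type_ != DataTypeToPrimitive(type)) goto slow_path;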
3983
GenerateVarHandleCommonChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3984 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3985 Register temp,
3986 SlowPathCode* slow_path,
3987 X86Assembler* assembler) {
3988 LocationSummary* locations = invoke->GetLocations();
3989 Register vh_object = locations->InAt(0).AsRegister<Register>();
3990 mirror::VarHandle::AccessMode access_mode =
3991 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3992
3993 GenerateVarHandleAccessModeCheck(vh_object,
3994 access_mode,
3995 slow_path,
3996 assembler);
3997
3998 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3999 switch (expected_coordinates_count) {
4000 case 0u:
4001 GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
4002 break;
4003 case 1u: {
4004 GenerateVarHandleInstanceFieldChecks(invoke, temp, slow_path, assembler);
4005 break;
4006 }
4007 default:
4008 LOG(FATAL) << "Unexpected coordinates count: " << expected_coordinates_count;
4009 UNREACHABLE();
4010 }
4011
4012 // Check the return type and varType parameters.
4013 mirror::VarHandle::AccessModeTemplate access_mode_template =
4014 mirror::VarHandle::GetAccessModeTemplate(access_mode);
4015 DataType::Type type = invoke->GetType();
4016
4017 switch (access_mode_template) {
4018 case mirror::VarHandle::AccessModeTemplate::kGet:
4019 // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
4020 // are additionally checked later by an HCheckCast node.
4021 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
4022 break;
4023 case mirror::VarHandle::AccessModeTemplate::kSet:
4024 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
4025 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4026 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4027
4028 // Check the varType.primitiveType against the type of the value we're trying to set.
4029 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
4030 if (value_type == DataType::Type::kReference) {
4031 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
4032
4033 // If the value type is a reference, check it against the varType.
4034 GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
4035 temp,
4036 Address(vh_object, var_type_offset),
4037 slow_path,
4038 assembler);
4039 }
4040 break;
4041 }
4042 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
4043 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
4044 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4045 uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
4046 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4047 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
4048
4049 // Check the varType.primitiveType against the type of the expected value.
4050 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
4051 if (value_type == DataType::Type::kReference) {
4052 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
4053
4054 // If the value type is a reference, check both the expected and the new value against
4055 // the varType.
4056 GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
4057 temp,
4058 Address(vh_object, var_type_offset),
4059 slow_path,
4060 assembler);
4061 GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
4062 temp,
4063 Address(vh_object, var_type_offset),
4064 slow_path,
4065 assembler);
4066 }
4067 break;
4068 }
4069 }
4070 }
4071
4072 // This method loads the address of the field referenced by a field VarHandle (base + offset).
4073 // The return value is the register containing the object reference (for an instance field)
4074 // or the declaring class (for a static field). The declaring class is stored in the `temp`
4075 // register. The field offset is loaded into the `offset` register.
GenerateVarHandleFieldReference(HInvoke * invoke,CodeGeneratorX86 * codegen,Register temp,Register offset)4076 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
4077 CodeGeneratorX86* codegen,
4078 Register temp,
4079 /*out*/ Register offset) {
4080 X86Assembler* assembler = codegen->GetAssembler();
4081 LocationSummary* locations = invoke->GetLocations();
4082 const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
4083 const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
4084 const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
4085 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
4086
4087 // Load the ArtField* and the offset.
4088 __ movl(temp, Address(varhandle_object, artfield_offset));
4089 __ movl(offset, Address(temp, offset_offset));
4090 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4091 if (expected_coordinates_count == 0) {
4092 // For static fields, load the declaring class
4093 InstructionCodeGeneratorX86* instr_codegen =
4094 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4095 instr_codegen->GenerateGcRootFieldLoad(invoke,
4096 Location::RegisterLocation(temp),
4097 Address(temp, declaring_class_offset),
4098 /* fixup_label= */ nullptr,
4099 codegen->GetCompilerReadBarrierOption());
4100 return temp;
4101 }
4102
4103 // For instance fields, return the register containing the object.
4104 DCHECK_EQ(expected_coordinates_count, 1u);
4105
4106 return locations->InAt(1).AsRegister<Register>();
4107 }
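// In pseudo-code, the lookup above is roughly (member names follow the offsets used above):
//
//   ArtField* field = varhandle->art_field_;       // FieldVarHandle::ArtFieldOffset()
//   offset = field->offset_;                        // ArtField::OffsetOffset()
//   if (static_field) {
//     temp = field->declaring_class_;               // GC-root load, read barrier if needed
//     return temp;                                  // base register for Address(base, offset)
//   }
//   return holder_object_register;                  // instance field: input 1 is the base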
4108
CreateVarHandleGetLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4109 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4110 // The only read barrier implementation supporting the
4111 // VarHandleGet intrinsic is the Baker-style read barriers.
4112 if (codegen->EmitNonBakerReadBarrier()) {
4113 return;
4114 }
4115
4116 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4117 return;
4118 }
4119
4120 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4121 LocationSummary* locations = new (allocator) LocationSummary(
4122 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4123 locations->SetInAt(0, Location::RequiresRegister());
4124 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4125 if (expected_coordinates_count == 1u) {
4126 // For instance fields, this is the source object.
4127 locations->SetInAt(1, Location::RequiresRegister());
4128 }
4129 locations->AddTemp(Location::RequiresRegister());
4130
4131 DataType::Type type = invoke->GetType();
4132 switch (DataType::Kind(type)) {
4133 case DataType::Type::kInt64:
4134 locations->AddTemp(Location::RequiresRegister());
4135 if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
4136 // We need an XmmRegister for Int64 to ensure an atomic load
4137 locations->AddTemp(Location::RequiresFpuRegister());
4138 }
4139 FALLTHROUGH_INTENDED;
4140 case DataType::Type::kInt32:
4141 case DataType::Type::kReference:
4142 locations->SetOut(Location::RequiresRegister());
4143 break;
4144 default:
4145 DCHECK(DataType::IsFloatingPointType(type));
4146 locations->AddTemp(Location::RequiresRegister());
4147 locations->SetOut(Location::RequiresFpuRegister());
4148 }
4149 }
4150
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorX86 * codegen)4151 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4152 // The only read barrier implementation supporting the
4153 // VarHandleGet intrinsic is the Baker-style read barriers.
4154 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4155
4156 X86Assembler* assembler = codegen->GetAssembler();
4157 LocationSummary* locations = invoke->GetLocations();
4158 DataType::Type type = invoke->GetType();
4159 DCHECK_NE(type, DataType::Type::kVoid);
4160 Register temp = locations->GetTemp(0).AsRegister<Register>();
4161 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4162 codegen->AddSlowPath(slow_path);
4163
4164 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4165
4166 Location out = locations->Out();
4167 // Use 'out' as a temporary register if it's a core register
4168 Register offset =
4169 out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
4170
4171 // Get the field referred by the VarHandle. The returned register contains the object reference
4172 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4173 // declaring class will be placed in 'temp' register.
4174 Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4175 Address field_addr(ref, offset, TIMES_1, 0);
4176
4177 // Load the value from the field
4178 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
4179 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4180 invoke, out, ref, field_addr, /* needs_null_check= */ false);
4181 } else if (type == DataType::Type::kInt64 &&
4182 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
4183 XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
4184 codegen->LoadFromMemoryNoBarrier(
4185 type, out, field_addr, /* instr= */ nullptr, xmm_temp, /* is_atomic_load= */ true);
4186 } else {
4187 codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
4188 }
4189
4190 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
4191 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
4192 // Load fence to prevent load-load reordering.
4193 // Note that this is a no-op, thanks to the x86 memory model.
4194 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4195 }
4196
4197 __ Bind(slow_path->GetExitLabel());
4198 }
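// Putting the pieces together, a field VarHandle get compiles to approximately:
//
//   <access-mode / coordinate / var-type checks, branching to the slow path on mismatch>
//   base  = holder object (instance field) or declaring class (static field)
//   value = *(base + offset)   // Baker read barrier for references,
//                              // XMM-temp load for atomic 64-bit values
//   <LoadAny fence for getVolatile/getAcquire; a no-op on x86, kept for compiler ordering>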
4199
VisitVarHandleGet(HInvoke * invoke)4200 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
4201 CreateVarHandleGetLocations(invoke, codegen_);
4202 }
4203
VisitVarHandleGet(HInvoke * invoke)4204 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
4205 GenerateVarHandleGet(invoke, codegen_);
4206 }
4207
VisitVarHandleGetVolatile(HInvoke * invoke)4208 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4209 CreateVarHandleGetLocations(invoke, codegen_);
4210 }
4211
VisitVarHandleGetVolatile(HInvoke * invoke)4212 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4213 GenerateVarHandleGet(invoke, codegen_);
4214 }
4215
VisitVarHandleGetAcquire(HInvoke * invoke)4216 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4217 CreateVarHandleGetLocations(invoke, codegen_);
4218 }
4219
VisitVarHandleGetAcquire(HInvoke * invoke)4220 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4221 GenerateVarHandleGet(invoke, codegen_);
4222 }
4223
VisitVarHandleGetOpaque(HInvoke * invoke)4224 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4225 CreateVarHandleGetLocations(invoke, codegen_);
4226 }
4227
VisitVarHandleGetOpaque(HInvoke * invoke)4228 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4229 GenerateVarHandleGet(invoke, codegen_);
4230 }
4231
CreateVarHandleSetLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4232 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4233 // The only read barrier implementation supporting the
4234 // VarHandleGet intrinsic is the Baker-style read barriers.
4235 if (codegen->EmitNonBakerReadBarrier()) {
4236 return;
4237 }
4238
4239 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4240 return;
4241 }
4242
4243 // The last argument should be the value we intend to set.
4244 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4245 HInstruction* value = invoke->InputAt(value_index);
4246 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4247 bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
4248 if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
4249 // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
4250 // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
4251 // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
4252 // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4253 return;
4254 }
4255
4256 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4257 LocationSummary* locations = new (allocator) LocationSummary(
4258 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4259 locations->SetInAt(0, Location::RequiresRegister());
4260 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4261 if (expected_coordinates_count == 1u) {
4262 // For instance fields, this is the source object
4263 locations->SetInAt(1, Location::RequiresRegister());
4264 }
4265
4266 switch (value_type) {
4267 case DataType::Type::kBool:
4268 case DataType::Type::kInt8:
4269 case DataType::Type::kUint8:
4270 // Ensure the value is in a byte register
4271 locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
4272 break;
4273 case DataType::Type::kInt16:
4274 case DataType::Type::kUint16:
4275 case DataType::Type::kInt32:
4276 locations->SetInAt(value_index, Location::RegisterOrConstant(value));
4277 break;
4278 case DataType::Type::kInt64:
4279 // We only handle constant non-atomic int64 values.
4280 DCHECK(value->IsConstant());
4281 locations->SetInAt(value_index, Location::ConstantLocation(value));
4282 break;
4283 case DataType::Type::kReference:
4284 locations->SetInAt(value_index, Location::RequiresRegister());
4285 break;
4286 default:
4287 DCHECK(DataType::IsFloatingPointType(value_type));
4288 if (needs_atomicity && value_type == DataType::Type::kFloat64) {
4289 locations->SetInAt(value_index, Location::RequiresFpuRegister());
4290 } else {
4291 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
4292 }
4293 }
4294
4295 locations->AddTemp(Location::RequiresRegister());
4296 // This temporary register is also used for the card in MarkGCCard. Make sure it's a byte register.
4297 locations->AddTemp(Location::RegisterLocation(EAX));
4298 if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
4299 // For static reference fields, we need another temporary for the declaring class. We set it
4300 // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
4301 locations->AddTemp(Location::RequiresRegister());
4302 }
4303 }
4304
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorX86 * codegen)4305 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4306 // The only read barrier implementation supporting the
4307 // VarHandleGet intrinsic is the Baker-style read barriers.
4308 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4309
4310 X86Assembler* assembler = codegen->GetAssembler();
4311 LocationSummary* locations = invoke->GetLocations();
4312 // The value we want to set is the last argument
4313 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4314 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4315 Register temp = locations->GetTemp(0).AsRegister<Register>();
4316 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
4317 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4318 codegen->AddSlowPath(slow_path);
4319
4320 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4321
4322 // For static reference fields, we need another temporary for the declaring class. But since
4323 // for instance fields the object is in a separate register, it is safe to use the first
4324 // temporary register for GenerateVarHandleFieldReference.
4325 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4326 if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
4327 temp = locations->GetTemp(2).AsRegister<Register>();
4328 }
4329
4330 Register offset = temp2;
4331 // Get the field referred by the VarHandle. The returned register contains the object reference
4332 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4333 // declaring class will be placed in 'temp' register.
4334 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4335
4336 bool is_volatile = false;
4337 switch (invoke->GetIntrinsic()) {
4338 case Intrinsics::kVarHandleSet:
4339 case Intrinsics::kVarHandleSetOpaque:
4340 // The only constraint for setOpaque is bitwise atomicity (setting 64-bit values atomically),
4341 // but we do not handle Int64 values here because we would need to place them in a register
4342 // pair. If the slow path is taken, the ParallelMove might fail to move the register pair
4343 // in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4344 break;
4345 case Intrinsics::kVarHandleSetRelease:
4346 // setRelease needs to ensure atomicity too. See the above comment.
4347 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4348 break;
4349 case Intrinsics::kVarHandleSetVolatile:
4350 is_volatile = true;
4351 break;
4352 default:
4353 LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
4354 }
4355
4356 InstructionCodeGeneratorX86* instr_codegen =
4357 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4358 // Store the value to the field
4359 instr_codegen->HandleFieldSet(
4360 invoke,
4361 value_index,
4362 value_type,
4363 Address(reference, offset, TIMES_1, 0),
4364 reference,
4365 is_volatile,
4366 /* value_can_be_null */ true,
4367 // Value can be null, and this write barrier is not being relied on for other sets.
4368 value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4369 WriteBarrierKind::kDontEmit);
4370
4371 __ Bind(slow_path->GetExitLabel());
4372 }
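// The resulting ordering for the different set flavours is roughly:
//
//   set():         plain store
//   setOpaque():   plain store (bitwise atomicity only; 64-bit values are filtered out above)
//   setRelease():  AnyStore fence (a no-op on x86), then plain store
//   setVolatile(): delegated to HandleFieldSet with `is_volatile = true`
//
// plus a card-marking write barrier when a reference is stored.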
4373
VisitVarHandleSet(HInvoke * invoke)4374 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
4375 CreateVarHandleSetLocations(invoke, codegen_);
4376 }
4377
VisitVarHandleSet(HInvoke * invoke)4378 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
4379 GenerateVarHandleSet(invoke, codegen_);
4380 }
4381
VisitVarHandleSetVolatile(HInvoke * invoke)4382 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4383 CreateVarHandleSetLocations(invoke, codegen_);
4384 }
4385
VisitVarHandleSetVolatile(HInvoke * invoke)4386 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4387 GenerateVarHandleSet(invoke, codegen_);
4388 }
4389
VisitVarHandleSetRelease(HInvoke * invoke)4390 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4391 CreateVarHandleSetLocations(invoke, codegen_);
4392 }
4393
VisitVarHandleSetRelease(HInvoke * invoke)4394 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4395 GenerateVarHandleSet(invoke, codegen_);
4396 }
4397
VisitVarHandleSetOpaque(HInvoke * invoke)4398 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4399 CreateVarHandleSetLocations(invoke, codegen_);
4400 }
4401
VisitVarHandleSetOpaque(HInvoke * invoke)4402 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4403 GenerateVarHandleSet(invoke, codegen_);
4404 }
4405
CreateVarHandleGetAndSetLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4406 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4407 // The only read barrier implementation supporting the
4408 // VarHandleGet intrinsic is the Baker-style read barriers.
4409 if (codegen->EmitNonBakerReadBarrier()) {
4410 return;
4411 }
4412
4413 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4414 return;
4415 }
4416
4417 // Get the type from the shorty as the invokes may not return a value.
4418 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4419 uint32_t value_index = number_of_arguments - 1;
4420 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4421 DataType::Type return_type = invoke->GetType();
4422 const bool is_void = return_type == DataType::Type::kVoid;
4423 DCHECK_IMPLIES(!is_void, return_type == value_type);
4424
4425 if (DataType::Is64BitType(value_type)) {
4426 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4427 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4428 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4429 // <EAX, EBX> to <EBX, ECX>).
4430 return;
4431 }
4432
4433 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4434 LocationSummary* locations = new (allocator) LocationSummary(
4435 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4436 locations->AddRegisterTemps(2);
4437 // We use this temporary for the card, so we need a byte register
4438 locations->AddTemp(Location::RegisterLocation(EBX));
4439 locations->SetInAt(0, Location::RequiresRegister());
4440 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4441 // For instance fields, this is the source object
4442 locations->SetInAt(1, Location::RequiresRegister());
4443 } else {
4444 // For static fields, we need another temp because one will be busy with the declaring class.
4445 locations->AddTemp(Location::RequiresRegister());
4446 }
4447 if (value_type == DataType::Type::kFloat32) {
4448 locations->AddTemp(Location::RegisterLocation(EAX));
4449 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
4450 // Only set the `out` register if it's needed. In the void case, we will not use `out`.
4451 if (!is_void) {
4452 locations->SetOut(Location::RequiresFpuRegister());
4453 }
4454 } else {
4455 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4456 // Only set the `out` register if it's needed. In the void case EAX can still be used in the
4457 // same manner, because it is reserved below as a temporary register.
4458 if (is_void) {
4459 locations->AddTemp(Location::RegisterLocation(EAX));
4460 } else {
4461 locations->SetOut(Location::RegisterLocation(EAX));
4462 }
4463 }
4464 }
4465
GenerateVarHandleGetAndSet(HInvoke * invoke,CodeGeneratorX86 * codegen)4466 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4467 // The only read barrier implementation supporting the
4468 // VarHandleGet intrinsic is the Baker-style read barriers.
4469 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4470
4471 X86Assembler* assembler = codegen->GetAssembler();
4472 LocationSummary* locations = invoke->GetLocations();
4473 // The value we want to set is the last argument
4474 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4475 Location value = locations->InAt(value_index);
4476 // Get the type from the shorty as the invokes may not return a value.
4477 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4478 Register temp = locations->GetTemp(1).AsRegister<Register>();
4479 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4480 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4481 codegen->AddSlowPath(slow_path);
4482
4483 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4484
4485 Register offset = locations->GetTemp(0).AsRegister<Register>();
4486 // Get the field referred by the VarHandle. The returned register contains the object reference
4487 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4488 // declaring class will be placed in 'temp' register.
4489 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4490 Address field_addr(reference, offset, TIMES_1, 0);
4491
4492 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
4493 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4494 }
4495
4496 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4497 // For static fields, we need another temporary for the declaring class. But since for instance
4498 // fields the object is in a separate register, it is safe to use the first temporary register.
4499 temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
4500 // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
4501
4502 DataType::Type return_type = invoke->GetType();
4503 const bool is_void = return_type == DataType::Type::kVoid;
4504 DCHECK_IMPLIES(!is_void, return_type == value_type);
4505 switch (value_type) {
4506 case DataType::Type::kBool:
4507 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4508 if (!is_void) {
4509 __ movzxb(locations->Out().AsRegister<Register>(),
4510 locations->Out().AsRegister<ByteRegister>());
4511 }
4512 break;
4513 case DataType::Type::kInt8:
4514 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4515 if (!is_void) {
4516 __ movsxb(locations->Out().AsRegister<Register>(),
4517 locations->Out().AsRegister<ByteRegister>());
4518 }
4519 break;
4520 case DataType::Type::kUint16:
4521 __ xchgw(value.AsRegister<Register>(), field_addr);
4522 if (!is_void) {
4523 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4524 }
4525 break;
4526 case DataType::Type::kInt16:
4527 __ xchgw(value.AsRegister<Register>(), field_addr);
4528 if (!is_void) {
4529 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4530 }
4531 break;
4532 case DataType::Type::kInt32:
4533 __ xchgl(value.AsRegister<Register>(), field_addr);
4534 break;
4535 case DataType::Type::kFloat32:
4536 codegen->Move32(Location::RegisterLocation(EAX), value);
4537 __ xchgl(EAX, field_addr);
4538 if (!is_void) {
4539 __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
4540 }
4541 break;
4542 case DataType::Type::kReference: {
4543 if (codegen->EmitBakerReadBarrier()) {
4544 // Need to make sure the reference stored in the field is a to-space
4545 // one before attempting the CAS or the CAS could fail incorrectly.
4546 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4547 invoke,
4548 // The output is unused; the register serves only as a "temporary" within the read barrier.
4549 Location::RegisterLocation(temp),
4550 reference,
4551 field_addr,
4552 /* needs_null_check= */ false,
4553 /* always_update_field= */ true,
4554 &temp2);
4555 }
4556 codegen->MarkGCCard(temp, temp2, reference);
4557 if (kPoisonHeapReferences) {
4558 __ movl(temp, value.AsRegister<Register>());
4559 __ PoisonHeapReference(temp);
4560 __ xchgl(temp, field_addr);
4561 if (!is_void) {
4562 __ UnpoisonHeapReference(temp);
4563 __ movl(locations->Out().AsRegister<Register>(), temp);
4564 }
4565 } else {
4566 DCHECK_IMPLIES(!is_void, locations->Out().Equals(Location::RegisterLocation(EAX)));
4567 __ xchgl(Location::RegisterLocation(EAX).AsRegister<Register>(), field_addr);
4568 }
4569 break;
4570 }
4571 default:
4572 LOG(FATAL) << "Unexpected type: " << value_type;
4573 UNREACHABLE();
4574 }
4575
4576 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4577 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4578 }
4579
4580 __ Bind(slow_path->GetExitLabel());
4581 }
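// For the common int case the core of the fast path above is a single exchange, e.g.:
//
//   xchg  eax, [base + offset]   ; implicitly locked; the previous value is returned in eax
//
// with sign/zero extension for sub-word types, a movd round trip through EAX for float, and
// (un)poisoning plus a card mark for references; getAndSetAcquire/Release only add fences
// around it, which are no-ops on x86 apart from compiler ordering.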
4582
VisitVarHandleGetAndSet(HInvoke * invoke)4583 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4584 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4585 }
4586
VisitVarHandleGetAndSet(HInvoke * invoke)4587 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4588 GenerateVarHandleGetAndSet(invoke, codegen_);
4589 }
4590
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4591 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4592 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4593 }
4594
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4595 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4596 GenerateVarHandleGetAndSet(invoke, codegen_);
4597 }
4598
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4599 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4600 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4601 }
4602
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4603 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4604 GenerateVarHandleGetAndSet(invoke, codegen_);
4605 }
4606
CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4607 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4608 CodeGeneratorX86* codegen) {
4609 // The only read barrier implementation supporting the
4610 // VarHandleGet intrinsic is the Baker-style read barriers.
4611 if (codegen->EmitNonBakerReadBarrier()) {
4612 return;
4613 }
4614
4615 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4616 return;
4617 }
4618
4619 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4620 uint32_t expected_value_index = number_of_arguments - 2;
4621 uint32_t new_value_index = number_of_arguments - 1;
4622 DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4623 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4624
4625 if (DataType::Is64BitType(value_type)) {
4626 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4627 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4628 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4629 // <EAX, EBX> to <EBX, ECX>).
4630 return;
4631 }
4632
4633 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4634 LocationSummary* locations = new (allocator) LocationSummary(
4635 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4636 locations->AddRegisterTemps(2);
4637 // We use this temporary for the card, so we need a byte register
4638 locations->AddTemp(Location::RegisterLocation(EBX));
4639 locations->SetInAt(0, Location::RequiresRegister());
4640 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4641 // For instance fields, this is the source object
4642 locations->SetInAt(1, Location::RequiresRegister());
4643 } else {
4644 // For static fields, we need another temp because one will be busy with the declaring class.
4645 locations->AddTemp(Location::RequiresRegister());
4646 }
4647 if (DataType::IsFloatingPointType(value_type)) {
4648 // We need EAX for placing the expected value
4649 locations->AddTemp(Location::RegisterLocation(EAX));
4650 locations->SetInAt(new_value_index,
4651 Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4652 locations->SetInAt(expected_value_index,
4653 Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4654 } else {
4655 // Ensure it's in a byte register
4656 locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4657 locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
4658 }
4659
4660 mirror::VarHandle::AccessModeTemplate access_mode_template =
4661 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4662
4663 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4664 value_type == DataType::Type::kFloat32) {
4665 locations->SetOut(Location::RequiresFpuRegister());
4666 } else {
4667 locations->SetOut(Location::RegisterLocation(EAX));
4668 }
4669 }
4670
GenerateVarHandleCompareAndSetOrExchange(HInvoke * invoke,CodeGeneratorX86 * codegen)4671 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4672 // The only read barrier implementation supporting the
4673 // VarHandleGet intrinsic is the Baker-style read barriers.
4674 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4675
4676 X86Assembler* assembler = codegen->GetAssembler();
4677 LocationSummary* locations = invoke->GetLocations();
4678 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4679 uint32_t expected_value_index = number_of_arguments - 2;
4680 uint32_t new_value_index = number_of_arguments - 1;
4681 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4682 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4683 Location expected_value = locations->InAt(expected_value_index);
4684 Location new_value = locations->InAt(new_value_index);
4685 Register offset = locations->GetTemp(0).AsRegister<Register>();
4686 Register temp = locations->GetTemp(1).AsRegister<Register>();
4687 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4688 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4689 codegen->AddSlowPath(slow_path);
4690
4691 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4692
4693 // Get the field referred by the VarHandle. The returned register contains the object reference
4694 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4695 // declaring class will be placed in 'temp' register.
4696 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4697
4698 uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4699 // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4700 // first temporary contains the declaring class so we need another temporary. In case of an
4701 // instance field, the object comes in a separate register so it's safe to use the first temp.
4702 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4703 DCHECK_NE(temp, reference);
4704
4705 // We use `lock cmpxchg` in all cases because x86 has no CAS variant with weak failure
4706 // semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4707 // barriers at this time.
4708
4709 mirror::VarHandle::AccessModeTemplate access_mode_template =
4710 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4711 bool is_cmpxchg =
4712 access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
4713
4714 if (type == DataType::Type::kReference) {
4715 GenReferenceCAS(
4716 invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4717 } else {
4718 Location out = locations->Out();
4719 GenPrimitiveCAS(
4720 type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4721 }
4722
4723 __ Bind(slow_path->GetExitLabel());
4724 }
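// Both compareAndSet and compareAndExchange boil down to the same instruction; only the
// result handling differs, roughly:
//
//   mov         eax, expected
//   lock cmpxchg [base + offset], new_value
//   ; compareAndExchange: the witness value is left in EAX
//   ; compareAndSet:      the ZF result is materialized as the boolean return value
//
// (reference poisoning and the Baker read barrier are handled in GenReferenceCAS /
// GenPrimitiveCAS.)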
4725
VisitVarHandleCompareAndSet(HInvoke * invoke)4726 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4727 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4728 }
4729
VisitVarHandleCompareAndSet(HInvoke * invoke)4730 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4731 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4732 }
4733
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4734 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4735 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4736 }
4737
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4738 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4739 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4740 }
4741
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4742 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4743 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4744 }
4745
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4746 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4747 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4748 }
4749
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4750 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4751 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4752 }
4753
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4754 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4755 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4756 }
4757
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4758 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4759 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4760 }
4761
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4762 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4763 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4764 }
4765
VisitVarHandleCompareAndExchange(HInvoke * invoke)4766 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4767 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4768 }
4769
VisitVarHandleCompareAndExchange(HInvoke * invoke)4770 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4771 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4772 }
4773
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4774 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4775 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4776 }
4777
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4778 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4779 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4780 }
4781
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4782 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4783 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4784 }
4785
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4786 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4787 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4788 }
4789
CreateVarHandleGetAndAddLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4790 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4791 // The only read barrier implementation supporting the
4792 // VarHandleGet intrinsic is the Baker-style read barriers.
4793 if (codegen->EmitNonBakerReadBarrier()) {
4794 return;
4795 }
4796
4797 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4798 return;
4799 }
4800
4801 // Get the type from the shorty as the invokes may not return a value.
4802 // The last argument should be the value we intend to set.
4803 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4804 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4805 if (DataType::Is64BitType(value_type)) {
4806 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4807 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4808 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4809 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4810 return;
4811 }
4812
4813 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4814 LocationSummary* locations = new (allocator) LocationSummary(
4815 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4816 locations->AddRegisterTemps(2);
4817 locations->SetInAt(0, Location::RequiresRegister());
4818 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4819 if (expected_coordinates_count == 1u) {
4820 // For instance fields, this is the source object
4821 locations->SetInAt(1, Location::RequiresRegister());
4822 } else {
4823 // For static fields, we need another temp because one will be busy with the declaring class.
4824 locations->AddTemp(Location::RequiresRegister());
4825 }
4826
4827 DataType::Type return_type = invoke->GetType();
4828 const bool is_void = return_type == DataType::Type::kVoid;
4829 DCHECK_IMPLIES(!is_void, return_type == value_type);
4830
4831 if (DataType::IsFloatingPointType(value_type)) {
4832 locations->AddTemp(Location::RequiresFpuRegister());
4833 locations->AddTemp(Location::RegisterLocation(EAX));
4834 locations->SetInAt(value_index, Location::RequiresFpuRegister());
4835 // Only set the `out` register if it's needed. In the void case, we do not use `out`.
4836 if (!is_void) {
4837 locations->SetOut(Location::RequiresFpuRegister());
4838 }
4839 } else {
4840 // xadd updates the register argument with the old value. ByteRegister required for xaddb.
4841 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4842 // Only set the `out` register if it's needed. In the void case EAX can still be used in the
4843 // same manner, because it is reserved below as a temporary register.
4844 if (is_void) {
4845 locations->AddTemp(Location::RegisterLocation(EAX));
4846 } else {
4847 locations->SetOut(Location::RegisterLocation(EAX));
4848 }
4849 }
4850 }
4851
GenerateVarHandleGetAndAdd(HInvoke * invoke,CodeGeneratorX86 * codegen)4852 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4853 // The only read barrier implementation supporting the
4854 // VarHandleGet intrinsic is the Baker-style read barriers.
4855 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4856
4857 X86Assembler* assembler = codegen->GetAssembler();
4858 LocationSummary* locations = invoke->GetLocations();
4859 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4860 uint32_t value_index = number_of_arguments - 1;
4861 // Get the type from the shorty as the invokes may not return a value.
4862 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4863 DataType::Type return_type = invoke->GetType();
4864 const bool is_void = return_type == DataType::Type::kVoid;
4865 DCHECK_IMPLIES(!is_void, return_type == type);
4866 Location value_loc = locations->InAt(value_index);
4867 Register temp = locations->GetTemp(0).AsRegister<Register>();
4868 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4869 codegen->AddSlowPath(slow_path);
4870
4871 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4872
4873 Register offset = locations->GetTemp(1).AsRegister<Register>();
4874 // Get the field referred by the VarHandle. The returned register contains the object reference
4875 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4876 // declaring class will be placed in 'temp' register.
4877 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4878
4879 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4880 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4881 DCHECK_NE(temp, reference);
4882 Address field_addr(reference, offset, TIMES_1, 0);
4883
4884 switch (type) {
4885 case DataType::Type::kInt8:
4886 __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4887 if (!is_void) {
4888 __ movsxb(locations->Out().AsRegister<Register>(),
4889 locations->Out().AsRegister<ByteRegister>());
4890 }
4891 break;
4892 case DataType::Type::kInt16:
4893 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4894 if (!is_void) {
4895 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4896 }
4897 break;
4898 case DataType::Type::kUint16:
4899 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4900 if (!is_void) {
4901 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4902 }
4903 break;
4904 case DataType::Type::kInt32:
4905 __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4906 break;
4907 case DataType::Type::kFloat32: {
4908 Location temp_float =
4909 (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4910 DCHECK(temp_float.IsFpuRegister());
4911 Location eax = Location::RegisterLocation(EAX);
4912 NearLabel try_again;
4913 __ Bind(&try_again);
4914 __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
4915 __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
4916 __ addss(temp_float.AsFpuRegister<XmmRegister>(),
4917 value_loc.AsFpuRegister<XmmRegister>());
4918 GenPrimitiveLockedCmpxchg(type,
4919 codegen,
4920 /* expected_value= */ eax,
4921 /* new_value= */ temp_float,
4922 reference,
4923 offset,
4924 temp);
4925 __ j(kNotZero, &try_again);
4926
4927 if (!is_void) {
4928 // The old value is present in EAX.
4929 codegen->Move32(locations->Out(), eax);
4930 }
4931 break;
4932 }
4933 default:
4934 LOG(FATAL) << "Unexpected type: " << type;
4935 UNREACHABLE();
4936 }
4937
4938 __ Bind(slow_path->GetExitLabel());
4939 }
4940
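// The GetAndAdd, GetAndAddAcquire and GetAndAddRelease visitors below all share the same
// generator: on x86, LOCK-prefixed read-modify-write instructions already provide the
// ordering required by the acquire/release variants, so no extra fences are emitted here.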
4941 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4942 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4943 }
4944
4945 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4946 GenerateVarHandleGetAndAdd(invoke, codegen_);
4947 }
4948
4949 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4950 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4951 }
4952
4953 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4954 GenerateVarHandleGetAndAdd(invoke, codegen_);
4955 }
4956
4957 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4958 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4959 }
4960
4961 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4962 GenerateVarHandleGetAndAdd(invoke, codegen_);
4963 }
4964
4965 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4966 // The only read barrier implementation supporting the
4967 // VarHandle intrinsics is the Baker-style read barrier.
4968 if (codegen->EmitNonBakerReadBarrier()) {
4969 return;
4970 }
4971
4972 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4973 return;
4974 }
4975
4976 // Get the type from the shorty as the invokes may not return a value.
4977 // The last argument should be the value we intend to set.
4978 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4979 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4980 if (DataType::Is64BitType(value_type)) {
4981 // We avoid the case of an Int64 value because we would need to place it in a register pair.
4982 // If the slow path is taken, the ParallelMove might fail to move the pair according to the
4983 // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4984 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4985 return;
4986 }
4987
4988 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4989 LocationSummary* locations = new (allocator) LocationSummary(
4990 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4991 // We need a byte register temp to store the result of the bitwise operation
4992 locations->AddTemp(Location::RegisterLocation(EBX));
4993 locations->AddTemp(Location::RequiresRegister());
4994 locations->SetInAt(0, Location::RequiresRegister());
4995 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4996 if (expected_coordinates_count == 1u) {
4997 // For instance fields, this is the source object
4998 locations->SetInAt(1, Location::RequiresRegister());
4999 } else {
5000 // For static fields, we need another temp because one will be busy with the declaring class.
5001 locations->AddTemp(Location::RequiresRegister());
5002 }
5003
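// The value operand may be a register or a constant: the generator copies it into the EBX
// temp on every iteration of the retry loop (via Move32), so no extra register needs to be
// reserved for it.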
5004 locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
5005
5006 DataType::Type return_type = invoke->GetType();
5007 const bool is_void = return_type == DataType::Type::kVoid;
5008 DCHECK_IMPLIES(!is_void, return_type == value_type);
5009 if (is_void) {
5010 // EAX is used as a temporary even when we are not outputting it, so reserve it. It has to be
5011 // requested before the other temporary since there is a variable number of temp registers and
5012 // the other temp register is expected to be the last one.
5013 locations->AddTemp(Location::RegisterLocation(EAX));
5014 } else {
5015 locations->SetOut(Location::RegisterLocation(EAX));
5016 }
5017 }
5018
5019 static void GenerateBitwiseOp(HInvoke* invoke,
5020 CodeGeneratorX86* codegen,
5021 Register left,
5022 Register right) {
5023 X86Assembler* assembler = codegen->GetAssembler();
5024
5025 switch (invoke->GetIntrinsic()) {
5026 case Intrinsics::kVarHandleGetAndBitwiseOr:
5027 case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
5028 case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
5029 __ orl(left, right);
5030 break;
5031 case Intrinsics::kVarHandleGetAndBitwiseXor:
5032 case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
5033 case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
5034 __ xorl(left, right);
5035 break;
5036 case Intrinsics::kVarHandleGetAndBitwiseAnd:
5037 case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
5038 case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
5039 __ andl(left, right);
5040 break;
5041 default:
5042 LOG(FATAL) << "Unexpected intrinsic: " << invoke->GetIntrinsic();
5043 UNREACHABLE();
5044 }
5045 }
5046
5047 static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
5048 // The only read barrier implementation supporting the
5049 // VarHandle intrinsics is the Baker-style read barrier.
5050 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
5051
5052 X86Assembler* assembler = codegen->GetAssembler();
5053 LocationSummary* locations = invoke->GetLocations();
5054 // Get the type from the shorty as the invokes may not return a value.
5055 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
5056 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
5057 DataType::Type return_type = invoke->GetType();
5058 const bool is_void = return_type == DataType::Type::kVoid;
5059 DCHECK_IMPLIES(!is_void, return_type == type);
5060 Register temp = locations->GetTemp(0).AsRegister<Register>();
5061 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
5062 codegen->AddSlowPath(slow_path);
5063
5064 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
5065
5066 Register offset = locations->GetTemp(1).AsRegister<Register>();
5067 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
5068 // For static fields, we need another temporary because the first one contains the declaring class.
5069 Register reference =
5070 (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
5071 // Get the field referred to by the VarHandle. The returned register contains the object
5072 // reference or the declaring class. The field offset will be placed in 'offset'. For static
5073 // fields, the declaring class will be placed in the 'reference' register.
5074 reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
5075 DCHECK_NE(temp, reference);
5076 Address field_addr(reference, offset, TIMES_1, 0);
5077
5078 Location eax_loc = Location::RegisterLocation(EAX);
5079 Register eax = eax_loc.AsRegister<Register>();
5080 DCHECK_IMPLIES(!is_void, locations->Out().Equals(eax_loc));
5081
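// Memory ordering: the *Release variants need an any-store fence before the update and the
// *Acquire variants a load-any fence after it; the plain variants rely on the ordering
// already implied by LOCK CMPXCHG, so they emit no explicit barrier.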
5082 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
5083 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
5084 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
5085 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5086 }
5087
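// CAS loop: EAX holds the value last read from the field (the "expected" value for cmpxchg)
// and the byte-register temp (EBX) holds the proposed new value, i.e. the old value combined
// with the operand via or/xor/and. If another thread changed the field between the load and
// the cmpxchg, ZF is cleared and we retry.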
5088 NearLabel try_again;
5089 __ Bind(&try_again);
5090 // Place the expected value in EAX for cmpxchg
5091 codegen->LoadFromMemoryNoBarrier(type, eax_loc, field_addr);
5092 codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
5093 GenerateBitwiseOp(invoke, codegen, temp, eax);
5094 GenPrimitiveLockedCmpxchg(type,
5095 codegen,
5096 /* expected_value= */ eax_loc,
5097 /* new_value= */ locations->GetTemp(0),
5098 reference,
5099 offset);
5100 // If the cmpxchg failed, another thread changed the value so try again.
5101 __ j(kNotZero, &try_again);
5102
5103 // The old value is present in EAX.
5104
5105 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
5106 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
5107 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
5108 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5109 }
5110
5111 __ Bind(slow_path->GetExitLabel());
5112 }
5113
5114 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5115 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5116 }
5117
5118 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5119 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5120 }
5121
5122 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5123 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5124 }
5125
5126 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5127 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5128 }
5129
5130 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5131 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5132 }
5133
5134 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5135 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5136 }
5137
5138 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5139 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5140 }
5141
5142 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5143 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5144 }
5145
5146 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5147 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5148 }
5149
5150 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5151 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5152 }
5153
5154 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5155 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5156 }
5157
5158 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5159 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5160 }
5161
5162 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5163 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5164 }
5165
5166 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5167 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5168 }
5169
5170 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5171 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5172 }
5173
5174 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5175 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5176 }
5177
5178 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5179 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5180 }
5181
5182 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5183 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5184 }
5185
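// Math.fma(a, b, c) maps to the FMA3 "213" form: vfmadd213ss/sd dst, src, acc computes
// dst = src * dst + acc with a single rounding step, which is why the output is expected to
// share the first input's register (see the DCHECK in GenerateMathFma below).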
5186 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
5187 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
5188 LocationSummary* locations = invoke->GetLocations();
5189 DCHECK(locations->InAt(0).Equals(locations->Out()));
5190 X86Assembler* assembler = codegen->GetAssembler();
5191 XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
5192 XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
5193 XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
5194 if (invoke->GetType() == DataType::Type::kFloat32) {
5195 __ vfmadd213ss(left, right, accumulator);
5196 } else {
5197 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
5198 __ vfmadd213sd(left, right, accumulator);
5199 }
5200 }
5201
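// The Fma intrinsics are only intrinsified when the codegen reports AVX2 support (the gate
// this code uses for the vfmadd213 encodings); when the feature is missing, the locations
// builder creates no LocationSummary, so the invoke is compiled through the normal call path.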
5202 void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
5203 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
5204 GenerateMathFma(invoke, codegen_);
5205 }
5206
5207 void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
5208 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
5209 CreateFPFPFPToFPCallLocations(allocator_, invoke);
5210 }
5211 }
5212
5213 void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
5214 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
5215 GenerateMathFma(invoke, codegen_);
5216 }
5217
5218 void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
5219 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
5220 CreateFPFPFPToFPCallLocations(allocator_, invoke);
5221 }
5222 }
5223
5224 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
5225 UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
5226 #undef MARK_UNIMPLEMENTED
5227
5228 UNREACHABLE_INTRINSICS(X86)
5229
5230 #undef __
5231
5232 } // namespace x86
5233 } // namespace art
5234