xref: /aosp_15_r20/art/compiler/optimizing/intrinsics_x86.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86.h"
18 
19 #include <limits>
20 
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "data_type-inl.h"
26 #include "entrypoints/quick/quick_entrypoints.h"
27 #include "heap_poisoning.h"
28 #include "intrinsic_objects.h"
29 #include "intrinsics.h"
30 #include "intrinsics_utils.h"
31 #include "lock_word.h"
32 #include "mirror/array-inl.h"
33 #include "mirror/object_array-inl.h"
34 #include "mirror/reference.h"
35 #include "mirror/string.h"
36 #include "mirror/var_handle.h"
37 #include "optimizing/data_type.h"
38 #include "scoped_thread_state_change-inl.h"
39 #include "thread-current-inl.h"
40 #include "utils/x86/assembler_x86.h"
41 #include "utils/x86/constants_x86.h"
42 #include "well_known_classes.h"
43 
44 namespace art HIDDEN {
45 
46 namespace x86 {
47 
48 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
49   : allocator_(codegen->GetGraph()->GetAllocator()),
50     codegen_(codegen) {
51 }
52 
53 
54 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
55   return down_cast<X86Assembler*>(codegen_->GetAssembler());
56 }
57 
58 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
59   return codegen_->GetGraph()->GetAllocator();
60 }
61 
62 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
63   Dispatch(invoke);
64   LocationSummary* res = invoke->GetLocations();
65   if (res == nullptr) {
66     return false;
67   }
68   return res->Intrinsified();
69 }
70 
71 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
72 
73 #define __ assembler->
74 
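// Editor's note: the helper below computes `dest = base + data_offset + pos * DataType::Size(type)`
// with a single LEA, encoding the element size as the address scale factor when `pos` is a register.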
75 static void GenArrayAddress(X86Assembler* assembler,
76                             Register dest,
77                             Register base,
78                             Location pos,
79                             DataType::Type type,
80                             uint32_t data_offset) {
81   if (pos.IsConstant()) {
82     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
83     __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
84   } else {
85     const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
86     __ leal(dest, Address(base, pos.AsRegister<Register>(), scale_factor, data_offset));
87   }
88 }
89 
90 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
91 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
92  public:
93   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
94       : SlowPathCode(instruction) {
95   }
96 
97   void EmitNativeCode(CodeGenerator* codegen) override {
98     DCHECK(codegen->EmitBakerReadBarrier());
99     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
100     X86Assembler* assembler = x86_codegen->GetAssembler();
101     LocationSummary* locations = instruction_->GetLocations();
102     DCHECK(locations->CanCall());
103     DCHECK(instruction_->IsInvokeStaticOrDirect())
104         << "Unexpected instruction in read barrier arraycopy slow path: "
105         << instruction_->DebugName();
106     DCHECK(instruction_->GetLocations()->Intrinsified());
107     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
108     Location length = locations->InAt(4);
109 
110     const DataType::Type type = DataType::Type::kReference;
111     const int32_t element_size = DataType::Size(type);
112 
113     Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
114     Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
115     Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
116     Register value = locations->GetTemp(3).AsRegister<Register>();
117 
118     __ Bind(GetEntryLabel());
119     // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
120     GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
121 
122     NearLabel loop;
123     __ Bind(&loop);
124     __ movl(value, Address(src_curr_addr, 0));
125     __ MaybeUnpoisonHeapReference(value);
126     // TODO: Inline the mark bit check before calling the runtime?
127     // value = ReadBarrier::Mark(value)
128     // No need to save live registers; it's taken care of by the
129     // entrypoint. Also, there is no need to update the stack mask,
130     // as this runtime call will not trigger a garbage collection.
131     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
132     // explanations.)
133     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(value);
134     // This runtime call does not require a stack map.
135     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
136     __ MaybePoisonHeapReference(value);
137     __ movl(Address(dst_curr_addr, 0), value);
138     __ addl(src_curr_addr, Immediate(element_size));
139     __ addl(dst_curr_addr, Immediate(element_size));
140     __ cmpl(src_curr_addr, src_stop_addr);
141     __ j(kNotEqual, &loop);
142     __ jmp(GetExitLabel());
143   }
144 
145   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
146 
147  private:
148   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
149 };
150 
151 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
152   LocationSummary* locations =
153       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
154   locations->SetInAt(0, Location::RequiresFpuRegister());
155   locations->SetOut(Location::RequiresRegister());
156   if (is64bit) {
157     locations->AddTemp(Location::RequiresFpuRegister());
158   }
159 }
160 
161 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
162   LocationSummary* locations =
163       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
164   locations->SetInAt(0, Location::RequiresRegister());
165   locations->SetOut(Location::RequiresFpuRegister());
166   if (is64bit) {
167     locations->AddTemp(Location::RequiresFpuRegister());
168     locations->AddTemp(Location::RequiresFpuRegister());
169   }
170 }
171 
172 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
173   Location input = locations->InAt(0);
174   Location output = locations->Out();
175   if (is64bit) {
176     // Need to use the temporary.
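    // A 64-bit movd between a GPR and an XMM register is not available in 32-bit mode,
    // so extract the low half with movd, shift the high half down with psrlq, and extract it too.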
177     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
178     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
179     __ movd(output.AsRegisterPairLow<Register>(), temp);
180     __ psrlq(temp, Immediate(32));
181     __ movd(output.AsRegisterPairHigh<Register>(), temp);
182   } else {
183     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
184   }
185 }
186 
187 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
188   Location input = locations->InAt(0);
189   Location output = locations->Out();
190   if (is64bit) {
191     // Need to use the temporary.
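    // Rebuild the 64-bit value in an XMM register: punpckldq interleaves the low dwords,
    // leaving {low, high} in the low quadword of temp1.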
192     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
193     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
194     __ movd(temp1, input.AsRegisterPairLow<Register>());
195     __ movd(temp2, input.AsRegisterPairHigh<Register>());
196     __ punpckldq(temp1, temp2);
197     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
198   } else {
199     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
200   }
201 }
202 
203 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
204   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
205 }
206 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
207   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
208 }
209 
210 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
211   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
212 }
213 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
214   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
215 }
216 
217 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
218   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
219 }
220 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
221   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
222 }
223 
224 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
225   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
226 }
227 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
228   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
229 }
230 
231 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
232   LocationSummary* locations =
233       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
234   locations->SetInAt(0, Location::RequiresRegister());
235   locations->SetOut(Location::SameAsFirstInput());
236 }
237 
238 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
239   LocationSummary* locations =
240       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
241   locations->SetInAt(0, Location::RequiresRegister());
242   locations->SetOut(Location::RequiresRegister());
243 }
244 
245 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
246   LocationSummary* locations =
247       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
248   locations->SetInAt(0, Location::RequiresRegister());
249   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
250 }
251 
252 static void GenReverseBytes(LocationSummary* locations,
253                             DataType::Type size,
254                             X86Assembler* assembler) {
255   Register out = locations->Out().AsRegister<Register>();
256 
257   switch (size) {
258     case DataType::Type::kInt16:
259       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
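      // E.g. 0x0000ABCD: bswapl yields 0xCDAB0000, and the arithmetic shift by 16 brings the
      // swapped halfword back down sign-extended (0xFFFFCDAB).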
260       __ bswapl(out);
261       __ sarl(out, Immediate(16));
262       break;
263     case DataType::Type::kInt32:
264       __ bswapl(out);
265       break;
266     default:
267       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
268       UNREACHABLE();
269   }
270 }
271 
272 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
273   CreateIntToIntLocations(allocator_, invoke);
274 }
275 
276 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
277   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
278 }
279 
280 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
281   CreateLongToLongLocations(allocator_, invoke);
282 }
283 
284 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
285   LocationSummary* locations = invoke->GetLocations();
286   Location input = locations->InAt(0);
287   Register input_lo = input.AsRegisterPairLow<Register>();
288   Register input_hi = input.AsRegisterPairHigh<Register>();
289   Location output = locations->Out();
290   Register output_lo = output.AsRegisterPairLow<Register>();
291   Register output_hi = output.AsRegisterPairHigh<Register>();
292 
293   X86Assembler* assembler = GetAssembler();
294   // Assign the inputs to the outputs, mixing low/high.
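  // Byte-reversing a 64-bit value amounts to swapping the two 32-bit halves and then
  // byte-swapping each half.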
295   __ movl(output_lo, input_hi);
296   __ movl(output_hi, input_lo);
297   __ bswapl(output_lo);
298   __ bswapl(output_hi);
299 }
300 
301 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
302   CreateIntToIntLocations(allocator_, invoke);
303 }
304 
305 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
306   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
307 }
308 
309 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
310   LocationSummary* locations =
311       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
312   locations->SetInAt(0, Location::RequiresFpuRegister());
313   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
314 }
315 
316 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
317   CreateFPToFPLocations(allocator_, invoke);
318 }
319 
320 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
321   LocationSummary* locations = invoke->GetLocations();
322   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
323   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
324 
325   GetAssembler()->sqrtsd(out, in);
326 }
327 
328 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
329                                        HInvoke* invoke,
330                                        CodeGeneratorX86* codegen) {
331   // Do we have instruction support?
332   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
333     return;
334   }
335 
336   CreateFPToFPLocations(allocator, invoke);
337 }
338 
339 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
340   LocationSummary* locations = invoke->GetLocations();
341   DCHECK(!locations->WillCall());
342   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
343   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
344   __ roundsd(out, in, Immediate(round_mode));
345 }
346 
347 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
348   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
349 }
350 
351 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
352   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
353 }
354 
355 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
356   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
357 }
358 
359 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
360   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
361 }
362 
363 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
364   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
365 }
366 
367 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
368   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
369 }
370 
371 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
372   // Do we have instruction support?
373   if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
374     return;
375   }
376 
377   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
378   LocationSummary* locations =
379       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
380   locations->SetInAt(0, Location::RequiresFpuRegister());
381   if (static_or_direct->HasSpecialInput() &&
382       invoke->InputAt(
383           static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
384     locations->SetInAt(1, Location::RequiresRegister());
385   }
386   locations->SetOut(Location::RequiresRegister());
387   locations->AddTemp(Location::RequiresFpuRegister());
388   locations->AddTemp(Location::RequiresFpuRegister());
389 }
390 
391 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
392   LocationSummary* locations = invoke->GetLocations();
393   DCHECK(!locations->WillCall());
394 
395   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
396   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
397   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
398   Register out = locations->Out().AsRegister<Register>();
399   NearLabel skip_incr, done;
400   X86Assembler* assembler = GetAssembler();
401 
402   // Since no direct x86 rounding instruction matches the required semantics,
403   // this intrinsic is implemented as follows:
404   //  result = floor(in);
405   //  if (in - result >= 0.5f)
406   //    result = result + 1.0f;
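  //  For example, for in = -3.5f: floor gives -4.0f, and -3.5f - (-4.0f) = 0.5f >= 0.5f,
  //  so the result is -3, matching Java's round-half-up semantics. NaN and out-of-range
  //  values are handled by the final conversion below.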
407   __ movss(t2, in);
408   __ roundss(t1, in, Immediate(1));
409   __ subss(t2, t1);
410   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
411     // Direct constant area available.
412     HX86ComputeBaseMethodAddress* method_address =
413         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
414     Register constant_area = locations->InAt(1).AsRegister<Register>();
415     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
416                                                 method_address,
417                                                 constant_area));
418     __ j(kBelow, &skip_incr);
419     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
420                                                method_address,
421                                                constant_area));
422     __ Bind(&skip_incr);
423   } else {
424     // No constant area: go through stack.
425     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
426     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
427     __ comiss(t2, Address(ESP, 4));
428     __ j(kBelow, &skip_incr);
429     __ addss(t1, Address(ESP, 0));
430     __ Bind(&skip_incr);
431     __ addl(ESP, Immediate(8));
432   }
433 
434   // Final conversion to an integer. Unfortunately this also does not have a
435   // direct x86 instruction, since NaN should map to 0 and large positive
436   // values need to be clipped to the extreme value.
437   __ movl(out, Immediate(kPrimIntMax));
438   __ cvtsi2ss(t2, out);
439   __ comiss(t1, t2);
440   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
441   __ movl(out, Immediate(0));  // does not change flags
442   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
443   __ cvttss2si(out, t1);
444   __ Bind(&done);
445 }
446 
447 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
448   LocationSummary* locations =
449       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
450   InvokeRuntimeCallingConvention calling_convention;
451   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
452   locations->SetOut(Location::FpuRegisterLocation(XMM0));
453 }
454 
455 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
456   LocationSummary* locations = invoke->GetLocations();
457   DCHECK(locations->WillCall());
458   DCHECK(invoke->IsInvokeStaticOrDirect());
459   X86Assembler* assembler = codegen->GetAssembler();
460 
461   // We need some place to pass the parameters.
462   __ subl(ESP, Immediate(16));
463   __ cfi().AdjustCFAOffset(16);
464 
465   // Pass the parameters at the bottom of the stack.
466   __ movsd(Address(ESP, 0), XMM0);
467 
468   // If we have a second parameter, pass it next.
469   if (invoke->GetNumberOfArguments() == 2) {
470     __ movsd(Address(ESP, 8), XMM1);
471   }
472 
473   // Now do the actual call.
474   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
475 
476   // Extract the return value from the FP stack.
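  // (The native 32-bit calling convention returns floating-point results in x87 ST(0),
  // so spill it to the stack and reload it into XMM0 where the compiled code expects it.)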
477   __ fstpl(Address(ESP, 0));
478   __ movsd(XMM0, Address(ESP, 0));
479 
480   // And clean up the stack.
481   __ addl(ESP, Immediate(16));
482   __ cfi().AdjustCFAOffset(-16);
483 }
484 
485 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
486   LocationSummary* locations =
487       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
488   if (is_long) {
489     locations->SetInAt(0, Location::RequiresRegister());
490   } else {
491     locations->SetInAt(0, Location::Any());
492   }
493   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
494 }
495 
496 static void GenLowestOneBit(X86Assembler* assembler,
497                       CodeGeneratorX86* codegen,
498                       bool is_long,
499                       HInvoke* invoke) {
500   LocationSummary* locations = invoke->GetLocations();
501   Location src = locations->InAt(0);
502   Location out_loc = locations->Out();
503 
504   if (invoke->InputAt(0)->IsConstant()) {
505     // Evaluate this at compile time.
506     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
507     if (value == 0) {
508       if (is_long) {
509         __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
510         __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
511       } else {
512         __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
513       }
514       return;
515     }
516     // Nonzero value.
517     value = is_long ? CTZ(static_cast<uint64_t>(value))
518                     : CTZ(static_cast<uint32_t>(value));
519     if (is_long) {
520       if (value >= 32) {
521         int shift = value-32;
522         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
523         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
524       } else {
525         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
526         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
527       }
528     } else {
529       codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
530     }
531     return;
532   }
533   // Handle the non-constant case.
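  // For the 64-bit case the two's-complement negation is done across the register pair:
  // negl negates the low word and sets the carry flag if it was non-zero, adcl folds that
  // borrow into the high word, and the second negl completes -src. ANDing with the original
  // value then isolates the lowest set bit (x & -x), e.g. 0b10100 & -0b10100 = 0b00100.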
534   if (is_long) {
535     DCHECK(src.IsRegisterPair());
536     Register src_lo = src.AsRegisterPairLow<Register>();
537     Register src_hi = src.AsRegisterPairHigh<Register>();
538 
539     Register out_lo = out_loc.AsRegisterPairLow<Register>();
540     Register out_hi = out_loc.AsRegisterPairHigh<Register>();
541 
542     __ movl(out_lo, src_lo);
543     __ movl(out_hi, src_hi);
544 
545     __ negl(out_lo);
546     __ adcl(out_hi, Immediate(0));
547     __ negl(out_hi);
548 
549     __ andl(out_lo, src_lo);
550     __ andl(out_hi, src_hi);
551   } else {
552     if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
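      // BLSI (a BMI1 instruction, gated here on the AVX2 feature flag) computes `src & -src`
      // in a single instruction.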
553       Register out = out_loc.AsRegister<Register>();
554       __ blsi(out, src.AsRegister<Register>());
555     } else {
556       Register out = out_loc.AsRegister<Register>();
557       // Do tmp & -tmp
558       if (src.IsRegister()) {
559         __ movl(out, src.AsRegister<Register>());
560       } else {
561         DCHECK(src.IsStackSlot());
562         __ movl(out, Address(ESP, src.GetStackIndex()));
563       }
564       __ negl(out);
565 
566       if (src.IsRegister()) {
567         __ andl(out, src.AsRegister<Register>());
568       } else {
569         __ andl(out, Address(ESP, src.GetStackIndex()));
570       }
571     }
572   }
573 }
574 
575 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
576   CreateFPToFPCallLocations(allocator_, invoke);
577 }
578 
579 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
580   GenFPToFPCall(invoke, codegen_, kQuickCos);
581 }
582 
583 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
584   CreateFPToFPCallLocations(allocator_, invoke);
585 }
586 
587 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
588   GenFPToFPCall(invoke, codegen_, kQuickSin);
589 }
590 
591 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
592   CreateFPToFPCallLocations(allocator_, invoke);
593 }
594 
595 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
596   GenFPToFPCall(invoke, codegen_, kQuickAcos);
597 }
598 
599 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
600   CreateFPToFPCallLocations(allocator_, invoke);
601 }
602 
603 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
604   GenFPToFPCall(invoke, codegen_, kQuickAsin);
605 }
606 
607 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
608   CreateFPToFPCallLocations(allocator_, invoke);
609 }
610 
611 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
612   GenFPToFPCall(invoke, codegen_, kQuickAtan);
613 }
614 
615 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
616   CreateFPToFPCallLocations(allocator_, invoke);
617 }
618 
619 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
620   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
621 }
622 
623 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
624   CreateFPToFPCallLocations(allocator_, invoke);
625 }
626 
627 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
628   GenFPToFPCall(invoke, codegen_, kQuickCosh);
629 }
630 
631 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
632   CreateFPToFPCallLocations(allocator_, invoke);
633 }
634 
635 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
636   GenFPToFPCall(invoke, codegen_, kQuickExp);
637 }
638 
639 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
640   CreateFPToFPCallLocations(allocator_, invoke);
641 }
642 
643 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
644   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
645 }
646 
647 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
648   CreateFPToFPCallLocations(allocator_, invoke);
649 }
650 
651 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
652   GenFPToFPCall(invoke, codegen_, kQuickLog);
653 }
654 
655 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
656   CreateFPToFPCallLocations(allocator_, invoke);
657 }
658 
659 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
660   GenFPToFPCall(invoke, codegen_, kQuickLog10);
661 }
662 
663 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
664   CreateFPToFPCallLocations(allocator_, invoke);
665 }
666 
667 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
668   GenFPToFPCall(invoke, codegen_, kQuickSinh);
669 }
670 
671 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
672   CreateFPToFPCallLocations(allocator_, invoke);
673 }
674 
675 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
676   GenFPToFPCall(invoke, codegen_, kQuickTan);
677 }
678 
679 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
680   CreateFPToFPCallLocations(allocator_, invoke);
681 }
682 
683 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
684   GenFPToFPCall(invoke, codegen_, kQuickTanh);
685 }
686 
687 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
688   CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
689 }
690 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
691   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
692 }
693 
694 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
695   CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
696 }
697 
698 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
699   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
700 }
701 
702 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
703   LocationSummary* locations =
704       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
705   InvokeRuntimeCallingConvention calling_convention;
706   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
707   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
708   locations->SetOut(Location::FpuRegisterLocation(XMM0));
709 }
710 
711 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
712   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
713   LocationSummary* locations =
714       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
715   InvokeRuntimeCallingConvention calling_convention;
716   locations->SetInAt(0, Location::RequiresFpuRegister());
717   locations->SetInAt(1, Location::RequiresFpuRegister());
718   locations->SetInAt(2, Location::RequiresFpuRegister());
719   locations->SetOut(Location::SameAsFirstInput());
720 }
721 
722 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
723   CreateFPFPToFPCallLocations(allocator_, invoke);
724 }
725 
726 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
727   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
728 }
729 
730 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
731   CreateFPFPToFPCallLocations(allocator_, invoke);
732 }
733 
734 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
735   GenFPToFPCall(invoke, codegen_, kQuickPow);
736 }
737 
738 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
739   CreateFPFPToFPCallLocations(allocator_, invoke);
740 }
741 
742 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
743   GenFPToFPCall(invoke, codegen_, kQuickHypot);
744 }
745 
746 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
747   CreateFPFPToFPCallLocations(allocator_, invoke);
748 }
749 
750 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
751   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
752 }
753 
754 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
755   // We need at least two of the positions or length to be an integer constant,
756   // or else we won't have enough free registers.
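  // (The copy below pins ESI, EDI and ECX as temporaries and the two array references need
  // registers of their own, which on 32-bit x86 likely leaves only one free register for the
  // remaining position/length inputs.)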
757   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
758   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
759   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
760 
761   int num_constants =
762       ((src_pos != nullptr) ? 1 : 0)
763       + ((dest_pos != nullptr) ? 1 : 0)
764       + ((length != nullptr) ? 1 : 0);
765 
766   if (num_constants < 2) {
767     // Not enough free registers.
768     return;
769   }
770 
771   // As long as we are checking, we might as well check to see if the src and dest
772   // positions are >= 0.
773   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
774       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
775     // We will have to fail anyways.
776     return;
777   }
778 
779   // And since we are already checking, check the length too.
780   if (length != nullptr) {
781     int32_t len = length->GetValue();
782     if (len < 0) {
783       // Just call as normal.
784       return;
785     }
786   }
787 
788   // Okay, it is safe to generate inline code.
789   LocationSummary* locations =
790       new (invoke->GetBlock()->GetGraph()->GetAllocator())
791       LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
792   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
793   locations->SetInAt(0, Location::RequiresRegister());
794   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
795   locations->SetInAt(2, Location::RequiresRegister());
796   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
797   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
798 
799   // And we need some temporaries.  We will use REP MOVS{B,W,L}, so we need fixed registers.
800   locations->AddTemp(Location::RegisterLocation(ESI));
801   locations->AddTemp(Location::RegisterLocation(EDI));
802   locations->AddTemp(Location::RegisterLocation(ECX));
803 }
804 
805 template <typename LhsType>
806 static void EmitCmplJLess(X86Assembler* assembler,
807                           LhsType lhs,
808                           Location rhs,
809                           Label* label) {
810   static_assert(std::is_same_v<LhsType, Register> || std::is_same_v<LhsType, Address>);
811   if (rhs.IsConstant()) {
812     int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
813     __ cmpl(lhs, Immediate(rhs_constant));
814   } else {
815     __ cmpl(lhs, rhs.AsRegister<Register>());
816   }
817   __ j(kLess, label);
818 }
819 
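// Editor's note: the helper below verifies, for one array, that the position is valid and that at
// least `length` elements are available from `pos` (essentially `0 <= pos` and
// `length <= array.length - pos`), branching to the intrinsic slow path otherwise.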
820 static void CheckSystemArrayCopyPosition(X86Assembler* assembler,
821                                          Register array,
822                                          Location pos,
823                                          Location length,
824                                          SlowPathCode* slow_path,
825                                          Register temp,
826                                          bool length_is_array_length,
827                                          bool position_sign_checked) {
828   // Where is the length in the Array?
829   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
830 
831   if (pos.IsConstant()) {
832     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
833     if (pos_const == 0) {
834       if (!length_is_array_length) {
835         // Check that length(array) >= length.
836         EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
837       }
838     } else {
839       // Calculate length(array) - pos.
840       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
841       // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
842       __ movl(temp, Address(array, length_offset));
843       __ subl(temp, Immediate(pos_const));
844 
845       // Check that (length(array) - pos) >= length.
846       EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
847     }
848   } else if (length_is_array_length) {
849     // The only way the copy can succeed is if pos is zero.
850     Register pos_reg = pos.AsRegister<Register>();
851     __ testl(pos_reg, pos_reg);
852     __ j(kNotEqual, slow_path->GetEntryLabel());
853   } else {
854     // Check that pos >= 0.
855     Register pos_reg = pos.AsRegister<Register>();
856     if (!position_sign_checked) {
857       __ testl(pos_reg, pos_reg);
858       __ j(kLess, slow_path->GetEntryLabel());
859     }
860 
861     // Calculate length(array) - pos.
862     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
863     // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
864     __ movl(temp, Address(array, length_offset));
865     __ subl(temp, pos_reg);
866 
867     // Check that (length(array) - pos) >= length.
868     EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
869   }
870 }
871 
872 static void SystemArrayCopyPrimitive(HInvoke* invoke,
873                                      X86Assembler* assembler,
874                                      CodeGeneratorX86* codegen,
875                                      DataType::Type type) {
876   LocationSummary* locations = invoke->GetLocations();
877   Register src = locations->InAt(0).AsRegister<Register>();
878   Location src_pos = locations->InAt(1);
879   Register dest = locations->InAt(2).AsRegister<Register>();
880   Location dest_pos = locations->InAt(3);
881   Location length = locations->InAt(4);
882 
883   // Temporaries that we need for MOVSB/W/L.
884   Register src_base = locations->GetTemp(0).AsRegister<Register>();
885   DCHECK_EQ(src_base, ESI);
886   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
887   DCHECK_EQ(dest_base, EDI);
888   Register count = locations->GetTemp(2).AsRegister<Register>();
889   DCHECK_EQ(count, ECX);
890 
891   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
892   codegen->AddSlowPath(slow_path);
893 
894   // Bail out if the source and destination are the same (to handle overlap).
895   __ cmpl(src, dest);
896   __ j(kEqual, slow_path->GetEntryLabel());
897 
898   // Bail out if the source is null.
899   __ testl(src, src);
900   __ j(kEqual, slow_path->GetEntryLabel());
901 
902   // Bail out if the destination is null.
903   __ testl(dest, dest);
904   __ j(kEqual, slow_path->GetEntryLabel());
905 
906   // If the length is negative, bail out.
907   // We have already checked in the LocationsBuilder for the constant case.
908   if (!length.IsConstant()) {
909     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
910     __ j(kLess, slow_path->GetEntryLabel());
911   }
912 
913   // We need the count in ECX.
914   if (length.IsConstant()) {
915     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
916   } else {
917     __ movl(count, length.AsRegister<Register>());
918   }
919 
920   // Validity checks: source. Use src_base as a temporary register.
921   CheckSystemArrayCopyPosition(assembler,
922                                src,
923                                src_pos,
924                                Location::RegisterLocation(count),
925                                slow_path,
926                                src_base,
927                                /*length_is_array_length=*/ false,
928                                /*position_sign_checked=*/ false);
929 
930   // Validity checks: dest. Use src_base as a temporary register.
931   CheckSystemArrayCopyPosition(assembler,
932                                dest,
933                                dest_pos,
934                                Location::RegisterLocation(count),
935                                slow_path,
936                                src_base,
937                                /*length_is_array_length=*/ false,
938                                /*position_sign_checked=*/ false);
939 
940   // Okay, everything checks out.  Finally time to do the copy.
941   // Compute the element size and array data offset used to form the copy addresses below.
942   const size_t data_size = DataType::Size(type);
943   const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
944 
945   GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
946   GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
947 
948   // Do the move.
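  // REP MOVSB/MOVSW/MOVSL copies ECX elements from [ESI] to [EDI], advancing both pointers by
  // the element size after each element.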
949   switch (type) {
950     case DataType::Type::kInt8:
951        __ rep_movsb();
952        break;
953     case DataType::Type::kUint16:
954        __ rep_movsw();
955        break;
956     case DataType::Type::kInt32:
957        __ rep_movsl();
958        break;
959     default:
960        LOG(FATAL) << "Unexpected data type for intrinsic";
961   }
962   __ Bind(slow_path->GetExitLabel());
963 }
964 
965 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
966   CreateSystemArrayCopyLocations(invoke);
967 }
968 
969 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
970   X86Assembler* assembler = GetAssembler();
971   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
972 }
973 
974 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
975   X86Assembler* assembler = GetAssembler();
976   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
977 }
978 
979 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
980   CreateSystemArrayCopyLocations(invoke);
981 }
982 
983 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
984   X86Assembler* assembler = GetAssembler();
985   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
986 }
987 
988 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
989   CreateSystemArrayCopyLocations(invoke);
990 }
991 
992 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
993   // The inputs plus one temp.
994   LocationSummary* locations = new (allocator_) LocationSummary(
995       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
996   InvokeRuntimeCallingConvention calling_convention;
997   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
998   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
999   locations->SetOut(Location::RegisterLocation(EAX));
1000 }
1001 
1002 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1003   X86Assembler* assembler = GetAssembler();
1004   LocationSummary* locations = invoke->GetLocations();
1005 
1006   // Note that the null check must have been done earlier.
1007   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1008 
1009   Register argument = locations->InAt(1).AsRegister<Register>();
1010   __ testl(argument, argument);
1011   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1012   codegen_->AddSlowPath(slow_path);
1013   __ j(kEqual, slow_path->GetEntryLabel());
1014 
1015   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1016   __ Bind(slow_path->GetExitLabel());
1017 }
1018 
1019 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1020   LocationSummary* locations =
1021       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1022   locations->SetInAt(0, Location::RequiresRegister());
1023   locations->SetInAt(1, Location::RequiresRegister());
1024 
1025   // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1026   locations->AddTemp(Location::RegisterLocation(ECX));
1027   locations->AddTemp(Location::RegisterLocation(EDI));
1028 
1029   // Set output, ESI needed for repe_cmpsl instruction anyways.
1030   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1031 }
1032 
1033 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1034   X86Assembler* assembler = GetAssembler();
1035   LocationSummary* locations = invoke->GetLocations();
1036 
1037   Register str = locations->InAt(0).AsRegister<Register>();
1038   Register arg = locations->InAt(1).AsRegister<Register>();
1039   Register ecx = locations->GetTemp(0).AsRegister<Register>();
1040   Register edi = locations->GetTemp(1).AsRegister<Register>();
1041   Register esi = locations->Out().AsRegister<Register>();
1042 
1043   NearLabel end, return_true, return_false;
1044 
1045   // Get offsets of count, value, and class fields within a string object.
1046   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1047   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1048   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1049 
1050   // Note that the null check must have been done earlier.
1051   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1052 
1053   StringEqualsOptimizations optimizations(invoke);
1054   if (!optimizations.GetArgumentNotNull()) {
1055     // Check if input is null, return false if it is.
1056     __ testl(arg, arg);
1057     __ j(kEqual, &return_false);
1058   }
1059 
1060   if (!optimizations.GetArgumentIsString()) {
1061     // Instanceof check for the argument by comparing class fields.
1062     // All string objects must have the same type since String cannot be subclassed.
1063     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1064     // If the argument is a string object, its class field must be equal to receiver's class field.
1065     //
1066     // As the String class is expected to be non-movable, we can read the class
1067     // field from String.equals' arguments without read barriers.
1068     AssertNonMovableStringClass();
1069     // Also, because we use the loaded class references only to compare them, we
1070     // don't need to unpoison them.
1071     // /* HeapReference<Class> */ ecx = str->klass_
1072     __ movl(ecx, Address(str, class_offset));
1073     // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1074     __ cmpl(ecx, Address(arg, class_offset));
1075     __ j(kNotEqual, &return_false);
1076   }
1077 
1078   // Reference equality check, return true if same reference.
1079   __ cmpl(str, arg);
1080   __ j(kEqual, &return_true);
1081 
1082   // Load length and compression flag of receiver string.
1083   __ movl(ecx, Address(str, count_offset));
1084   // Check if lengths and compression flags are equal, return false if they're not.
1085   // Two identical strings will always have same compression style since
1086   // compression style is decided on alloc.
1087   __ cmpl(ecx, Address(arg, count_offset));
1088   __ j(kNotEqual, &return_false);
1089   // Return true if strings are empty. Even with string compression `count == 0` means empty.
1090   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1091                 "Expecting 0=compressed, 1=uncompressed");
1092   __ jecxz(&return_true);
1093 
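  // With string compression enabled, the count field holds (length << 1) | flag, where the low
  // bit is 1 for uncompressed (16-bit chars) and 0 for compressed (8-bit chars); e.g. an
  // uncompressed 3-char string stores 7 and a compressed one stores 6. The block below strips
  // the flag and normalizes the length so the comparison loop always works on 32-bit units.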
1094   if (mirror::kUseStringCompression) {
1095     NearLabel string_uncompressed;
1096     // Extract length and differentiate between both compressed or both uncompressed.
1097     // Different compression style is cut above.
1098     __ shrl(ecx, Immediate(1));
1099     __ j(kCarrySet, &string_uncompressed);
1100     // Divide string length by 2, rounding up, and continue as if uncompressed.
1101     __ addl(ecx, Immediate(1));
1102     __ shrl(ecx, Immediate(1));
1103     __ Bind(&string_uncompressed);
1104   }
1105   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1106   __ leal(esi, Address(str, value_offset));
1107   __ leal(edi, Address(arg, value_offset));
1108 
1109   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1110   // divisible by 2.
1111   __ addl(ecx, Immediate(1));
1112   __ shrl(ecx, Immediate(1));
1113 
1114   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1115   // or 4 characters (compressed) at a time.
1116   DCHECK_ALIGNED(value_offset, 4);
1117   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1118 
1119   // Loop to compare strings two characters at a time starting at the beginning of the string.
1120   __ repe_cmpsl();
1121   // If strings are not equal, zero flag will be cleared.
1122   __ j(kNotEqual, &return_false);
1123 
1124   // Return true and exit the function.
1125   // If loop does not result in returning false, we return true.
1126   __ Bind(&return_true);
1127   __ movl(esi, Immediate(1));
1128   __ jmp(&end);
1129 
1130   // Return false and exit the function.
1131   __ Bind(&return_false);
1132   __ xorl(esi, esi);
1133   __ Bind(&end);
1134 }
1135 
1136 static void CreateStringIndexOfLocations(HInvoke* invoke,
1137                                          ArenaAllocator* allocator,
1138                                          bool start_at_zero) {
1139   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1140                                                                LocationSummary::kCallOnSlowPath,
1141                                                                kIntrinsified);
1142   // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1143   locations->SetInAt(0, Location::RegisterLocation(EDI));
1144   // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1145   // allocator to do that, anyways. We can still do the constant check by checking the parameter
1146   // of the instruction explicitly.
1147   // Note: This works as we don't clobber EAX anywhere.
1148   locations->SetInAt(1, Location::RegisterLocation(EAX));
1149   if (!start_at_zero) {
1150     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1151   }
1152   // As we clobber EDI during execution anyways, also use it as the output.
1153   locations->SetOut(Location::SameAsFirstInput());
1154 
1155   // repne scasw uses ECX as the counter.
1156   locations->AddTemp(Location::RegisterLocation(ECX));
1157   // Need another temporary to be able to compute the result.
1158   locations->AddTemp(Location::RequiresRegister());
1159   if (mirror::kUseStringCompression) {
1160     // Need another temporary to be able to save unflagged string length.
1161     locations->AddTemp(Location::RequiresRegister());
1162   }
1163 }
1164 
1165 static void GenerateStringIndexOf(HInvoke* invoke,
1166                                   X86Assembler* assembler,
1167                                   CodeGeneratorX86* codegen,
1168                                   bool start_at_zero) {
1169   LocationSummary* locations = invoke->GetLocations();
1170 
1171   // Note that the null check must have been done earlier.
1172   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1173 
1174   Register string_obj = locations->InAt(0).AsRegister<Register>();
1175   Register search_value = locations->InAt(1).AsRegister<Register>();
1176   Register counter = locations->GetTemp(0).AsRegister<Register>();
1177   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1178   Register out = locations->Out().AsRegister<Register>();
1179   // Only used when string compression feature is on.
1180   Register string_length_flagged;
1181 
1182   // Check our assumptions for registers.
1183   DCHECK_EQ(string_obj, EDI);
1184   DCHECK_EQ(search_value, EAX);
1185   DCHECK_EQ(counter, ECX);
1186   DCHECK_EQ(out, EDI);
1187 
1188   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1189   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1190   SlowPathCode* slow_path = nullptr;
1191   HInstruction* code_point = invoke->InputAt(1);
1192   if (code_point->IsIntConstant()) {
1193     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1194         std::numeric_limits<uint16_t>::max()) {
1195       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1196       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1197       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1198       codegen->AddSlowPath(slow_path);
1199       __ jmp(slow_path->GetEntryLabel());
1200       __ Bind(slow_path->GetExitLabel());
1201       return;
1202     }
1203   } else if (code_point->GetType() != DataType::Type::kUint16) {
1204     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1205     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1206     codegen->AddSlowPath(slow_path);
1207     __ j(kAbove, slow_path->GetEntryLabel());
1208   }
1209 
1210   // From here down, we know that we are looking for a char that fits in 16 bits.
1211   // Location of reference to data array within the String object.
1212   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1213   // Location of count within the String object.
1214   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1215 
1216   // Load the count field of the string containing the length and compression flag.
1217   __ movl(string_length, Address(string_obj, count_offset));
1218 
1219   // Do a zero-length check. Even with string compression `count == 0` means empty.
1220   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1221                 "Expecting 0=compressed, 1=uncompressed");
1222   // TODO: Support jecxz.
1223   NearLabel not_found_label;
1224   __ testl(string_length, string_length);
1225   __ j(kEqual, &not_found_label);
1226 
1227   if (mirror::kUseStringCompression) {
1228     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1229     __ movl(string_length_flagged, string_length);
1230     // Extract the length and shift out the least significant bit used as compression flag.
1231     __ shrl(string_length, Immediate(1));
1232   }
1233 
1234   if (start_at_zero) {
1235     // Number of chars to scan is the same as the string length.
1236     __ movl(counter, string_length);
1237 
1238     // Move to the start of the string.
1239     __ addl(string_obj, Immediate(value_offset));
1240   } else {
1241     Register start_index = locations->InAt(2).AsRegister<Register>();
1242 
1243     // Do a start_index check.
1244     __ cmpl(start_index, string_length);
1245     __ j(kGreaterEqual, &not_found_label);
1246 
1247     // Ensure we have a start index >= 0.
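         // Branch-free clamp: counter = max(start_index, 0). The XOR zeroes counter and the CMOV
         // below overwrites it with start_index only when start_index is positive.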
1248     __ xorl(counter, counter);
1249     __ cmpl(start_index, Immediate(0));
1250     __ cmovl(kGreater, counter, start_index);
1251 
1252     if (mirror::kUseStringCompression) {
1253       NearLabel modify_counter, offset_uncompressed_label;
1254       __ testl(string_length_flagged, Immediate(1));
1255       __ j(kNotZero, &offset_uncompressed_label);
1256       // Move to the start of the string: string_obj + value_offset + start_index.
1257       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1258       __ jmp(&modify_counter);
1259 
1260       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1261       __ Bind(&offset_uncompressed_label);
1262       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1263 
1264       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1265       // compare.
1266       __ Bind(&modify_counter);
1267     } else {
1268       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1269     }
1270     __ negl(counter);
1271     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1272   }
1273 
1274   if (mirror::kUseStringCompression) {
1275     NearLabel uncompressed_string_comparison;
1276     NearLabel comparison_done;
1277     __ testl(string_length_flagged, Immediate(1));
1278     __ j(kNotZero, &uncompressed_string_comparison);
1279 
1280     // Check if EAX (search_value) is ASCII.
1281     __ cmpl(search_value, Immediate(127));
1282     __ j(kGreater, &not_found_label);
1283     // Compare byte by byte.
1284     __ repne_scasb();
1285     __ jmp(&comparison_done);
1286 
1287     // Everything is set up for repne scasw:
1288     //   * Comparison address in EDI.
1289     //   * Counter in ECX.
1290     __ Bind(&uncompressed_string_comparison);
1291     __ repne_scasw();
1292     __ Bind(&comparison_done);
1293   } else {
1294     __ repne_scasw();
1295   }
1296   // Did we find a match?
1297   __ j(kNotEqual, &not_found_label);
1298 
1299   // Yes, we matched.  Compute the index of the result.
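       // repne scasw decrements ECX for every element scanned, including the match, so after a hit
       // ECX holds (length - index - 1); length - ECX - 1 therefore recovers the zero-based index.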
1300   __ subl(string_length, counter);
1301   __ leal(out, Address(string_length, -1));
1302 
1303   NearLabel done;
1304   __ jmp(&done);
1305 
1306   // Failed to match; return -1.
1307   __ Bind(&not_found_label);
1308   __ movl(out, Immediate(-1));
1309 
1310   // And join up at the end.
1311   __ Bind(&done);
1312   if (slow_path != nullptr) {
1313     __ Bind(slow_path->GetExitLabel());
1314   }
1315 }
1316 
VisitStringIndexOf(HInvoke * invoke)1317 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1318   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1319 }
1320 
VisitStringIndexOf(HInvoke * invoke)1321 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1322   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1323 }
1324 
VisitStringIndexOfAfter(HInvoke * invoke)1325 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1326   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1327 }
1328 
VisitStringIndexOfAfter(HInvoke * invoke)1329 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1330   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1331 }
1332 
VisitStringNewStringFromBytes(HInvoke * invoke)1333 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1334   LocationSummary* locations = new (allocator_) LocationSummary(
1335       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1336   InvokeRuntimeCallingConvention calling_convention;
1337   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1338   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1339   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1340   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1341   locations->SetOut(Location::RegisterLocation(EAX));
1342 }
1343 
VisitStringNewStringFromBytes(HInvoke * invoke)1344 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1345   X86Assembler* assembler = GetAssembler();
1346   LocationSummary* locations = invoke->GetLocations();
1347 
1348   Register byte_array = locations->InAt(0).AsRegister<Register>();
1349   __ testl(byte_array, byte_array);
1350   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1351   codegen_->AddSlowPath(slow_path);
1352   __ j(kEqual, slow_path->GetEntryLabel());
1353 
1354   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1355   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1356   __ Bind(slow_path->GetExitLabel());
1357 }
1358 
VisitStringNewStringFromChars(HInvoke * invoke)1359 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1360   LocationSummary* locations =
1361       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1362   InvokeRuntimeCallingConvention calling_convention;
1363   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1364   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1365   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1366   locations->SetOut(Location::RegisterLocation(EAX));
1367 }
1368 
VisitStringNewStringFromChars(HInvoke * invoke)1369 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1370   // No need to emit code checking whether `locations->InAt(2)` is a null
1371   // pointer, as callers of the native method
1372   //
1373   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1374   //
1375   // all include a null check on `data` before calling that method.
1376   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1377   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1378 }
1379 
VisitStringNewStringFromString(HInvoke * invoke)1380 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1381   LocationSummary* locations = new (allocator_) LocationSummary(
1382       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1383   InvokeRuntimeCallingConvention calling_convention;
1384   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1385   locations->SetOut(Location::RegisterLocation(EAX));
1386 }
1387 
VisitStringNewStringFromString(HInvoke * invoke)1388 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1389   X86Assembler* assembler = GetAssembler();
1390   LocationSummary* locations = invoke->GetLocations();
1391 
1392   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1393   __ testl(string_to_copy, string_to_copy);
1394   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1395   codegen_->AddSlowPath(slow_path);
1396   __ j(kEqual, slow_path->GetEntryLabel());
1397 
1398   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1399   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1400   __ Bind(slow_path->GetExitLabel());
1401 }
1402 
VisitStringGetCharsNoCheck(HInvoke * invoke)1403 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1404   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1405   LocationSummary* locations =
1406       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1407   locations->SetInAt(0, Location::RequiresRegister());
1408   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1409   // Place srcEnd in ECX to save a move below.
1410   locations->SetInAt(2, Location::RegisterLocation(ECX));
1411   locations->SetInAt(3, Location::RequiresRegister());
1412   locations->SetInAt(4, Location::RequiresRegister());
1413 
1414   // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1415   // There are not enough registers to also reserve ECX, so it is saved and restored in codegen.
1416   locations->AddTemp(Location::RegisterLocation(ESI));
1417   locations->AddTemp(Location::RegisterLocation(EDI));
1418 }
1419 
VisitStringGetCharsNoCheck(HInvoke * invoke)1420 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1421   X86Assembler* assembler = GetAssembler();
1422   LocationSummary* locations = invoke->GetLocations();
1423 
1424   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1425   // Location of data in char array buffer.
1426   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1427   // Location of char array data in string.
1428   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1429 
1430   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1431   Register obj = locations->InAt(0).AsRegister<Register>();
1432   Location srcBegin = locations->InAt(1);
1433   int srcBegin_value =
1434       srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1435   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1436   Register dst = locations->InAt(3).AsRegister<Register>();
1437   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1438 
1439   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1440   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1441   DCHECK_EQ(char_size, 2u);
1442 
1443   // Compute the number of chars (words) to move.
1444   // Save ECX, since we don't know if it will be used later.
1445   __ pushl(ECX);
1446   int stack_adjust = kX86WordSize;
1447   __ cfi().AdjustCFAOffset(stack_adjust);
1448   DCHECK_EQ(srcEnd, ECX);
1449   if (srcBegin.IsConstant()) {
1450     __ subl(ECX, Immediate(srcBegin_value));
1451   } else {
1452     DCHECK(srcBegin.IsRegister());
1453     __ subl(ECX, srcBegin.AsRegister<Register>());
1454   }
1455 
1456   NearLabel done;
1457   if (mirror::kUseStringCompression) {
1458     // Location of the count field in the String object.
1459     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1460     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1461     DCHECK_EQ(c_char_size, 1u);
1462     __ pushl(EAX);
1463     __ cfi().AdjustCFAOffset(stack_adjust);
1464 
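         // Dispatch on the compression flag: a compressed string stores one byte per char, so the
         // loop below widens each byte into a 16-bit char in the destination; uncompressed strings
         // jump ahead to the word-wise REP MOVSW copy.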
1465     NearLabel copy_loop, copy_uncompressed;
1466     __ testl(Address(obj, count_offset), Immediate(1));
1467     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1468                   "Expecting 0=compressed, 1=uncompressed");
1469     __ j(kNotZero, &copy_uncompressed);
1470     // Compute the source address: the string's value offset plus srcBegin (one byte per
1471     // compressed char).
1472     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1473 
1474     // Start the loop to copy String's value to Array of Char.
1475     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1476     __ Bind(&copy_loop);
1477     __ jecxz(&done);
1478     // Use EAX temporary (convert byte from ESI to word).
1479     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1480     __ movzxb(EAX, Address(ESI, 0));
1481     __ movw(Address(EDI, 0), EAX);
1482     __ leal(EDI, Address(EDI, char_size));
1483     __ leal(ESI, Address(ESI, c_char_size));
1484     // TODO: Add support for LOOP to X86Assembler.
1485     __ subl(ECX, Immediate(1));
1486     __ jmp(&copy_loop);
1487     __ Bind(&copy_uncompressed);
1488   }
1489 
1490   // Do the copy for uncompressed string.
1491   // Compute the address of the destination buffer.
1492   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1493   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1494   __ rep_movsw();
1495 
1496   __ Bind(&done);
1497   if (mirror::kUseStringCompression) {
1498     // Restore EAX.
1499     __ popl(EAX);
1500     __ cfi().AdjustCFAOffset(-stack_adjust);
1501   }
1502   // Restore ECX.
1503   __ popl(ECX);
1504   __ cfi().AdjustCFAOffset(-stack_adjust);
1505 }
1506 
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1507 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1508   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1509   Location out_loc = locations->Out();
1510   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1511   // to avoid a SIGBUS.
1512   switch (size) {
1513     case DataType::Type::kInt8:
1514       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1515       break;
1516     case DataType::Type::kInt16:
1517       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1518       break;
1519     case DataType::Type::kInt32:
1520       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1521       break;
1522     case DataType::Type::kInt64:
1523       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1524       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1525       break;
1526     default:
1527       LOG(FATAL) << "Type not recognized for peek: " << size;
1528       UNREACHABLE();
1529   }
1530 }
1531 
VisitMemoryPeekByte(HInvoke * invoke)1532 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1533   CreateLongToIntLocations(allocator_, invoke);
1534 }
1535 
VisitMemoryPeekByte(HInvoke * invoke)1536 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1537   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1538 }
1539 
VisitMemoryPeekIntNative(HInvoke * invoke)1540 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1541   CreateLongToIntLocations(allocator_, invoke);
1542 }
1543 
VisitMemoryPeekIntNative(HInvoke * invoke)1544 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1545   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1546 }
1547 
VisitMemoryPeekLongNative(HInvoke * invoke)1548 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1549   CreateLongToLongLocations(allocator_, invoke);
1550 }
1551 
VisitMemoryPeekLongNative(HInvoke * invoke)1552 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1553   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1554 }
1555 
VisitMemoryPeekShortNative(HInvoke * invoke)1556 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1557   CreateLongToIntLocations(allocator_, invoke);
1558 }
1559 
VisitMemoryPeekShortNative(HInvoke * invoke)1560 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1561   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1562 }
1563 
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1564 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1565                                          DataType::Type size,
1566                                          HInvoke* invoke) {
1567   LocationSummary* locations =
1568       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1569   locations->SetInAt(0, Location::RequiresRegister());
1570   HInstruction* value = invoke->InputAt(1);
1571   if (size == DataType::Type::kInt8) {
1572     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1573   } else {
1574     locations->SetInAt(1, Location::RegisterOrConstant(value));
1575   }
1576 }
1577 
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1578 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1579   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1580   Location value_loc = locations->InAt(1);
1581   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1582   // to avoid a SIGBUS.
1583   switch (size) {
1584     case DataType::Type::kInt8:
1585       if (value_loc.IsConstant()) {
1586         __ movb(Address(address, 0),
1587                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1588       } else {
1589         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1590       }
1591       break;
1592     case DataType::Type::kInt16:
1593       if (value_loc.IsConstant()) {
1594         __ movw(Address(address, 0),
1595                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1596       } else {
1597         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1598       }
1599       break;
1600     case DataType::Type::kInt32:
1601       if (value_loc.IsConstant()) {
1602         __ movl(Address(address, 0),
1603                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1604       } else {
1605         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1606       }
1607       break;
1608     case DataType::Type::kInt64:
1609       if (value_loc.IsConstant()) {
1610         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1611         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1612         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1613       } else {
1614         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1615         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1616       }
1617       break;
1618     default:
1619       LOG(FATAL) << "Type not recognized for poke: " << size;
1620       UNREACHABLE();
1621   }
1622 }
1623 
VisitMemoryPokeByte(HInvoke * invoke)1624 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1625   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1626 }
1627 
VisitMemoryPokeByte(HInvoke * invoke)1628 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1629   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1630 }
1631 
VisitMemoryPokeIntNative(HInvoke * invoke)1632 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1633   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1634 }
1635 
VisitMemoryPokeIntNative(HInvoke * invoke)1636 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1637   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1638 }
1639 
VisitMemoryPokeLongNative(HInvoke * invoke)1640 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1641   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1642 }
1643 
VisitMemoryPokeLongNative(HInvoke * invoke)1644 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1645   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1646 }
1647 
VisitMemoryPokeShortNative(HInvoke * invoke)1648 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1649   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1650 }
1651 
VisitMemoryPokeShortNative(HInvoke * invoke)1652 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1653   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1654 }
1655 
VisitThreadCurrentThread(HInvoke * invoke)1656 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1657   LocationSummary* locations =
1658       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1659   locations->SetOut(Location::RequiresRegister());
1660 }
1661 
VisitThreadCurrentThread(HInvoke * invoke)1662 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
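       // The java.lang.Thread peer is cached at a fixed offset in the runtime Thread object, which
       // fs-relative addressing reaches on x86, so the intrinsic is a single segment-prefixed load.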
1663   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1664   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1665 }
1666 
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1667 static void GenUnsafeGet(HInvoke* invoke,
1668                          DataType::Type type,
1669                          bool is_volatile,
1670                          CodeGeneratorX86* codegen) {
1671   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1672   LocationSummary* locations = invoke->GetLocations();
1673   Location base_loc = locations->InAt(1);
1674   Register base = base_loc.AsRegister<Register>();
1675   Location offset_loc = locations->InAt(2);
1676   Register offset = offset_loc.AsRegisterPairLow<Register>();
1677   Location output_loc = locations->Out();
1678 
1679   switch (type) {
1680     case DataType::Type::kInt8: {
1681       Register output = output_loc.AsRegister<Register>();
1682       __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1683       break;
1684     }
1685 
1686     case DataType::Type::kInt32: {
1687       Register output = output_loc.AsRegister<Register>();
1688       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1689       break;
1690     }
1691 
1692     case DataType::Type::kReference: {
1693       Register output = output_loc.AsRegister<Register>();
1694       if (codegen->EmitReadBarrier()) {
1695         if (kUseBakerReadBarrier) {
1696           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1697           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1698               invoke, output_loc, base, src, /* needs_null_check= */ false);
1699         } else {
1700           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1701           codegen->GenerateReadBarrierSlow(
1702               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1703         }
1704       } else {
1705         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1706         __ MaybeUnpoisonHeapReference(output);
1707       }
1708       break;
1709     }
1710 
1711     case DataType::Type::kInt64: {
1712         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1713         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1714         if (is_volatile) {
1715           // Need to use an XMM register to read atomically.
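               // A single 64-bit movsd cannot be observed torn, unlike a pair of 32-bit loads; the
               // value is then split back into the low/high GPR pair via MOVD and a 32-bit shift.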
1716           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1717           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1718           __ movd(output_lo, temp);
1719           __ psrlq(temp, Immediate(32));
1720           __ movd(output_hi, temp);
1721         } else {
1722           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1723           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1724         }
1725       }
1726       break;
1727 
1728     default:
1729       LOG(FATAL) << "Unsupported op size " << type;
1730       UNREACHABLE();
1731   }
1732 }
1733 
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1734 static void GenUnsafeGetAbsolute(HInvoke* invoke,
1735                                  DataType::Type type,
1736                                  bool is_volatile,
1737                                  CodeGeneratorX86* codegen) {
1738   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1739   LocationSummary* locations = invoke->GetLocations();
1740   Register address = locations->InAt(1).AsRegisterPairLow<Register>();
1741   Address address_offset(address, 0);
1742   Location output_loc = locations->Out();
1743 
1744   switch (type) {
1745     case DataType::Type::kInt8: {
1746       Register output = output_loc.AsRegister<Register>();
1747       __ movsxb(output, address_offset);
1748       break;
1749     }
1750 
1751     case DataType::Type::kInt32: {
1752       Register output = output_loc.AsRegister<Register>();
1753       __ movl(output, address_offset);
1754       break;
1755     }
1756 
1757     case DataType::Type::kInt64: {
1758         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1759         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1760         if (is_volatile) {
1761           // Need to use an XMM register to read atomically.
1762           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1763           __ movsd(temp, address_offset);
1764           __ movd(output_lo, temp);
1765           __ psrlq(temp, Immediate(32));
1766           __ movd(output_hi, temp);
1767         } else {
1768           Address address_hi(address, 4);
1769           __ movl(output_lo, address_offset);
1770           __ movl(output_hi, address_hi);
1771         }
1772       }
1773       break;
1774 
1775     default:
1776       LOG(FATAL) << "Unsupported op size " << type;
1777       UNREACHABLE();
1778   }
1779 }
1780 
CreateIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1781 static void CreateIntIntToIntLocations(ArenaAllocator* allocator,
1782                                        HInvoke* invoke,
1783                                        DataType::Type type,
1784                                        bool is_volatile) {
1785   LocationSummary* locations =
1786       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1787   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1788   locations->SetInAt(1, Location::RequiresRegister());
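       // A non-volatile 64-bit read is emitted as two 32-bit loads, so the address register stays
       // live until the high word is read and the output pair must not overlap it. The volatile
       // path loads through an XMM temporary first, so its output may reuse the input register.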
1789   if (type == DataType::Type::kInt64) {
1790     if (is_volatile) {
1791       // Need an XMM temporary to read the volatile value.
1792       locations->AddTemp(Location::RequiresFpuRegister());
1793       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1794     } else {
1795       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1796     }
1797   } else {
1798     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1799   }
1800 }
1801 
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen,DataType::Type type,bool is_volatile)1802 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1803                                           HInvoke* invoke,
1804                                           CodeGeneratorX86* codegen,
1805                                           DataType::Type type,
1806                                           bool is_volatile) {
1807   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1808   LocationSummary* locations =
1809       new (allocator) LocationSummary(invoke,
1810                                       can_call
1811                                           ? LocationSummary::kCallOnSlowPath
1812                                           : LocationSummary::kNoCall,
1813                                       kIntrinsified);
1814   if (can_call && kUseBakerReadBarrier) {
1815     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1816   }
1817   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1818   locations->SetInAt(1, Location::RequiresRegister());
1819   locations->SetInAt(2, Location::RequiresRegister());
1820   if (type == DataType::Type::kInt64) {
1821     if (is_volatile) {
1822       // Need an XMM temporary to read the volatile value.
1823       locations->AddTemp(Location::RequiresFpuRegister());
1824       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1825     } else {
1826       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1827     }
1828   } else {
1829     locations->SetOut(Location::RequiresRegister(),
1830                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1831   }
1832 }
1833 
VisitUnsafeGet(HInvoke * invoke)1834 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1835   VisitJdkUnsafeGet(invoke);
1836 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1837 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1838   VisitJdkUnsafeGetAbsolute(invoke);
1839 }
VisitUnsafeGetVolatile(HInvoke * invoke)1840 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1841   VisitJdkUnsafeGetVolatile(invoke);
1842 }
VisitUnsafeGetLong(HInvoke * invoke)1843 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1844   VisitJdkUnsafeGetLong(invoke);
1845 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1846 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1847   VisitJdkUnsafeGetLongVolatile(invoke);
1848 }
VisitUnsafeGetObject(HInvoke * invoke)1849 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1850   VisitJdkUnsafeGetReference(invoke);
1851 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1852 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1853   VisitJdkUnsafeGetReferenceVolatile(invoke);
1854 }
VisitUnsafeGetByte(HInvoke * invoke)1855 void IntrinsicLocationsBuilderX86::VisitUnsafeGetByte(HInvoke* invoke) {
1856   VisitJdkUnsafeGetByte(invoke);
1857 }
1858 
VisitUnsafeGet(HInvoke * invoke)1859 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1860   VisitJdkUnsafeGet(invoke);
1861 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1862 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1863   VisitJdkUnsafeGetAbsolute(invoke);
1864 }
VisitUnsafeGetVolatile(HInvoke * invoke)1865 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1866   VisitJdkUnsafeGetVolatile(invoke);
1867 }
VisitUnsafeGetLong(HInvoke * invoke)1868 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1869   VisitJdkUnsafeGetLong(invoke);
1870 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1871 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1872   VisitJdkUnsafeGetLongVolatile(invoke);
1873 }
VisitUnsafeGetObject(HInvoke * invoke)1874 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1875   VisitJdkUnsafeGetReference(invoke);
1876 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1877 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1878   VisitJdkUnsafeGetReferenceVolatile(invoke);
1879 }
VisitUnsafeGetByte(HInvoke * invoke)1880 void IntrinsicCodeGeneratorX86::VisitUnsafeGetByte(HInvoke* invoke) {
1881   VisitJdkUnsafeGetByte(invoke);
1882 }
1883 
VisitJdkUnsafeGet(HInvoke * invoke)1884 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1885   CreateIntIntIntToIntLocations(
1886       allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ false);
1887 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1888 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1889   CreateIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/false);
1890 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1891 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1892   CreateIntIntIntToIntLocations(
1893       allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1894 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1895 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1896   CreateIntIntIntToIntLocations(
1897       allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1898 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1899 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1900   CreateIntIntIntToIntLocations(
1901       allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ false);
1902 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1903 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1904   CreateIntIntIntToIntLocations(
1905       allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1906 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1907 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1908   CreateIntIntIntToIntLocations(
1909       allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1910 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1911 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1912   CreateIntIntIntToIntLocations(
1913       allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ false);
1914 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1915 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1916   CreateIntIntIntToIntLocations(
1917       allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1918 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1919 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1920   CreateIntIntIntToIntLocations(
1921       allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1922 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1923 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1924   CreateIntIntIntToIntLocations(
1925       allocator_, invoke, codegen_, DataType::Type::kInt8, /*is_volatile=*/ false);
1926 }
1927 
VisitJdkUnsafeGet(HInvoke * invoke)1928 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1929   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1930 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1931 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1932   GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1933 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1934 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1935   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1936 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1937 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1938   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1939 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1940 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1941   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1942 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1943 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1944   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1945 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1946 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1947   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1948 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1949 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1950   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1951 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1952 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1953   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1954 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1955 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1956   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1957 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1958 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1959   GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
1960 }
1961 
CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1962 static void CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1963                                                     DataType::Type type,
1964                                                     HInvoke* invoke,
1965                                                     bool is_volatile) {
1966   LocationSummary* locations =
1967       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1968   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1969   locations->SetInAt(1, Location::RequiresRegister());
1970   if (type == DataType::Type::kInt8 || type == DataType::Type::kUint8) {
1971     // Ensure the value is in a byte register.
1972     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, invoke->InputAt(3)));
1973   } else {
1974     locations->SetInAt(2, Location::RequiresRegister());
1975   }
1976   if (type == DataType::Type::kInt64 && is_volatile) {
1977     locations->AddTemp(Location::RequiresFpuRegister());
1978     locations->AddTemp(Location::RequiresFpuRegister());
1979   }
1980 }
1981 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1982 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1983                                                        DataType::Type type,
1984                                                        HInvoke* invoke,
1985                                                        bool is_volatile) {
1986   LocationSummary* locations =
1987       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1988   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1989   locations->SetInAt(1, Location::RequiresRegister());
1990   locations->SetInAt(2, Location::RequiresRegister());
1991   if (type == DataType::Type::kInt8 || type == DataType::Type::kUint8) {
1992     // Ensure the value is in a byte register.
1993     locations->SetInAt(3, Location::ByteRegisterOrConstant(EAX, invoke->InputAt(3)));
1994   } else {
1995     locations->SetInAt(3, Location::RequiresRegister());
1996   }
1997   if (type == DataType::Type::kReference) {
1998     // Need temp registers for card-marking.
1999     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2000     // Need a byte register (ECX) for card marking.
2001     locations->AddTemp(Location::RegisterLocation(ECX));
2002   } else if (type == DataType::Type::kInt64 && is_volatile) {
2003     locations->AddTemp(Location::RequiresFpuRegister());
2004     locations->AddTemp(Location::RequiresFpuRegister());
2005   }
2006 }
2007 
VisitUnsafePut(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
2009   VisitJdkUnsafePut(invoke);
2010 }
VisitUnsafePutAbsolute(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86::VisitUnsafePutAbsolute(HInvoke* invoke) {
2012   VisitJdkUnsafePutAbsolute(invoke);
2013 }
VisitUnsafePutOrdered(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2015   VisitJdkUnsafePutOrdered(invoke);
2016 }
VisitUnsafePutVolatile(HInvoke * invoke)2017 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2018   VisitJdkUnsafePutVolatile(invoke);
2019 }
VisitUnsafePutObject(HInvoke * invoke)2020 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
2021   VisitJdkUnsafePutReference(invoke);
2022 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2023 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2024   VisitJdkUnsafePutObjectOrdered(invoke);
2025 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2026 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2027   VisitJdkUnsafePutReferenceVolatile(invoke);
2028 }
VisitUnsafePutLong(HInvoke * invoke)2029 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
2030   VisitJdkUnsafePutLong(invoke);
2031 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2032 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2033   VisitJdkUnsafePutLongOrdered(invoke);
2034 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2035 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2036   VisitJdkUnsafePutLongVolatile(invoke);
2037 }
VisitUnsafePutByte(HInvoke * invoke)2038 void IntrinsicLocationsBuilderX86::VisitUnsafePutByte(HInvoke* invoke) {
2039   VisitJdkUnsafePutByte(invoke);
2040 }
2041 
VisitJdkUnsafePut(HInvoke * invoke)2042 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
2043   CreateIntIntIntIntToVoidPlusTempsLocations(
2044       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
2045 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2046 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2047   CreateIntIntIntToVoidPlusTempsLocations(
2048       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
2049 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2050 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2051   CreateIntIntIntIntToVoidPlusTempsLocations(
2052       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
2053 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2054 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2055   CreateIntIntIntIntToVoidPlusTempsLocations(
2056       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
2057 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2058 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2059   CreateIntIntIntIntToVoidPlusTempsLocations(
2060       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
2061 }
VisitJdkUnsafePutReference(HInvoke * invoke)2062 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
2063   CreateIntIntIntIntToVoidPlusTempsLocations(
2064       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
2065 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2066 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2067   CreateIntIntIntIntToVoidPlusTempsLocations(
2068       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
2069 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2070 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2071   CreateIntIntIntIntToVoidPlusTempsLocations(
2072       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
2073 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2074 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2075   CreateIntIntIntIntToVoidPlusTempsLocations(
2076       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
2077 }
VisitJdkUnsafePutLong(HInvoke * invoke)2078 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2079   CreateIntIntIntIntToVoidPlusTempsLocations(
2080       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
2081 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2082 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2083   CreateIntIntIntIntToVoidPlusTempsLocations(
2084       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
2085 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2086 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2087   CreateIntIntIntIntToVoidPlusTempsLocations(
2088       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
2089 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2090 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2091   CreateIntIntIntIntToVoidPlusTempsLocations(
2092       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
2093 }
VisitJdkUnsafePutByte(HInvoke * invoke)2094 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
2095   CreateIntIntIntIntToVoidPlusTempsLocations(
2096       allocator_, DataType::Type::kInt8, invoke, /*is_volatile=*/ false);
2097 }
2098 
2099 // Ordered stores need no special handling: they only require an AnyStore barrier, which the x86
2100 // memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)2101 static void GenUnsafePut(LocationSummary* locations,
2102                          DataType::Type type,
2103                          bool is_volatile,
2104                          CodeGeneratorX86* codegen) {
2105   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2106   Register base = locations->InAt(1).AsRegister<Register>();
2107   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2108   Location value_loc = locations->InAt(3);
2109 
2110   if (type == DataType::Type::kInt64) {
2111     Register value_lo = value_loc.AsRegisterPairLow<Register>();
2112     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2113     if (is_volatile) {
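           // Pack the two halves into one XMM register with PUNPCKLDQ and publish them with a
           // single 64-bit movsd so the store cannot be observed torn (mirrors the volatile read).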
2114       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2115       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2116       __ movd(temp1, value_lo);
2117       __ movd(temp2, value_hi);
2118       __ punpckldq(temp1, temp2);
2119       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2120     } else {
2121       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2122       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2123     }
2124   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2125     Register temp = locations->GetTemp(0).AsRegister<Register>();
2126     __ movl(temp, value_loc.AsRegister<Register>());
2127     __ PoisonHeapReference(temp);
2128     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2129   } else if (type == DataType::Type::kInt32 || type == DataType::Type::kReference) {
2130     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2131   } else {
2132     CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2133     if (value_loc.IsRegister()) {
2134       __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<ByteRegister>());
2135     } else {
2136       __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0),
2137               Immediate(CodeGenerator::GetInt8ValueOf(value_loc.GetConstant())));
2138     }
2139   }
2140 
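       // A volatile store additionally needs a trailing StoreLoad barrier: x86-TSO forbids every
       // other reordering for the plain store above, but it may still be reordered with a later
       // load, which MemoryFence() prevents.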
2141   if (is_volatile) {
2142     codegen->MemoryFence();
2143   }
2144 
2145   if (type == DataType::Type::kReference) {
2146     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2147     codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2148                              locations->GetTemp(1).AsRegister<Register>(),
2149                              base,
2150                              value_loc.AsRegister<Register>(),
2151                              value_can_be_null);
2152   }
2153 }
2154 
2155 // Ordered stores need no special handling: they only require an AnyStore barrier, which the x86
2156 // memory model already provides.
GenUnsafePutAbsolute(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)2157 static void GenUnsafePutAbsolute(LocationSummary* locations,
2158                                  DataType::Type type,
2159                                  bool is_volatile,
2160                                  CodeGeneratorX86* codegen) {
2161   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2162   Register address = locations->InAt(1).AsRegisterPairLow<Register>();
2163   Address address_offset(address, 0);
2164   Location value_loc = locations->InAt(2);
2165 
2166   if (type == DataType::Type::kInt64) {
2167     Register value_lo = value_loc.AsRegisterPairLow<Register>();
2168     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2169     if (is_volatile) {
2170       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2171       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2172       __ movd(temp1, value_lo);
2173       __ movd(temp2, value_hi);
2174       __ punpckldq(temp1, temp2);
2175       __ movsd(address_offset, temp1);
2176     } else {
2177       __ movl(address_offset, value_lo);
2178       __ movl(Address(address, 4), value_hi);
2179     }
2180   } else if (type == DataType::Type::kInt32) {
2181     __ movl(address_offset, value_loc.AsRegister<Register>());
2182   } else {
2183     CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2184     if (value_loc.IsRegister()) {
2185       __ movb(address_offset, value_loc.AsRegister<ByteRegister>());
2186     } else {
2187       __ movb(address_offset,
2188               Immediate(CodeGenerator::GetInt8ValueOf(value_loc.GetConstant())));
2189     }
2190   }
2191 
2192   if (is_volatile) {
2193     codegen->MemoryFence();
2194   }
2195 }
2196 
VisitUnsafePut(HInvoke * invoke)2197 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2198   VisitJdkUnsafePut(invoke);
2199 }
VisitUnsafePutAbsolute(HInvoke * invoke)2200 void IntrinsicCodeGeneratorX86::VisitUnsafePutAbsolute(HInvoke* invoke) {
2201   VisitJdkUnsafePutAbsolute(invoke);
2202 }
VisitUnsafePutOrdered(HInvoke * invoke)2203 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2204   VisitJdkUnsafePutOrdered(invoke);
2205 }
VisitUnsafePutVolatile(HInvoke * invoke)2206 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2207   VisitJdkUnsafePutVolatile(invoke);
2208 }
VisitUnsafePutObject(HInvoke * invoke)2209 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2210   VisitJdkUnsafePutReference(invoke);
2211 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2212 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2213   VisitJdkUnsafePutObjectOrdered(invoke);
2214 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2215 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2216   VisitJdkUnsafePutReferenceVolatile(invoke);
2217 }
VisitUnsafePutLong(HInvoke * invoke)2218 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2219   VisitJdkUnsafePutLong(invoke);
2220 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2221 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2222   VisitJdkUnsafePutLongOrdered(invoke);
2223 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2224 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2225   VisitJdkUnsafePutLongVolatile(invoke);
2226 }
VisitUnsafePutByte(HInvoke * invoke)2227 void IntrinsicCodeGeneratorX86::VisitUnsafePutByte(HInvoke* invoke) {
2228   VisitJdkUnsafePutByte(invoke);
2229 }
2230 
VisitJdkUnsafePut(HInvoke * invoke)2231 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
2232   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2233 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2234 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2235   GenUnsafePutAbsolute(
2236       invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/false, codegen_);
2237 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2238 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2239   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2240 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2241 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2242   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2243 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2244 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2245   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2246 }
VisitJdkUnsafePutReference(HInvoke * invoke)2247 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
2248   GenUnsafePut(
2249       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2250 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2251 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2252   GenUnsafePut(
2253       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2254 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2255 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2256   GenUnsafePut(
2257       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2258 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2259 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2260   GenUnsafePut(
2261       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2262 }
VisitJdkUnsafePutLong(HInvoke * invoke)2263 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2264   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2265 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2266 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2267   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2268 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2269 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2270   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2271 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2272 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2273   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2274 }
VisitJdkUnsafePutByte(HInvoke * invoke)2275 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
2276   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
2277 }
2278 
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,DataType::Type type,HInvoke * invoke)2279 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2280                                        CodeGeneratorX86* codegen,
2281                                        DataType::Type type,
2282                                        HInvoke* invoke) {
2283   const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2284   LocationSummary* locations =
2285       new (allocator) LocationSummary(invoke,
2286                                       can_call
2287                                           ? LocationSummary::kCallOnSlowPath
2288                                           : LocationSummary::kNoCall,
2289                                       kIntrinsified);
2290   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2291   locations->SetInAt(1, Location::RequiresRegister());
2292   // Offset is a long, but in 32 bit mode, we only need the low word.
2293   // Can we update the invoke here to remove a TypeConvert to Long?
2294   locations->SetInAt(2, Location::RequiresRegister());
2295   // Expected value must be in EAX or EDX:EAX.
2296   // For long, new value must be in ECX:EBX.
2297   if (type == DataType::Type::kInt64) {
2298     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2299     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2300   } else {
2301     locations->SetInAt(3, Location::RegisterLocation(EAX));
2302     locations->SetInAt(4, Location::RequiresRegister());
2303   }
2304 
2305   // Force a byte register for the output.
2306   locations->SetOut(Location::RegisterLocation(EAX));
2307   if (type == DataType::Type::kReference) {
2308     // Need temporary registers for card-marking, and possibly for
2309     // (Baker) read barrier.
2310     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2311     // Need a byte register for marking.
2312     locations->AddTemp(Location::RegisterLocation(ECX));
2313   }
2314 }
2315 
VisitUnsafeCASInt(HInvoke * invoke)2316 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2317   VisitJdkUnsafeCASInt(invoke);
2318 }
2319 
VisitUnsafeCASLong(HInvoke * invoke)2320 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2321   VisitJdkUnsafeCASLong(invoke);
2322 }
2323 
VisitUnsafeCASObject(HInvoke * invoke)2324 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2325   VisitJdkUnsafeCASObject(invoke);
2326 }
2327 
VisitJdkUnsafeCASInt(HInvoke * invoke)2328 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2329   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2330   VisitJdkUnsafeCompareAndSetInt(invoke);
2331 }
2332 
VisitJdkUnsafeCASLong(HInvoke * invoke)2333 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2334   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2335   VisitJdkUnsafeCompareAndSetLong(invoke);
2336 }
2337 
VisitJdkUnsafeCASObject(HInvoke * invoke)2338 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2339   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2340   VisitJdkUnsafeCompareAndSetReference(invoke);
2341 }
2342 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2343 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2344   CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt32, invoke);
2345 }
2346 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2347 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2348   CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt64, invoke);
2349 }
2350 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2351 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2352   // The only supported read barrier implementation is the Baker-style read barriers.
2353   if (codegen_->EmitNonBakerReadBarrier()) {
2354     return;
2355   }
2356 
2357   CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kReference, invoke);
2358 }
2359 
GenPrimitiveLockedCmpxchg(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp=Register::kNoRegister)2360 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
2361                                       CodeGeneratorX86* codegen,
2362                                       Location expected_value,
2363                                       Location new_value,
2364                                       Register base,
2365                                       Register offset,
2366                                       // Only necessary for floating point
2367                                       Register temp = Register::kNoRegister) {
2368   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2369 
2370   if (DataType::Kind(type) == DataType::Type::kInt32) {
2371     DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
2372   }
2373 
2374   // The address of the field within the holding object.
2375   Address field_addr(base, offset, TIMES_1, 0);
2376 
2377   switch (type) {
2378     case DataType::Type::kBool:
2379     case DataType::Type::kInt8:
2380       __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
2381       break;
2382     case DataType::Type::kInt16:
2383     case DataType::Type::kUint16:
2384       __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
2385       break;
2386     case DataType::Type::kInt32:
2387       __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
2388       break;
2389     case DataType::Type::kFloat32: {
2390       // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
2391       DCHECK_NE(temp, EAX);
2392       // EAX is both an input and an output for cmpxchg
2393       codegen->Move32(Location::RegisterLocation(EAX), expected_value);
2394       codegen->Move32(Location::RegisterLocation(temp), new_value);
2395       __ LockCmpxchgl(field_addr, temp);
2396       break;
2397     }
2398     case DataType::Type::kInt64:
2399       // Ensure the expected value is in EDX:EAX and that the new
2400       // value is in ECX:EBX (as required by the CMPXCHG8B instruction).
2401       DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2402       DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2403       DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2404       DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2405       __ LockCmpxchg8b(field_addr);
2406       break;
2407     default:
2408       LOG(FATAL) << "Unexpected CAS type " << type;
2409   }
2410   // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2411   // don't need scheduling barriers at this time.
2412 }
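
// A rough sketch of the x86 semantics relied upon above (not ART-specific):
//   LOCK CMPXCHG{B,W,L} field, new:
//     if (field == AL/AX/EAX) { field = new; ZF = 1; } else { AL/AX/EAX = field; ZF = 0; }
//   LOCK CMPXCHG8B field:
//     the same, with EDX:EAX as the expected value and ECX:EBX as the new value,
//     which is why those registers are fixed in the locations above.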
2413 
GenPrimitiveCAS(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Location out,Register temp=Register::kNoRegister,bool is_cmpxchg=false)2414 static void GenPrimitiveCAS(DataType::Type type,
2415                             CodeGeneratorX86* codegen,
2416                             Location expected_value,
2417                             Location new_value,
2418                             Register base,
2419                             Register offset,
2420                             Location out,
2421                             // Only necessary for floating point
2422                             Register temp = Register::kNoRegister,
2423                             bool is_cmpxchg = false) {
2424   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2425 
2426   if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2427     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2428   }
2429 
2430   GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2431 
2432   if (is_cmpxchg) {
2433     // Sign-extend, zero-extend or move the result if necessary
2434     switch (type) {
2435       case DataType::Type::kBool:
2436         __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2437         break;
2438       case DataType::Type::kInt8:
2439         __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2440         break;
2441       case DataType::Type::kInt16:
2442         __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2443         break;
2444       case DataType::Type::kUint16:
2445         __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2446         break;
2447       case DataType::Type::kFloat32:
2448         __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2449         break;
2450       default:
2451         // Nothing to do
2452         break;
2453     }
2454   } else {
2455     // Convert ZF into the Boolean result.
2456     __ setb(kZero, out.AsRegister<Register>());
2457     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2458   }
2459 }
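
// A sketch of how the two result flavors above map to the Java-level intrinsics: with
// `is_cmpxchg == false` (compareAndSet style) the boolean result is derived from ZF via SETcc,
// while with `is_cmpxchg == true` (compareAndExchange style) the witnessed value left in EAX is
// returned, sign-/zero-extended or moved to an XMM register as needed.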
2460 
GenReferenceCAS(HInvoke * invoke,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp,Register temp2,bool is_cmpxchg=false)2461 static void GenReferenceCAS(HInvoke* invoke,
2462                             CodeGeneratorX86* codegen,
2463                             Location expected_value,
2464                             Location new_value,
2465                             Register base,
2466                             Register offset,
2467                             Register temp,
2468                             Register temp2,
2469                             bool is_cmpxchg = false) {
2470   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2471   LocationSummary* locations = invoke->GetLocations();
2472   Location out = locations->Out();
2473 
2474   // The address of the field within the holding object.
2475   Address field_addr(base, offset, TIMES_1, 0);
2476 
2477   Register value = new_value.AsRegister<Register>();
2478   Register expected = expected_value.AsRegister<Register>();
2479   DCHECK_EQ(expected, EAX);
2480   DCHECK_NE(temp, temp2);
2481 
2482   if (codegen->EmitBakerReadBarrier()) {
2483     // Need to make sure the reference stored in the field is a to-space
2484     // one before attempting the CAS or the CAS could fail incorrectly.
2485     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2486         invoke,
2487         // Unused, used only as a "temporary" within the read barrier.
2488         Location::RegisterLocation(temp),
2489         base,
2490         field_addr,
2491         /* needs_null_check= */ false,
2492         /* always_update_field= */ true,
2493         &temp2);
2494   }
2495   bool base_equals_value = (base == value);
2496   if (kPoisonHeapReferences) {
2497     if (base_equals_value) {
2498       // If `base` and `value` are the same register location, move
2499       // `value` to a temporary register.  This way, poisoning
2500       // `value` won't invalidate `base`.
2501       value = temp;
2502       __ movl(value, base);
2503     }
2504 
2505     // Check that the register allocator did not assign the location
2506     // of `expected` (EAX) to `value` nor to `base`, so that heap
2507     // poisoning (when enabled) works as intended below.
2508     // - If `value` were equal to `expected`, both references would
2509     //   be poisoned twice, meaning they would not be poisoned at
2510     //   all, as heap poisoning uses address negation.
2511     // - If `base` were equal to `expected`, poisoning `expected`
2512     //   would invalidate `base`.
2513     DCHECK_NE(value, expected);
2514     DCHECK_NE(base, expected);
2515     __ PoisonHeapReference(expected);
2516     __ PoisonHeapReference(value);
2517   }
2518   __ LockCmpxchgl(field_addr, value);
2519 
2520   // LOCK CMPXCHG has full barrier semantics, and we don't need
2521   // scheduling barriers at this time.
2522 
2523   if (is_cmpxchg) {
2524     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2525     __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2526   } else {
2527     // Convert ZF into the Boolean result.
2528     __ setb(kZero, out.AsRegister<Register>());
2529     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2530   }
2531 
2532   // Mark card for object if the new value is stored.
2533   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2534   NearLabel skip_mark_gc_card;
2535   __ j(kNotZero, &skip_mark_gc_card);
2536   codegen->MaybeMarkGCCard(temp, temp2, base, value, value_can_be_null);
2537   __ Bind(&skip_mark_gc_card);
2538 
2539   // If heap poisoning is enabled, we need to unpoison the values
2540   // that were poisoned earlier.
2541   if (kPoisonHeapReferences) {
2542     if (base_equals_value) {
2543       // `value` has been moved to a temporary register, no need to
2544       // unpoison it.
2545     } else {
2546       // Ensure `value` is different from `out`, so that unpoisoning
2547       // the former does not invalidate the latter.
2548       DCHECK_NE(value, out.AsRegister<Register>());
2549       __ UnpoisonHeapReference(value);
2550     }
2551   }
2552   // Do not unpoison the reference contained in register
2553   // `expected`, as it is the same as register `out` (EAX).
2554 }
2555 
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)2556 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2557   LocationSummary* locations = invoke->GetLocations();
2558 
2559   Register base = locations->InAt(1).AsRegister<Register>();
2560   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2561   Location expected_value = locations->InAt(3);
2562   Location new_value = locations->InAt(4);
2563   Location out = locations->Out();
2564   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2565 
2566   if (type == DataType::Type::kReference) {
2567     // The only read barrier implementation supporting the
2568     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2569     DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2570 
2571     Register temp = locations->GetTemp(0).AsRegister<Register>();
2572     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2573     GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2574   } else {
2575     DCHECK(!DataType::IsFloatingPointType(type));
2576     GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2577   }
2578 }
2579 
VisitUnsafeCASInt(HInvoke * invoke)2580 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2581   VisitJdkUnsafeCASInt(invoke);
2582 }
2583 
VisitUnsafeCASLong(HInvoke * invoke)2584 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2585   VisitJdkUnsafeCASLong(invoke);
2586 }
2587 
VisitUnsafeCASObject(HInvoke * invoke)2588 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2589   // The only read barrier implementation supporting the
2590   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2591   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2592 
2593   GenCAS(DataType::Type::kReference, invoke, codegen_);
2594 }
2595 
VisitJdkUnsafeCASInt(HInvoke * invoke)2596 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2597   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2598   VisitJdkUnsafeCompareAndSetInt(invoke);
2599 }
2600 
VisitJdkUnsafeCASLong(HInvoke * invoke)2601 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2602   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2603   VisitJdkUnsafeCompareAndSetLong(invoke);
2604 }
2605 
VisitJdkUnsafeCASObject(HInvoke * invoke)2606 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2607   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2608   VisitJdkUnsafeCompareAndSetReference(invoke);
2609 }
2610 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2611 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2612   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2613 }
2614 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2615 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2616   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2617 }
2618 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2619 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2620   // The only supported read barrier implementation is the Baker-style read barriers.
2621   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2622 
2623   GenCAS(DataType::Type::kReference, invoke, codegen_);
2624 }
2625 
2626 // Note: Unlike other architectures, which use a corresponding enum for the `VarHandle`
2627 // implementation as well, x86 currently uses this enum only for `Unsafe`.
2628 enum class GetAndUpdateOp {
2629   kSet,
2630   kAdd,
2631 };
2632 
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen,DataType::Type type,GetAndUpdateOp get_and_unsafe_op)2633 void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2634                                        HInvoke* invoke,
2635                                        CodeGeneratorX86* codegen,
2636                                        DataType::Type type,
2637                                        GetAndUpdateOp get_and_unsafe_op) {
2638   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2639   LocationSummary* locations =
2640       new (allocator) LocationSummary(invoke,
2641                                       can_call
2642                                           ? LocationSummary::kCallOnSlowPath
2643                                           : LocationSummary::kNoCall,
2644                                       kIntrinsified);
2645   if (can_call && kUseBakerReadBarrier) {
2646     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2647   }
2648   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2649   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2650   if (type == DataType::Type::kInt64) {
2651     // Explicitly allocate all registers.
2652     locations->SetInAt(1, Location::RegisterLocation(EBP));
2653     if (get_and_unsafe_op == GetAndUpdateOp::kAdd) {
2654       locations->AddTemp(Location::RegisterLocation(EBP));  // We shall clobber EBP.
2655       locations->SetInAt(2, Location::Any());  // Offset shall be on the stack.
2656       locations->SetInAt(3, Location::RegisterPairLocation(ESI, EDI));
2657       locations->AddTemp(Location::RegisterLocation(EBX));
2658       locations->AddTemp(Location::RegisterLocation(ECX));
2659     } else {
2660       locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI));
2661       locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX));
2662     }
2663     if (is_void) {
2664       locations->AddTemp(Location::RegisterLocation(EAX));
2665       locations->AddTemp(Location::RegisterLocation(EDX));
2666     } else {
2667       locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
2668     }
2669   } else {
2670     locations->SetInAt(1, Location::RequiresRegister());
2671     locations->SetInAt(2, Location::RequiresRegister());
2672     // Use the same register for both the output and the new value or addend
2673     // to take advantage of XCHG or XADD. Arbitrarily pick EAX.
2674     locations->SetInAt(3, Location::RegisterLocation(EAX));
2675     // Only set the `out` register if it is needed. In the void case we can still use EAX in the
2676     // same manner, since it is marked as a temp register.
2677     if (is_void) {
2678       locations->AddTemp(Location::RegisterLocation(EAX));
2679     } else {
2680       locations->SetOut(Location::RegisterLocation(EAX));
2681     }
2682   }
2683 }
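
// A rough register budget for the `kInt64` + `kAdd` case above (an informal sketch): EDX:EAX
// receives the old value and ECX:EBX the new value for LOCK CMPXCHG8B, EDI:ESI carries the
// addend, so only EBP remains for the object pointer and the offset has to stay on the stack.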
2684 
VisitUnsafeGetAndAddInt(HInvoke * invoke)2685 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2686   VisitJdkUnsafeGetAndAddInt(invoke);
2687 }
2688 
VisitUnsafeGetAndAddLong(HInvoke * invoke)2689 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2690   VisitJdkUnsafeGetAndAddLong(invoke);
2691 }
2692 
VisitUnsafeGetAndSetInt(HInvoke * invoke)2693 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2694   VisitJdkUnsafeGetAndSetInt(invoke);
2695 }
2696 
VisitUnsafeGetAndSetLong(HInvoke * invoke)2697 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2698   VisitJdkUnsafeGetAndSetLong(invoke);
2699 }
2700 
VisitUnsafeGetAndSetObject(HInvoke * invoke)2701 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2702   VisitJdkUnsafeGetAndSetReference(invoke);
2703 }
2704 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2705 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2706   CreateUnsafeGetAndUpdateLocations(
2707       allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
2708 }
2709 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2710 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2711   CreateUnsafeGetAndUpdateLocations(
2712       allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
2713 }
2714 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2715 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2716   CreateUnsafeGetAndUpdateLocations(
2717       allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
2718 }
2719 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2720 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2721   CreateUnsafeGetAndUpdateLocations(
2722       allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
2723 }
2724 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2725 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2726   // The only supported read barrier implementation is the Baker-style read barriers.
2727   if (codegen_->EmitNonBakerReadBarrier()) {
2728     return;
2729   }
2730 
2731   CreateUnsafeGetAndUpdateLocations(
2732       allocator_, invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
2733   LocationSummary* locations = invoke->GetLocations();
2734   locations->AddTemp(Location::RequiresRegister());
2735   locations->AddTemp(Location::RegisterLocation(ECX));  // Byte register for `MarkGCCard()`.
2736 }
2737 
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorX86 * codegen,GetAndUpdateOp get_and_update_op)2738 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2739                                   DataType::Type type,
2740                                   CodeGeneratorX86* codegen,
2741                                   GetAndUpdateOp get_and_update_op) {
2742   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2743   LocationSummary* locations = invoke->GetLocations();
2744 
2745   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2746   // For void methods we use the specifically requested registers as temps, since we do not
2747   // return the value.
2748   Location out_or_temp =
2749       is_void ? (type == DataType::Type::kInt64 ? Location::RegisterPairLocation(EAX, EDX) :
2750                                                   Location::RegisterLocation(EAX)) :
2751                 locations->Out();
2752   Register base = locations->InAt(1).AsRegister<Register>();  // Object pointer.
2753   Location offset = locations->InAt(2);                       // Long offset.
2754   Location arg = locations->InAt(3);                          // New value or addend.
2755 
2756   if (type == DataType::Type::kInt32) {
2757     DCHECK(out_or_temp.Equals(arg));
2758     Register out_reg = out_or_temp.AsRegister<Register>();
2759     Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2760     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2761       __ LockXaddl(field_address, out_reg);
2762     } else {
2763       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2764       __ xchgl(out_reg, field_address);
2765     }
2766   } else if (type == DataType::Type::kInt64) {
2767     // Prepare the field address. Ignore the high 32 bits of the `offset`.
2768     Address field_address_low(kNoRegister, 0), field_address_high(kNoRegister, 0);
2769     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2770       DCHECK(offset.IsDoubleStackSlot());
2771       __ addl(base, Address(ESP, offset.GetStackIndex()));  // Clobbers `base`.
2772       DCHECK(Location::RegisterLocation(base).Equals(locations->GetTemp(0)));
2773       field_address_low = Address(base, 0);
2774       field_address_high = Address(base, 4);
2775     } else {
2776       field_address_low = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2777       field_address_high = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 4);
2778     }
2779     // Load the old value to EDX:EAX and use LOCK CMPXCHG8B to set the new value.
2780     NearLabel loop;
2781     __ Bind(&loop);
2782     __ movl(EAX, field_address_low);
2783     __ movl(EDX, field_address_high);
2784     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2785       DCHECK(Location::RegisterPairLocation(ESI, EDI).Equals(arg));
2786       __ movl(EBX, EAX);
2787       __ movl(ECX, EDX);
2788       __ addl(EBX, ESI);
2789       __ adcl(ECX, EDI);
2790     } else {
2791       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2792       DCHECK(Location::RegisterPairLocation(EBX, ECX).Equals(arg));
2793     }
2794     __ LockCmpxchg8b(field_address_low);
2795     __ j(kNotEqual, &loop);  // Repeat on failure.
2796   } else {
2797     DCHECK_EQ(type, DataType::Type::kReference);
2798     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2799     Register out_reg = out_or_temp.AsRegister<Register>();
2800     Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2801     Register temp1 = locations->GetTemp(0).AsRegister<Register>();
2802     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2803 
2804     if (codegen->EmitReadBarrier()) {
2805       DCHECK(kUseBakerReadBarrier);
2806       // Ensure that the field contains a to-space reference.
2807       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2808           invoke,
2809           Location::RegisterLocation(temp2),
2810           base,
2811           field_address,
2812           /*needs_null_check=*/ false,
2813           /*always_update_field=*/ true,
2814           &temp1);
2815     }
2816 
2817     // Mark the card for the object, as a new value is about to be stored.
2818     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
2819     DCHECK_EQ(temp2, ECX);  // Byte register for `MarkGCCard()`.
2820     codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_reg, new_value_can_be_null);
2821 
2822     if (kPoisonHeapReferences) {
2823       // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2824       // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2825       __ movl(temp1, out_reg);
2826       __ PoisonHeapReference(temp1);
2827       __ xchgl(temp1, field_address);
2828       if (!is_void) {
2829         __ UnpoisonHeapReference(temp1);
2830         __ movl(out_reg, temp1);
2831       }
2832     } else {
2833       __ xchgl(out_reg, field_address);
2834     }
2835   }
2836 }
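
// An informal pseudo-C sketch of the paths generated above (names are illustrative only):
//   kInt32:     old = exchange_or_fetch_add(field, arg);            // XCHG / LOCK XADD
//   kInt64:     do { old = *field;
//                    new_value = (op == kAdd) ? old + arg : arg;
//               } while (!cas64(field, old, new_value));             // LOCK CMPXCHG8B retry loop
//   kReference: old = exchange(field, arg);                          // XCHG + read barrier, card mark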
2837 
VisitUnsafeGetAndAddInt(HInvoke * invoke)2838 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2839   VisitJdkUnsafeGetAndAddInt(invoke);
2840 }
2841 
VisitUnsafeGetAndAddLong(HInvoke * invoke)2842 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2843   VisitJdkUnsafeGetAndAddLong(invoke);
2844 }
2845 
VisitUnsafeGetAndSetInt(HInvoke * invoke)2846 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2847   VisitJdkUnsafeGetAndSetInt(invoke);
2848 }
2849 
VisitUnsafeGetAndSetLong(HInvoke * invoke)2850 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2851   VisitJdkUnsafeGetAndSetLong(invoke);
2852 }
2853 
VisitUnsafeGetAndSetObject(HInvoke * invoke)2854 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2855   VisitJdkUnsafeGetAndSetReference(invoke);
2856 }
2857 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2858 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2859   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2860 }
2861 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2862 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2863   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2864 }
2865 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2866 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2867   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2868 }
2869 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2870 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2871   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2872 }
2873 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2874 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2875   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2876 }
2877 
VisitIntegerReverse(HInvoke * invoke)2878 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2879   LocationSummary* locations =
2880       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2881   locations->SetInAt(0, Location::RequiresRegister());
2882   locations->SetOut(Location::SameAsFirstInput());
2883   locations->AddTemp(Location::RequiresRegister());
2884 }
2885 
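// Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps each group of bits
// selected by `mask` with the group `shift` bits above it; used below for bit reversal.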
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2886 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2887                      X86Assembler* assembler) {
2888   Immediate imm_shift(shift);
2889   Immediate imm_mask(mask);
2890   __ movl(temp, reg);
2891   __ shrl(reg, imm_shift);
2892   __ andl(temp, imm_mask);
2893   __ andl(reg, imm_mask);
2894   __ shll(temp, imm_shift);
2895   __ orl(reg, temp);
2896 }
2897 
VisitIntegerReverse(HInvoke * invoke)2898 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2899   X86Assembler* assembler = GetAssembler();
2900   LocationSummary* locations = invoke->GetLocations();
2901 
2902   Register reg = locations->InAt(0).AsRegister<Register>();
2903   Register temp = locations->GetTemp(0).AsRegister<Register>();
2904 
2905   /*
2906    * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
2907    * bit swapping to reverse the bits in a number x. Using bswap saves instructions
2908    * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2909    * x = bswap x
2910    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2911    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2912    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2913    */
2914   __ bswapl(reg);
2915   SwapBits(reg, temp, 1, 0x55555555, assembler);
2916   SwapBits(reg, temp, 2, 0x33333333, assembler);
2917   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2918 }
2919 
VisitLongReverse(HInvoke * invoke)2920 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2921   LocationSummary* locations =
2922       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2923   locations->SetInAt(0, Location::RequiresRegister());
2924   locations->SetOut(Location::SameAsFirstInput());
2925   locations->AddTemp(Location::RequiresRegister());
2926 }
2927 
VisitLongReverse(HInvoke * invoke)2928 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2929   X86Assembler* assembler = GetAssembler();
2930   LocationSummary* locations = invoke->GetLocations();
2931 
2932   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2933   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2934   Register temp = locations->GetTemp(0).AsRegister<Register>();
2935 
2936   // We want to swap high/low, then bswap each one, and then do the same
2937   // as a 32 bit reverse.
2938   // Exchange high and low.
2939   __ movl(temp, reg_low);
2940   __ movl(reg_low, reg_high);
2941   __ movl(reg_high, temp);
2942 
2943   // bit-reverse low
2944   __ bswapl(reg_low);
2945   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2946   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2947   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2948 
2949   // bit-reverse high
2950   __ bswapl(reg_high);
2951   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2952   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2953   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2954 }
2955 
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2956 static void CreateBitCountLocations(
2957     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2958   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2959     // Do nothing if there is no popcnt support. This results in generating
2960     // a call for the intrinsic rather than direct code.
2961     return;
2962   }
2963   LocationSummary* locations =
2964       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2965   if (is_long) {
2966     locations->AddTemp(Location::RequiresRegister());
2967   }
2968   locations->SetInAt(0, Location::Any());
2969   locations->SetOut(Location::RequiresRegister());
2970 }
2971 
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2972 static void GenBitCount(X86Assembler* assembler,
2973                         CodeGeneratorX86* codegen,
2974                         HInvoke* invoke, bool is_long) {
2975   LocationSummary* locations = invoke->GetLocations();
2976   Location src = locations->InAt(0);
2977   Register out = locations->Out().AsRegister<Register>();
2978 
2979   if (invoke->InputAt(0)->IsConstant()) {
2980     // Evaluate this at compile time.
2981     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2982     int32_t result = is_long
2983         ? POPCOUNT(static_cast<uint64_t>(value))
2984         : POPCOUNT(static_cast<uint32_t>(value));
2985     codegen->Load32BitValue(out, result);
2986     return;
2987   }
2988 
2989   // Handle the non-constant cases.
2990   if (!is_long) {
2991     if (src.IsRegister()) {
2992       __ popcntl(out, src.AsRegister<Register>());
2993     } else {
2994       DCHECK(src.IsStackSlot());
2995       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2996     }
2997   } else {
2998     // The 64-bit case needs to worry about two parts.
2999     Register temp = locations->GetTemp(0).AsRegister<Register>();
3000     if (src.IsRegisterPair()) {
3001       __ popcntl(temp, src.AsRegisterPairLow<Register>());
3002       __ popcntl(out, src.AsRegisterPairHigh<Register>());
3003     } else {
3004       DCHECK(src.IsDoubleStackSlot());
3005       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
3006       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
3007     }
3008     __ addl(out, temp);
3009   }
3010 }
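
// For reference, the 64-bit path above computes Long.bitCount(x) as
// Integer.bitCount(low half) + Integer.bitCount(high half); the sum is at most 64,
// so it always fits in the single 32-bit `out` register.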
3011 
VisitIntegerBitCount(HInvoke * invoke)3012 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
3013   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
3014 }
3015 
VisitIntegerBitCount(HInvoke * invoke)3016 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
3017   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3018 }
3019 
VisitLongBitCount(HInvoke * invoke)3020 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
3021   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
3022 }
3023 
VisitLongBitCount(HInvoke * invoke)3024 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
3025   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3026 }
3027 
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)3028 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
3029   LocationSummary* locations =
3030       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3031   if (is_long) {
3032     locations->SetInAt(0, Location::RequiresRegister());
3033   } else {
3034     locations->SetInAt(0, Location::Any());
3035   }
3036   locations->SetOut(Location::RequiresRegister());
3037 }
3038 
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)3039 static void GenLeadingZeros(X86Assembler* assembler,
3040                             CodeGeneratorX86* codegen,
3041                             HInvoke* invoke, bool is_long) {
3042   LocationSummary* locations = invoke->GetLocations();
3043   Location src = locations->InAt(0);
3044   Register out = locations->Out().AsRegister<Register>();
3045 
3046   if (invoke->InputAt(0)->IsConstant()) {
3047     // Evaluate this at compile time.
3048     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3049     if (value == 0) {
3050       value = is_long ? 64 : 32;
3051     } else {
3052       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3053     }
3054     codegen->Load32BitValue(out, value);
3055     return;
3056   }
3057 
3058   // Handle the non-constant cases.
3059   if (!is_long) {
3060     if (src.IsRegister()) {
3061       __ bsrl(out, src.AsRegister<Register>());
3062     } else {
3063       DCHECK(src.IsStackSlot());
3064       __ bsrl(out, Address(ESP, src.GetStackIndex()));
3065     }
3066 
3067     // BSR sets ZF if the input was zero, and the output is undefined.
3068     NearLabel all_zeroes, done;
3069     __ j(kEqual, &all_zeroes);
3070 
3071     // Correct the result from BSR to get the final CLZ result.
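    // (For a non-zero input, BSR yields a value in [0, 31], so 31 - BSR(x) == BSR(x) ^ 31.)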
3072     __ xorl(out, Immediate(31));
3073     __ jmp(&done);
3074 
3075     // Fix the zero case with the expected result.
3076     __ Bind(&all_zeroes);
3077     __ movl(out, Immediate(32));
3078 
3079     __ Bind(&done);
3080     return;
3081   }
3082 
3083   // The 64-bit case needs to worry about both parts of the register.
3084   DCHECK(src.IsRegisterPair());
3085   Register src_lo = src.AsRegisterPairLow<Register>();
3086   Register src_hi = src.AsRegisterPairHigh<Register>();
3087   NearLabel handle_low, done, all_zeroes;
3088 
3089   // Is the high word zero?
3090   __ testl(src_hi, src_hi);
3091   __ j(kEqual, &handle_low);
3092 
3093   // High word is not zero. We know that the BSR result is defined in this case.
3094   __ bsrl(out, src_hi);
3095 
3096   // Correct the result from BSR to get the final CLZ result.
3097   __ xorl(out, Immediate(31));
3098   __ jmp(&done);
3099 
3100   // High word was zero.  We have to compute the low word count and add 32.
3101   __ Bind(&handle_low);
3102   __ bsrl(out, src_lo);
3103   __ j(kEqual, &all_zeroes);
3104 
3105   // We had a valid result.  Use an XOR to both correct the result and add 32.
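  // (Here CLZ == 32 + (31 - BSR(low)) == 63 - BSR(low) == BSR(low) ^ 63, as BSR(low) is in [0, 31].)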
3106   __ xorl(out, Immediate(63));
3107   __ jmp(&done);
3108 
3109   // All zero case.
3110   __ Bind(&all_zeroes);
3111   __ movl(out, Immediate(64));
3112 
3113   __ Bind(&done);
3114 }
3115 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3116 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3117   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
3118 }
3119 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3120 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3121   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3122 }
3123 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3124 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3125   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
3126 }
3127 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3128 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3129   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3130 }
3131 
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)3132 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
3133   LocationSummary* locations =
3134       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3135   if (is_long) {
3136     locations->SetInAt(0, Location::RequiresRegister());
3137   } else {
3138     locations->SetInAt(0, Location::Any());
3139   }
3140   locations->SetOut(Location::RequiresRegister());
3141 }
3142 
GenTrailingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)3143 static void GenTrailingZeros(X86Assembler* assembler,
3144                              CodeGeneratorX86* codegen,
3145                              HInvoke* invoke, bool is_long) {
3146   LocationSummary* locations = invoke->GetLocations();
3147   Location src = locations->InAt(0);
3148   Register out = locations->Out().AsRegister<Register>();
3149 
3150   if (invoke->InputAt(0)->IsConstant()) {
3151     // Evaluate this at compile time.
3152     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3153     if (value == 0) {
3154       value = is_long ? 64 : 32;
3155     } else {
3156       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3157     }
3158     codegen->Load32BitValue(out, value);
3159     return;
3160   }
3161 
3162   // Handle the non-constant cases.
3163   if (!is_long) {
3164     if (src.IsRegister()) {
3165       __ bsfl(out, src.AsRegister<Register>());
3166     } else {
3167       DCHECK(src.IsStackSlot());
3168       __ bsfl(out, Address(ESP, src.GetStackIndex()));
3169     }
3170 
3171     // BSF sets ZF if the input was zero, and the output is undefined.
3172     NearLabel done;
3173     __ j(kNotEqual, &done);
3174 
3175     // Fix the zero case with the expected result.
3176     __ movl(out, Immediate(32));
3177 
3178     __ Bind(&done);
3179     return;
3180   }
3181 
3182   // 64 bit case needs to worry about both parts of the register.
3183   DCHECK(src.IsRegisterPair());
3184   Register src_lo = src.AsRegisterPairLow<Register>();
3185   Register src_hi = src.AsRegisterPairHigh<Register>();
3186   NearLabel done, all_zeroes;
3187 
3188   // If the low word is zero, then ZF will be set.  If not, we have the answer.
3189   __ bsfl(out, src_lo);
3190   __ j(kNotEqual, &done);
3191 
3192   // Low word was zero.  We have to compute the high word count and add 32.
3193   __ bsfl(out, src_hi);
3194   __ j(kEqual, &all_zeroes);
3195 
3196   // We had a valid result.  Add 32 to account for the low word being zero.
3197   __ addl(out, Immediate(32));
3198   __ jmp(&done);
3199 
3200   // All zero case.
3201   __ Bind(&all_zeroes);
3202   __ movl(out, Immediate(64));
3203 
3204   __ Bind(&done);
3205 }
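
// An informal summary of the 64-bit path above:
//   ctz64(x) == (low != 0) ? ctz32(low) : (high != 0) ? 32 + ctz32(high) : 64
// where low/high are the 32-bit halves of x and BSF provides ctz32 for non-zero inputs.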
3206 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3207 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3208   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
3209 }
3210 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3211 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3212   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3213 }
3214 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3215 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3216   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
3217 }
3218 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3219 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3220   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3221 }
3222 
IsSameInput(HInstruction * instruction,size_t input0,size_t input1)3223 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
3224   return instruction->InputAt(input0) == instruction->InputAt(input1);
3225 }
3226 
VisitSystemArrayCopy(HInvoke * invoke)3227 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
3228   // The only read barrier implementation supporting the
3229   // SystemArrayCopy intrinsic is the Baker-style read barriers.
3230   if (codegen_->EmitNonBakerReadBarrier()) {
3231     return;
3232   }
3233 
3234   constexpr int32_t kLengthThreshold = -1;  // No cut-off - handle large arrays in intrinsic code.
3235   constexpr size_t kInitialNumTemps = 0u;  // We shall allocate temps explicitly.
3236   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
3237       invoke, kLengthThreshold, kInitialNumTemps);
3238   if (locations != nullptr) {
3239     // Add temporaries.  We will use REP MOVSL, so we need fixed registers.
3240     DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
3241     locations->AddTemp(Location::RegisterLocation(ESI));
3242     locations->AddTemp(Location::RegisterLocation(EDI));
3243     locations->AddTemp(Location::RegisterLocation(ECX));  // Byte reg also used for write barrier.
3244 
3245     static constexpr size_t kSrc = 0;
3246     static constexpr size_t kSrcPos = 1;
3247     static constexpr size_t kDest = 2;
3248     static constexpr size_t kDestPos = 3;
3249     static constexpr size_t kLength = 4;
3250 
3251     if (!locations->InAt(kLength).IsConstant()) {
3252       // We may not have enough registers for all inputs and temps, so put the
3253       // non-constant length explicitly into the same register as one of the temps.
3254       locations->SetInAt(kLength, Location::RegisterLocation(ECX));
3255     }
3256 
3257     if (codegen_->EmitBakerReadBarrier()) {
3258       // We need an additional temp in the slow path for holding the reference.
3259       if (locations->InAt(kSrcPos).IsConstant() ||
3260           locations->InAt(kDestPos).IsConstant() ||
3261           IsSameInput(invoke, kSrc, kDest) ||
3262           IsSameInput(invoke, kSrcPos, kDestPos)) {
3263         // We can allocate another temp register.
3264         locations->AddTemp(Location::RequiresRegister());
3265       } else {
3266         // Use the same fixed register for the non-const `src_pos` and the additional temp.
3267         // The `src_pos` is no longer needed when we reach the slow path.
3268         locations->SetInAt(kSrcPos, Location::RegisterLocation(EDX));
3269         locations->AddTemp(Location::RegisterLocation(EDX));
3270       }
3271     }
3272   }
3273 }
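
// For reference, REP MOVSL implicitly uses ESI as the source pointer, EDI as the destination
// pointer and ECX as the count, which is why exactly those registers are reserved as temps
// (and for the non-constant length) above.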
3274 
VisitSystemArrayCopy(HInvoke * invoke)3275 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
3276   // The only read barrier implementation supporting the
3277   // SystemArrayCopy intrinsic is the Baker-style read barriers.
3278   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3279 
3280   X86Assembler* assembler = GetAssembler();
3281   LocationSummary* locations = invoke->GetLocations();
3282 
3283   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3284   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3285   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3286   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3287   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3288 
3289   Register src = locations->InAt(0).AsRegister<Register>();
3290   Location src_pos = locations->InAt(1);
3291   Register dest = locations->InAt(2).AsRegister<Register>();
3292   Location dest_pos = locations->InAt(3);
3293   Location length = locations->InAt(4);
3294   Location temp1_loc = locations->GetTemp(0);
3295   Register temp1 = temp1_loc.AsRegister<Register>();
3296   Location temp2_loc = locations->GetTemp(1);
3297   Register temp2 = temp2_loc.AsRegister<Register>();
3298 
3299   SlowPathCode* intrinsic_slow_path =
3300       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3301   codegen_->AddSlowPath(intrinsic_slow_path);
3302 
3303   NearLabel conditions_on_positions_validated;
3304   SystemArrayCopyOptimizations optimizations(invoke);
3305 
3306   // If source and destination are the same array, we go to the slow path when the destination
3307   // position is ahead of the source position, since a forward copy would then overwrite source
       // elements before they are read. We do not need this check if the positions are the same.
3308   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
3309     if (src_pos.IsConstant()) {
3310       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
3311       if (dest_pos.IsConstant()) {
3312         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3313         if (optimizations.GetDestinationIsSource()) {
3314           // Checked when building locations.
3315           DCHECK_GE(src_pos_constant, dest_pos_constant);
3316         } else if (src_pos_constant < dest_pos_constant) {
3317           __ cmpl(src, dest);
3318           __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3319         }
3320       } else {
3321         if (!optimizations.GetDestinationIsSource()) {
3322           __ cmpl(src, dest);
3323           __ j(kNotEqual, &conditions_on_positions_validated);
3324         }
3325         __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
3326         __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
3327       }
3328     } else {
3329       if (!optimizations.GetDestinationIsSource()) {
3330         __ cmpl(src, dest);
3331         __ j(kNotEqual, &conditions_on_positions_validated);
3332       }
3333       Register src_pos_reg = src_pos.AsRegister<Register>();
3334       EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
3335     }
3336   }
3337 
3338   __ Bind(&conditions_on_positions_validated);
3339 
3340   if (!optimizations.GetSourceIsNotNull()) {
3341     // Bail out if the source is null.
3342     __ testl(src, src);
3343     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3344   }
3345 
3346   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3347     // Bail out if the destination is null.
3348     __ testl(dest, dest);
3349     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3350   }
3351 
3352   // If the length is negative, bail out.
3353   // We have already checked in the LocationsBuilder for the constant case.
3354   if (!length.IsConstant() &&
3355       !optimizations.GetCountIsSourceLength() &&
3356       !optimizations.GetCountIsDestinationLength()) {
3357     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3358     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3359   }
3360 
3361   // Validity checks: source.
3362   CheckSystemArrayCopyPosition(assembler,
3363                                src,
3364                                src_pos,
3365                                length,
3366                                intrinsic_slow_path,
3367                                temp1,
3368                                optimizations.GetCountIsSourceLength(),
3369                                /*position_sign_checked=*/ false);
3370 
3371   // Validity checks: dest.
3372   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
3373   CheckSystemArrayCopyPosition(assembler,
3374                                dest,
3375                                dest_pos,
3376                                length,
3377                                intrinsic_slow_path,
3378                                temp1,
3379                                optimizations.GetCountIsDestinationLength(),
3380                                dest_position_sign_checked);
3381 
3382   auto check_non_primitive_array_class = [&](Register klass, Register temp) {
3383     // No read barrier is needed for reading a chain of constant references for comparing
3384     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3385     // /* HeapReference<Class> */ temp = klass->component_type_
3386     __ movl(temp, Address(klass, component_offset));
3387     __ MaybeUnpoisonHeapReference(temp);
3388     // Check that the component type is not null.
3389     __ testl(temp, temp);
3390     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3391     // Check that the component type is not a primitive.
3392     __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
3393     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3394   };
3395 
3396   if (!optimizations.GetDoesNotNeedTypeCheck()) {
3397     // Check whether all elements of the source array are assignable to the component
3398     // type of the destination array. We do two checks: the classes are the same,
3399     // or the destination is Object[]. If none of these checks succeed, we go to the
3400     // slow path.
3401 
3402     if (codegen_->EmitBakerReadBarrier()) {
3403       // /* HeapReference<Class> */ temp1 = dest->klass_
3404       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3405           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
3406       // Register `temp1` is not trashed by the read barrier emitted
3407       // by GenerateFieldLoadWithBakerReadBarrier below, as that
3408       // method produces a call to a ReadBarrierMarkRegX entry point,
3409       // which saves all potentially live registers, including
3410       // temporaries such as `temp1`.
3411       // /* HeapReference<Class> */ temp2 = src->klass_
3412       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3413           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
3414     } else {
3415       // /* HeapReference<Class> */ temp1 = dest->klass_
3416       __ movl(temp1, Address(dest, class_offset));
3417       __ MaybeUnpoisonHeapReference(temp1);
3418       // /* HeapReference<Class> */ temp2 = src->klass_
3419       __ movl(temp2, Address(src, class_offset));
3420       __ MaybeUnpoisonHeapReference(temp2);
3421     }
3422 
3423     __ cmpl(temp1, temp2);
3424     if (optimizations.GetDestinationIsTypedObjectArray()) {
3425       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3426       NearLabel do_copy;
3427       // For class match, we can skip the source type check regardless of the optimization flag.
3428       __ j(kEqual, &do_copy);
3429       // No read barrier is needed for reading a chain of constant references
3430       // for comparing with null, see `ReadBarrierOption`.
3431       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3432       __ movl(temp1, Address(temp1, component_offset));
3433       __ MaybeUnpoisonHeapReference(temp1);
3434       // No need to unpoison the following heap reference load, as
3435       // we're comparing against null.
3436       __ cmpl(Address(temp1, super_offset), Immediate(0));
3437       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3438       // Bail out if the source is not a non primitive array.
3439       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3440         check_non_primitive_array_class(temp2, temp2);
3441       }
3442       __ Bind(&do_copy);
3443     } else {
3444       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
3445       // For class match, we can skip the array type check completely if at least one of source
3446       // and destination is known to be a non primitive array, otherwise one check is enough.
3447       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3448       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
3449           !optimizations.GetSourceIsNonPrimitiveArray()) {
3450         check_non_primitive_array_class(temp2, temp2);
3451       }
3452     }
3453   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3454     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3455     // Bail out if the source is not a non primitive array.
3456     // No read barrier is needed for reading a chain of constant references for comparing
3457     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3458     // /* HeapReference<Class> */ temp1 = src->klass_
3459     __ movl(temp1, Address(src, class_offset));
3460     __ MaybeUnpoisonHeapReference(temp1);
3461     check_non_primitive_array_class(temp1, temp1);
3462   }
3463 
3464   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3465     // Zero constant length: no need to emit the loop code at all.
3466   } else {
3467     const DataType::Type type = DataType::Type::kReference;
3468     const size_t data_size = DataType::Size(type);
3469     const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
3470 
3471     // Don't enter copy loop if `length == 0`.
3472     NearLabel skip_copy_and_write_barrier;
3473     if (!length.IsConstant()) {
3474       __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3475       __ j(kEqual, &skip_copy_and_write_barrier);
3476     }
3477 
3478     // Compute the base source address in `temp1`.
3479     GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
3480     // Compute the base destination address in `temp2`.
3481     GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
3482 
3483     SlowPathCode* read_barrier_slow_path = nullptr;
3484     if (codegen_->EmitBakerReadBarrier()) {
3485       // SystemArrayCopy implementation for Baker read barriers (see
3486       // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3487       //
3488       //   if (src_ptr != end_ptr) {
3489       //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3490       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
3491       //     bool is_gray = (rb_state == ReadBarrier::GrayState());
3492       //     if (is_gray) {
3493       //       // Slow-path copy.
3494       //       for (size_t i = 0; i != length; ++i) {
3495       //         dest_array[dest_pos + i] =
3496       //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3497       //       }
3498       //     } else {
3499       //       // Fast-path copy.
3500       //       do {
3501       //         *dest_ptr++ = *src_ptr++;
3502       //       } while (src_ptr != end_ptr)
3503       //     }
3504       //   }
3505 
3506       // Given the numeric representation, it's enough to check the low bit of the rb_state.
3507       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3508       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3509       constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3510       constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3511       constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
3512 
3513       // if (rb_state == ReadBarrier::GrayState())
3514       //   goto slow_path;
3515       // At this point, just do the "if" and make sure that flags are preserved until the branch.
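      // The byte at `monitor_offset + gray_byte_position` contains the read barrier state bit,
      // so a single `testb` against `test_value` suffices instead of loading the full lock word.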
3516       __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3517 
3518       // Load fence to prevent load-load reordering.
3519       // Note that this is a no-op, thanks to the x86 memory model.
3520       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3521 
3522       // Slow path used to copy array when `src` is gray.
3523       read_barrier_slow_path =
3524           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3525       codegen_->AddSlowPath(read_barrier_slow_path);
3526 
3527       // We have done the "if" of the gray bit check above, now branch based on the flags.
3528       __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3529     }
3530 
3531     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
3532     if (length.IsConstant()) {
3533       __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
3534     } else {
3535       DCHECK_EQ(temp3, length.AsRegister<Register>());
3536     }
3537 
3538     // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
3539     DCHECK_EQ(temp1, ESI);
3540     DCHECK_EQ(temp2, EDI);
3541     DCHECK_EQ(temp3, ECX);
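    // `rep movsl` copies ECX 32-bit words from [ESI] to [EDI], which is why the three
    // temporaries are pinned to ESI, EDI and ECX (checked above).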
3542     __ rep_movsl();
3543 
3544     if (read_barrier_slow_path != nullptr) {
3545       DCHECK(codegen_->EmitBakerReadBarrier());
3546       __ Bind(read_barrier_slow_path->GetExitLabel());
3547     }
3548 
3549     // We only need one card marking on the destination array.
3550     codegen_->MarkGCCard(temp1, temp3, dest);
3551 
3552     __ Bind(&skip_copy_and_write_barrier);
3553   }
3554 
3555   __ Bind(intrinsic_slow_path->GetExitLabel());
3556 }
3557 
RequestBaseMethodAddressInRegister(HInvoke * invoke)3558 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3559   LocationSummary* locations = invoke->GetLocations();
3560   if (locations != nullptr) {
3561     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3562     // Note: The base method address is not present yet when this is called from the
3563     // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3564     if (invoke_static_or_direct->HasSpecialInput()) {
3565       DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3566                  ->IsX86ComputeBaseMethodAddress());
3567       locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3568                          Location::RequiresRegister());
3569     }
3570   }
3571 }
3572 
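// Defines the locations builder and code generator for the `valueOf()` intrinsic of each
// boxed type listed in `BOXED_TYPES`.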
3573 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
3574   void IntrinsicLocationsBuilderX86::Visit##name##ValueOf(HInvoke* invoke) {             \
3575     InvokeRuntimeCallingConvention calling_convention;                                   \
3576     IntrinsicVisitor::ComputeValueOfLocations(                                           \
3577         invoke,                                                                          \
3578         codegen_,                                                                        \
3579         low,                                                                             \
3580         (high) - (low) + 1,                                                              \
3581         Location::RegisterLocation(EAX),                                                 \
3582         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
3583     RequestBaseMethodAddressInRegister(invoke);                                          \
3584   }                                                                                      \
3585   void IntrinsicCodeGeneratorX86::Visit##name##ValueOf(HInvoke* invoke) {                \
3586     IntrinsicVisitor::ValueOfInfo info =                                                 \
3587         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
3588                                              codegen_->GetCompilerOptions(),             \
3589                                              WellKnownClasses::java_lang_##name##_value, \
3590                                              low,                                        \
3591                                              (high) - (low) + 1,                         \
3592                                              start_index);                               \
3593     HandleValueOf(invoke, info, type);                                                   \
3594   }
BOXED_TYPES(VISIT_INTRINSIC)3595   BOXED_TYPES(VISIT_INTRINSIC)
3596 #undef VISIT_INTRINSIC
3597 
3598 void IntrinsicCodeGeneratorX86::HandleValueOf(HInvoke* invoke,
3599                                               const IntrinsicVisitor::ValueOfInfo& info,
3600                                               DataType::Type type) {
3601   DCHECK(invoke->IsInvokeStaticOrDirect());
3602   LocationSummary* locations = invoke->GetLocations();
3603   X86Assembler* assembler = GetAssembler();
3604 
3605   Register out = locations->Out().AsRegister<Register>();
3606   auto allocate_instance = [&]() {
3607     DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3608     codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3609     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3610     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3611   };
3612   if (invoke->InputAt(0)->IsIntConstant()) {
3613     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3614     if (static_cast<uint32_t>(value - info.low) < info.length) {
3615       // Just embed the object in the code.
3616       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3617       codegen_->LoadBootImageAddress(
3618           out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3619     } else {
3620       DCHECK(locations->CanCall());
3621       // Allocate and initialize a new boxed value object.
3622       // TODO: If we JIT, we could allocate the object now, and store it in the
3623       // JIT object table.
3624       allocate_instance();
3625       codegen_->MoveToMemory(type,
3626                              Location::ConstantLocation(invoke->InputAt(0)->AsIntConstant()),
3627                              out,
3628                              /* dst_index= */ Register::kNoRegister,
3629                              /* dst_scale= */ TIMES_1,
3630                              /* dst_disp= */ info.value_offset);
3631     }
3632   } else {
3633     DCHECK(locations->CanCall());
3634     Register in = locations->InAt(0).AsRegister<Register>();
3635     // Check bounds of our cache.
3636     __ leal(out, Address(in, -info.low));
3637     __ cmpl(out, Immediate(info.length));
3638     NearLabel allocate, done;
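    // The unsigned `kAboveEqual` comparison also sends values below `info.low` (which wrap
    // around as unsigned) to the allocation path.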
3639     __ j(kAboveEqual, &allocate);
3640     // If the value is within the bounds, load the object directly from the array.
3641     constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3642     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3643                   "Check heap reference size.");
3644     if (codegen_->GetCompilerOptions().IsBootImage()) {
3645       DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3646       size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3647       HX86ComputeBaseMethodAddress* method_address =
3648           invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3649       DCHECK(method_address != nullptr);
3650       Register method_address_reg =
3651           invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
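      // `kPlaceholder32BitOffset` is a dummy displacement; the boot image patch recorded below
      // fixes it up to point at the boxed-value cache array.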
3652       __ movl(out,
3653               Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3654       codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3655     } else {
3656       // Note: We're about to clobber the index in `out`, so we need to use `in` and
3657       // adjust the offset accordingly.
3658       uint32_t mid_array_boot_image_offset =
3659               info.array_data_boot_image_reference - info.low * kElementSize;
3660       codegen_->LoadBootImageAddress(
3661           out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3662       DCHECK_NE(out, in);
3663       __ movl(out, Address(out, in, TIMES_4, 0));
3664     }
3665     __ MaybeUnpoisonHeapReference(out);
3666     __ jmp(&done);
3667     __ Bind(&allocate);
3668     // Otherwise allocate and initialize a new object.
3669     allocate_instance();
3670     codegen_->MoveToMemory(type,
3671                            Location::RegisterLocation(in),
3672                            out,
3673                            /* dst_index= */ Register::kNoRegister,
3674                            /* dst_scale= */ TIMES_1,
3675                            /* dst_disp= */ info.value_offset);
3676     __ Bind(&done);
3677   }
3678 }
3679 
VisitReferenceGetReferent(HInvoke * invoke)3680 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3681   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3682   RequestBaseMethodAddressInRegister(invoke);
3683 }
3684 
VisitReferenceGetReferent(HInvoke * invoke)3685 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3686   X86Assembler* assembler = GetAssembler();
3687   LocationSummary* locations = invoke->GetLocations();
3688 
3689   Location obj = locations->InAt(0);
3690   Location out = locations->Out();
3691 
3692   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3693   codegen_->AddSlowPath(slow_path);
3694 
3695   if (codegen_->EmitReadBarrier()) {
3696     // Check self->GetWeakRefAccessEnabled().
3697     ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3698     __ fs()->cmpl(Address::Absolute(offset),
3699                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3700     __ j(kNotEqual, slow_path->GetEntryLabel());
3701   }
3702 
3703   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3704   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3705                                         invoke->AsInvokeStaticOrDirect());
3706 
3707   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3708   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3709   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3710   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3711             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3712   __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3713           Immediate(0));
3714   __ j(kNotEqual, slow_path->GetEntryLabel());
3715 
3716   // Load the value from the field.
3717   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3718   if (codegen_->EmitBakerReadBarrier()) {
3719     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3720                                                     out,
3721                                                     obj.AsRegister<Register>(),
3722                                                     referent_offset,
3723                                                     /*needs_null_check=*/ true);
3724     // Note that the fence is a no-op, thanks to the x86 memory model.
3725     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3726   } else {
3727     __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3728     codegen_->MaybeRecordImplicitNullCheck(invoke);
3729     // Note that the fence is a no-op, thanks to the x86 memory model.
3730     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3731     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3732   }
3733   __ Bind(slow_path->GetExitLabel());
3734 }
3735 
VisitReferenceRefersTo(HInvoke * invoke)3736 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3737   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3738 }
3739 
VisitReferenceRefersTo(HInvoke * invoke)3740 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3741   X86Assembler* assembler = GetAssembler();
3742   LocationSummary* locations = invoke->GetLocations();
3743 
3744   Register obj = locations->InAt(0).AsRegister<Register>();
3745   Register other = locations->InAt(1).AsRegister<Register>();
3746   Register out = locations->Out().AsRegister<Register>();
3747 
3748   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3749   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3750 
3751   __ movl(out, Address(obj, referent_offset));
3752   codegen_->MaybeRecordImplicitNullCheck(invoke);
3753   __ MaybeUnpoisonHeapReference(out);
3754   // Note that the fence is a no-op, thanks to the x86 memory model.
3755   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3756 
3757   NearLabel end, return_true, return_false;
3758   __ cmpl(out, other);
3759 
3760   if (codegen_->EmitReadBarrier()) {
3761     DCHECK(kUseBakerReadBarrier);
3762 
3763     __ j(kEqual, &return_true);
3764 
3765     // Check if the loaded reference is null.
3766     __ testl(out, out);
3767     __ j(kZero, &return_false);
3768 
3769     // For correct memory visibility, we need a barrier before loading the lock word
3770     // but we already have the barrier emitted for volatile load above which is sufficient.
3771 
3772     // Load the lockword and check if it is a forwarding address.
3773     static_assert(LockWord::kStateShift == 30u);
3774     static_assert(LockWord::kStateForwardingAddress == 3u);
3775     __ movl(out, Address(out, monitor_offset));
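    // With both state bits set (unsigned value >= 0xc0000000), the lock word holds a
    // forwarding address.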
3776     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3777     __ j(kBelow, &return_false);
3778 
3779     // Extract the forwarding address and compare with `other`.
3780     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3781     __ cmpl(out, other);
3782   }
3783 
3784   __ j(kNotEqual, &return_false);
3785 
3786   // Return true and exit the function.
3787   __ Bind(&return_true);
3788   __ movl(out, Immediate(1));
3789   __ jmp(&end);
3790 
3791   // Return false and exit the function.
3792   __ Bind(&return_false);
3793   __ xorl(out, out);
3794   __ Bind(&end);
3795 }
3796 
VisitThreadInterrupted(HInvoke * invoke)3797 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3798   LocationSummary* locations =
3799       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3800   locations->SetOut(Location::RequiresRegister());
3801 }
3802 
VisitThreadInterrupted(HInvoke * invoke)3803 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3804   X86Assembler* assembler = GetAssembler();
3805   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3806   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3807   NearLabel done;
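  // Load the thread-local `interrupted` flag via the FS segment; if it is set, clear it and
  // emit a fence.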
3808   __ fs()->movl(out, address);
3809   __ testl(out, out);
3810   __ j(kEqual, &done);
3811   __ fs()->movl(address, Immediate(0));
3812   codegen_->MemoryFence();
3813   __ Bind(&done);
3814 }
3815 
VisitReachabilityFence(HInvoke * invoke)3816 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3817   LocationSummary* locations =
3818       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3819   locations->SetInAt(0, Location::Any());
3820 }
3821 
VisitReachabilityFence(HInvoke * invoke)3822 void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3823 
VisitIntegerDivideUnsigned(HInvoke * invoke)3824 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3825   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3826                                                                 LocationSummary::kCallOnSlowPath,
3827                                                                 kIntrinsified);
3828   locations->SetInAt(0, Location::RegisterLocation(EAX));
3829   locations->SetInAt(1, Location::RequiresRegister());
3830   locations->SetOut(Location::SameAsFirstInput());
3831   // Intel uses edx:eax as the dividend.
3832   locations->AddTemp(Location::RegisterLocation(EDX));
3833 }
3834 
VisitIntegerDivideUnsigned(HInvoke * invoke)3835 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3836   X86Assembler* assembler = GetAssembler();
3837   LocationSummary* locations = invoke->GetLocations();
3838   Location out = locations->Out();
3839   Location first = locations->InAt(0);
3840   Location second = locations->InAt(1);
3841   Register edx = locations->GetTemp(0).AsRegister<Register>();
3842   Register second_reg = second.AsRegister<Register>();
3843 
3844   DCHECK_EQ(EAX, first.AsRegister<Register>());
3845   DCHECK_EQ(EAX, out.AsRegister<Register>());
3846   DCHECK_EQ(EDX, edx);
3847 
3848   // Check if the divisor is zero; bail to the managed implementation to handle it.
3849   __ testl(second_reg, second_reg);
3850   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3851   codegen_->AddSlowPath(slow_path);
3852   __ j(kEqual, slow_path->GetEntryLabel());
3853 
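  // Zero EDX so that EDX:EAX holds the zero-extended dividend, then divide unsigned; the
  // quotient is left in EAX (the output) and the remainder in EDX.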
3854   __ xorl(edx, edx);
3855   __ divl(second_reg);
3856 
3857   __ Bind(slow_path->GetExitLabel());
3858 }
3859 
HasVarHandleIntrinsicImplementation(HInvoke * invoke)3860 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3861   VarHandleOptimizations optimizations(invoke);
3862   if (optimizations.GetDoNotIntrinsify()) {
3863     return false;
3864   }
3865 
3866   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3867   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
3868   if (expected_coordinates_count > 1u) {
3869     // Only static and instance fields VarHandle are supported now.
3870     // TODO: add support for arrays and views.
3871     return false;
3872   }
3873 
3874   return true;
3875 }
3876 
GenerateVarHandleAccessModeCheck(Register varhandle_object,mirror::VarHandle::AccessMode access_mode,SlowPathCode * slow_path,X86Assembler * assembler)3877 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3878                                              mirror::VarHandle::AccessMode access_mode,
3879                                              SlowPathCode* slow_path,
3880                                              X86Assembler* assembler) {
3881   const uint32_t access_modes_bitmask_offset =
3882       mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3883   const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3884 
3885   // If the access mode is not supported, bail to the runtime implementation to handle it.
3886   __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3887   __ j(kZero, slow_path->GetEntryLabel());
3888 }
3889 
GenerateVarHandleStaticFieldCheck(Register varhandle_object,SlowPathCode * slow_path,X86Assembler * assembler)3890 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3891                                               SlowPathCode* slow_path,
3892                                               X86Assembler* assembler) {
3893   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3894 
3895   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3896   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3897   __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3898   __ j(kNotEqual, slow_path->GetEntryLabel());
3899 }
3900 
GenerateSubTypeObjectCheck(Register object,Register temp,Address type_address,SlowPathCode * slow_path,X86Assembler * assembler,bool object_can_be_null=true)3901 static void GenerateSubTypeObjectCheck(Register object,
3902                                        Register temp,
3903                                        Address type_address,
3904                                        SlowPathCode* slow_path,
3905                                        X86Assembler* assembler,
3906                                        bool object_can_be_null = true) {
3907   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3908   const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3909   NearLabel check_type_compatibility, type_matched;
3910 
3911   // If the object is null, there is no need to check the type
3912   if (object_can_be_null) {
3913     __ testl(object, object);
3914     __ j(kZero, &type_matched);
3915   }
3916 
3917   // Do not unpoison for in-memory comparison.
3918   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3919   __ movl(temp, Address(object, class_offset));
3920   __ Bind(&check_type_compatibility);
3921   __ cmpl(temp, type_address);
3922   __ j(kEqual, &type_matched);
3923   // Load the super class.
3924   __ MaybeUnpoisonHeapReference(temp);
3925   __ movl(temp, Address(temp, super_class_offset));
3926   // If the super class is null, we reached the root of the hierarchy without a match.
3927   // We let the slow path handle uncovered cases (e.g. interfaces).
3928   __ testl(temp, temp);
3929   __ j(kEqual, slow_path->GetEntryLabel());
3930   __ jmp(&check_type_compatibility);
3931   __ Bind(&type_matched);
3932 }
3933 
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3934 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3935                                                  Register temp,
3936                                                  SlowPathCode* slow_path,
3937                                                  X86Assembler* assembler) {
3938   VarHandleOptimizations optimizations(invoke);
3939   LocationSummary* locations = invoke->GetLocations();
3940   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3941   Register object = locations->InAt(1).AsRegister<Register>();
3942 
3943   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3944   const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3945 
3946   // Check that the VarHandle references an instance field by checking that
3947   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3948   // type compatibility check with the source object's type, which will fail for null.
3949   __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3950   __ j(kNotEqual, slow_path->GetEntryLabel());
3951 
3952   // Check if the object is null
3953   if (!optimizations.GetSkipObjectNullCheck()) {
3954     __ testl(object, object);
3955     __ j(kZero, slow_path->GetEntryLabel());
3956   }
3957 
3958   // Check the object's class against coordinateType0.
3959   GenerateSubTypeObjectCheck(object,
3960                              temp,
3961                              Address(varhandle_object, coordtype0_offset),
3962                              slow_path,
3963                              assembler,
3964                              /* object_can_be_null= */ false);
3965 }
3966 
GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,Register temp,DataType::Type type,SlowPathCode * slow_path,X86Assembler * assembler)3967 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3968                                               Register temp,
3969                                               DataType::Type type,
3970                                               SlowPathCode* slow_path,
3971                                               X86Assembler* assembler) {
3972   const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3973   const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3974   const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3975 
3976   // We do not need a read barrier when the reference is loaded only to read a constant
3977   // primitive field (the primitive type) through it.
3978   __ movl(temp, Address(varhandle_object, var_type_offset));
3979   __ MaybeUnpoisonHeapReference(temp);
3980   __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3981   __ j(kNotEqual, slow_path->GetEntryLabel());
3982 }
3983 
GenerateVarHandleCommonChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3984 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3985                                           Register temp,
3986                                           SlowPathCode* slow_path,
3987                                           X86Assembler* assembler) {
3988   LocationSummary* locations = invoke->GetLocations();
3989   Register vh_object = locations->InAt(0).AsRegister<Register>();
3990   mirror::VarHandle::AccessMode access_mode =
3991       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3992 
3993   GenerateVarHandleAccessModeCheck(vh_object,
3994                                    access_mode,
3995                                    slow_path,
3996                                    assembler);
3997 
3998   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3999   switch (expected_coordinates_count) {
4000     case 0u:
4001       GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
4002       break;
4003     case 1u: {
4004       GenerateVarHandleInstanceFieldChecks(invoke, temp, slow_path, assembler);
4005       break;
4006     }
4007     default:
4008       LOG(FATAL) << "Unexpected coordinates count: " << expected_coordinates_count;
4009       UNREACHABLE();
4010   }
4011 
4012   // Check the return type and varType parameters.
4013   mirror::VarHandle::AccessModeTemplate access_mode_template =
4014       mirror::VarHandle::GetAccessModeTemplate(access_mode);
4015   DataType::Type type = invoke->GetType();
4016 
4017   switch (access_mode_template) {
4018     case mirror::VarHandle::AccessModeTemplate::kGet:
4019       // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
4020       // are also checked later by a HCheckCast node as an additional check.
4021       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
4022       break;
4023     case mirror::VarHandle::AccessModeTemplate::kSet:
4024     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
4025       uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4026       DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4027 
4028       // Check the varType.primitiveType against the type of the value we're trying to set.
4029       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
4030       if (value_type == DataType::Type::kReference) {
4031         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
4032 
4033         // If the value type is a reference, check it against the varType.
4034         GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
4035                                    temp,
4036                                    Address(vh_object, var_type_offset),
4037                                    slow_path,
4038                                    assembler);
4039       }
4040       break;
4041     }
4042     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
4043     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
4044       uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4045       uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
4046       DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4047       DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
4048 
4049       // Check the varType.primitiveType against the type of the expected value.
4050       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
4051       if (value_type == DataType::Type::kReference) {
4052         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
4053 
4054         // If the value type is a reference, check both the expected and the new value against
4055         // the varType.
4056         GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
4057                                    temp,
4058                                    Address(vh_object, var_type_offset),
4059                                    slow_path,
4060                                    assembler);
4061         GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
4062                                    temp,
4063                                    Address(vh_object, var_type_offset),
4064                                    slow_path,
4065                                    assembler);
4066       }
4067       break;
4068     }
4069   }
4070 }
4071 
4072 // This method computes the address (base + offset) of the field referred to by a field VarHandle.
4073 // The return value is the register containing the object's reference (for an instance field)
4074 // or the declaring class (for a static field). The declaring class is stored in the `temp`
4075 // register. The field's offset is loaded into the `offset` register.
GenerateVarHandleFieldReference(HInvoke * invoke,CodeGeneratorX86 * codegen,Register temp,Register offset)4076 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
4077                                                 CodeGeneratorX86* codegen,
4078                                                 Register temp,
4079                                                 /*out*/ Register offset) {
4080   X86Assembler* assembler = codegen->GetAssembler();
4081   LocationSummary* locations = invoke->GetLocations();
4082   const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
4083   const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
4084   const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
4085   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
4086 
4087   // Load the ArtField* and the offset.
4088   __ movl(temp, Address(varhandle_object, artfield_offset));
4089   __ movl(offset, Address(temp, offset_offset));
4090   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4091   if (expected_coordinates_count == 0) {
4092     // For static fields, load the declaring class
4093     InstructionCodeGeneratorX86* instr_codegen =
4094         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4095     instr_codegen->GenerateGcRootFieldLoad(invoke,
4096                                            Location::RegisterLocation(temp),
4097                                            Address(temp, declaring_class_offset),
4098                                            /* fixup_label= */ nullptr,
4099                                            codegen->GetCompilerReadBarrierOption());
4100     return temp;
4101   }
4102 
4103   // For instance fields, return the register containing the object.
4104   DCHECK_EQ(expected_coordinates_count, 1u);
4105 
4106   return locations->InAt(1).AsRegister<Register>();
4107 }
4108 
CreateVarHandleGetLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4109 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4110   // The only read barrier implementation supporting the
4111   // VarHandleGet intrinsic is the Baker-style read barriers.
4112   if (codegen->EmitNonBakerReadBarrier()) {
4113     return;
4114   }
4115 
4116   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4117     return;
4118   }
4119 
4120   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4121   LocationSummary* locations = new (allocator) LocationSummary(
4122       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4123   locations->SetInAt(0, Location::RequiresRegister());
4124   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4125   if (expected_coordinates_count == 1u) {
4126     // For instance fields, this is the source object.
4127     locations->SetInAt(1, Location::RequiresRegister());
4128   }
4129   locations->AddTemp(Location::RequiresRegister());
4130 
4131   DataType::Type type = invoke->GetType();
4132   switch (DataType::Kind(type)) {
4133     case DataType::Type::kInt64:
4134       locations->AddTemp(Location::RequiresRegister());
4135       if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
4136         // We need an XmmRegister for Int64 to ensure an atomic load
4137         locations->AddTemp(Location::RequiresFpuRegister());
4138       }
4139       FALLTHROUGH_INTENDED;
4140     case DataType::Type::kInt32:
4141     case DataType::Type::kReference:
4142       locations->SetOut(Location::RequiresRegister());
4143       break;
4144     default:
4145       DCHECK(DataType::IsFloatingPointType(type));
4146       locations->AddTemp(Location::RequiresRegister());
4147       locations->SetOut(Location::RequiresFpuRegister());
4148   }
4149 }
4150 
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorX86 * codegen)4151 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4152   // The only read barrier implementation supporting the
4153   // VarHandleGet intrinsic is the Baker-style read barriers.
4154   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4155 
4156   X86Assembler* assembler = codegen->GetAssembler();
4157   LocationSummary* locations = invoke->GetLocations();
4158   DataType::Type type = invoke->GetType();
4159   DCHECK_NE(type, DataType::Type::kVoid);
4160   Register temp = locations->GetTemp(0).AsRegister<Register>();
4161   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4162   codegen->AddSlowPath(slow_path);
4163 
4164   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4165 
4166   Location out = locations->Out();
4167   // Use 'out' as a temporary register if it's a core register
4168   Register offset =
4169       out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
4170 
4171   // Get the field referred to by the VarHandle. The returned register contains the object reference
4172   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4173   // declaring class will be placed in 'temp' register.
4174   Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
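  // `offset` holds a byte offset, so the field address is simply `ref + offset` (scale TIMES_1).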
4175   Address field_addr(ref, offset, TIMES_1, 0);
4176 
4177   // Load the value from the field
4178   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
4179     codegen->GenerateReferenceLoadWithBakerReadBarrier(
4180         invoke, out, ref, field_addr, /* needs_null_check= */ false);
4181   } else if (type == DataType::Type::kInt64 &&
4182              invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
4183     XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
4184     codegen->LoadFromMemoryNoBarrier(
4185         type, out, field_addr, /* instr= */ nullptr, xmm_temp, /* is_atomic_load= */ true);
4186   } else {
4187     codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
4188   }
4189 
4190   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
4191       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
4192     // Load fence to prevent load-load reordering.
4193     // Note that this is a no-op, thanks to the x86 memory model.
4194     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4195   }
4196 
4197   __ Bind(slow_path->GetExitLabel());
4198 }
4199 
VisitVarHandleGet(HInvoke * invoke)4200 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
4201   CreateVarHandleGetLocations(invoke, codegen_);
4202 }
4203 
VisitVarHandleGet(HInvoke * invoke)4204 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
4205   GenerateVarHandleGet(invoke, codegen_);
4206 }
4207 
VisitVarHandleGetVolatile(HInvoke * invoke)4208 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4209   CreateVarHandleGetLocations(invoke, codegen_);
4210 }
4211 
VisitVarHandleGetVolatile(HInvoke * invoke)4212 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4213   GenerateVarHandleGet(invoke, codegen_);
4214 }
4215 
VisitVarHandleGetAcquire(HInvoke * invoke)4216 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4217   CreateVarHandleGetLocations(invoke, codegen_);
4218 }
4219 
VisitVarHandleGetAcquire(HInvoke * invoke)4220 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4221   GenerateVarHandleGet(invoke, codegen_);
4222 }
4223 
VisitVarHandleGetOpaque(HInvoke * invoke)4224 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4225   CreateVarHandleGetLocations(invoke, codegen_);
4226 }
4227 
VisitVarHandleGetOpaque(HInvoke * invoke)4228 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4229   GenerateVarHandleGet(invoke, codegen_);
4230 }
4231 
CreateVarHandleSetLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4232 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4233   // The only read barrier implementation supporting the
4234   // VarHandleGet intrinsic is the Baker-style read barriers.
4235   if (codegen->EmitNonBakerReadBarrier()) {
4236     return;
4237   }
4238 
4239   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4240     return;
4241   }
4242 
4243   // The last argument should be the value we intend to set.
4244   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4245   HInstruction* value = invoke->InputAt(value_index);
4246   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4247   bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
4248   if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
4249     // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
4250     // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
4251     // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
4252     // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4253     return;
4254   }
4255 
4256   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4257   LocationSummary* locations = new (allocator) LocationSummary(
4258       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4259   locations->SetInAt(0, Location::RequiresRegister());
4260   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4261   if (expected_coordinates_count == 1u) {
4262     // For instance fields, this is the source object
4263     locations->SetInAt(1, Location::RequiresRegister());
4264   }
4265 
4266   switch (value_type) {
4267     case DataType::Type::kBool:
4268     case DataType::Type::kInt8:
4269     case DataType::Type::kUint8:
4270       // Ensure the value is in a byte register
4271       locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
4272       break;
4273     case DataType::Type::kInt16:
4274     case DataType::Type::kUint16:
4275     case DataType::Type::kInt32:
4276       locations->SetInAt(value_index, Location::RegisterOrConstant(value));
4277       break;
4278     case DataType::Type::kInt64:
4279       // We only handle constant non-atomic int64 values.
4280       DCHECK(value->IsConstant());
4281       locations->SetInAt(value_index, Location::ConstantLocation(value));
4282       break;
4283     case DataType::Type::kReference:
4284       locations->SetInAt(value_index, Location::RequiresRegister());
4285       break;
4286     default:
4287       DCHECK(DataType::IsFloatingPointType(value_type));
4288       if (needs_atomicity && value_type == DataType::Type::kFloat64) {
4289         locations->SetInAt(value_index, Location::RequiresFpuRegister());
4290       } else {
4291         locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
4292       }
4293   }
4294 
4295   locations->AddTemp(Location::RequiresRegister());
4296   // This temporary register is also used for the card in MarkGCCard. Make sure it's a byte register.
4297   locations->AddTemp(Location::RegisterLocation(EAX));
4298   if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
4299     // For static reference fields, we need another temporary for the declaring class. We set it
4300     // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
4301     locations->AddTemp(Location::RequiresRegister());
4302   }
4303 }
4304 
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorX86 * codegen)4305 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4306   // The only read barrier implementation supporting the
4307   // VarHandleGet intrinsic is the Baker-style read barriers.
4308   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4309 
4310   X86Assembler* assembler = codegen->GetAssembler();
4311   LocationSummary* locations = invoke->GetLocations();
4312   // The value we want to set is the last argument
4313   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4314   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4315   Register temp = locations->GetTemp(0).AsRegister<Register>();
4316   Register temp2 = locations->GetTemp(1).AsRegister<Register>();
4317   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4318   codegen->AddSlowPath(slow_path);
4319 
4320   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4321 
4322   // For static reference fields, we need another temporary for the declaring class. But since
4323   // for instance fields the object is in a separate register, it is safe to use the first
4324   // temporary register for GenerateVarHandleFieldReference.
4325   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4326   if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
4327     temp = locations->GetTemp(2).AsRegister<Register>();
4328   }
4329 
4330   Register offset = temp2;
4331   // Get the field referred to by the VarHandle. The returned register contains the object reference
4332   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4333   // declaring class will be placed in 'temp' register.
4334   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4335 
4336   bool is_volatile = false;
4337   switch (invoke->GetIntrinsic()) {
4338     case Intrinsics::kVarHandleSet:
4339     case Intrinsics::kVarHandleSetOpaque:
4340       // The only constraint for setOpaque is to ensure bitwise atomicity (atomically set 64 bit
4341       // values), but we don't handle Int64 values because we would need to place them in a register
4342       // pair. If the slow path is taken, the ParallelMove might fail to move the register pair
4343       // in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4344       break;
4345     case Intrinsics::kVarHandleSetRelease:
4346       // setRelease needs to ensure atomicity too. See the above comment.
4347       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4348       break;
4349     case Intrinsics::kVarHandleSetVolatile:
4350       is_volatile = true;
4351       break;
4352     default:
4353       LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
4354   }
4355 
4356   InstructionCodeGeneratorX86* instr_codegen =
4357         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4358   // Store the value to the field
4359   instr_codegen->HandleFieldSet(
4360       invoke,
4361       value_index,
4362       value_type,
4363       Address(reference, offset, TIMES_1, 0),
4364       reference,
4365       is_volatile,
4366       /* value_can_be_null= */ true,
4367       // Value can be null, and this write barrier is not being relied on for other sets.
4368       value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4369                                                  WriteBarrierKind::kDontEmit);
4370 
4371   __ Bind(slow_path->GetExitLabel());
4372 }
4373 
VisitVarHandleSet(HInvoke * invoke)4374 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
4375   CreateVarHandleSetLocations(invoke, codegen_);
4376 }
4377 
VisitVarHandleSet(HInvoke * invoke)4378 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
4379   GenerateVarHandleSet(invoke, codegen_);
4380 }
4381 
VisitVarHandleSetVolatile(HInvoke * invoke)4382 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4383   CreateVarHandleSetLocations(invoke, codegen_);
4384 }
4385 
VisitVarHandleSetVolatile(HInvoke * invoke)4386 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4387   GenerateVarHandleSet(invoke, codegen_);
4388 }
4389 
VisitVarHandleSetRelease(HInvoke * invoke)4390 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4391   CreateVarHandleSetLocations(invoke, codegen_);
4392 }
4393 
VisitVarHandleSetRelease(HInvoke * invoke)4394 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4395   GenerateVarHandleSet(invoke, codegen_);
4396 }
4397 
VisitVarHandleSetOpaque(HInvoke * invoke)4398 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4399   CreateVarHandleSetLocations(invoke, codegen_);
4400 }
4401 
VisitVarHandleSetOpaque(HInvoke * invoke)4402 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4403   GenerateVarHandleSet(invoke, codegen_);
4404 }
4405 
CreateVarHandleGetAndSetLocations(HInvoke * invoke,CodeGeneratorX86 * codegen)4406 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4407   // The only read barrier implementation supporting the
4408   // VarHandleGet intrinsic is the Baker-style read barriers.
4409   if (codegen->EmitNonBakerReadBarrier()) {
4410     return;
4411   }
4412 
4413   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4414     return;
4415   }
4416 
4417   // Get the type from the shorty as the invokes may not return a value.
4418   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4419   uint32_t value_index = number_of_arguments - 1;
4420   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4421   DataType::Type return_type = invoke->GetType();
4422   const bool is_void = return_type == DataType::Type::kVoid;
4423   DCHECK_IMPLIES(!is_void, return_type == value_type);
4424 
4425   if (DataType::Is64BitType(value_type)) {
4426     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4427     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4428     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4429     // <EAX, EBX> to <EBX, ECX>).
4430     return;
4431   }
4432 
4433   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4434   LocationSummary* locations = new (allocator) LocationSummary(
4435       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4436   locations->AddRegisterTemps(2);
4437   // We use this temporary for the card, so we need a byte register
4438   locations->AddTemp(Location::RegisterLocation(EBX));
4439   locations->SetInAt(0, Location::RequiresRegister());
4440   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4441     // For instance fields, this is the source object
4442     locations->SetInAt(1, Location::RequiresRegister());
4443   } else {
4444     // For static fields, we need another temp because one will be busy with the declaring class.
4445     locations->AddTemp(Location::RequiresRegister());
4446   }
4447   if (value_type == DataType::Type::kFloat32) {
4448     locations->AddTemp(Location::RegisterLocation(EAX));
4449     locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
4450     // Only set the `out` register if it's needed. In the void case, we will not use `out`.
4451     if (!is_void) {
4452       locations->SetOut(Location::RequiresFpuRegister());
4453     }
4454   } else {
4455     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4456     // Only set the `out` register if it's needed. In the void case we can still use EAX in the
4457     // same manner, since it is marked as a temp register.
4458     if (is_void) {
4459       locations->AddTemp(Location::RegisterLocation(EAX));
4460     } else {
4461       locations->SetOut(Location::RegisterLocation(EAX));
4462     }
4463   }
4464 }
4465 
GenerateVarHandleGetAndSet(HInvoke * invoke,CodeGeneratorX86 * codegen)4466 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4467   // The only read barrier implementation supporting the
4468   // VarHandleGet intrinsic is the Baker-style read barriers.
4469   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4470 
4471   X86Assembler* assembler = codegen->GetAssembler();
4472   LocationSummary* locations = invoke->GetLocations();
4473   // The value we want to set is the last argument
4474   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4475   Location value = locations->InAt(value_index);
4476   // Get the type from the shorty as the invokes may not return a value.
4477   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4478   Register temp = locations->GetTemp(1).AsRegister<Register>();
4479   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4480   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4481   codegen->AddSlowPath(slow_path);
4482 
4483   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4484 
4485   Register offset = locations->GetTemp(0).AsRegister<Register>();
4486   // Get the field referred to by the VarHandle. The returned register contains the object reference
4487   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4488   // declaring class will be placed in 'temp' register.
4489   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4490   Address field_addr(reference, offset, TIMES_1, 0);
4491 
4492   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
4493     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4494   }
4495 
4496   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4497   // For static fields, we need another temporary for the declaring class. But since for instance
4498   // fields the object is in a separate register, it is safe to use the first temporary register.
4499   temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
4500   // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
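  // For example, for an int32 getAndSet the core emitted sequence is just
  //   xchgl EAX, [reference + offset]
  // with the new value in EAX on entry and the old value in EAX on exit.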
4501 
4502   DataType::Type return_type = invoke->GetType();
4503   const bool is_void = return_type == DataType::Type::kVoid;
4504   DCHECK_IMPLIES(!is_void, return_type == value_type);
4505   switch (value_type) {
4506     case DataType::Type::kBool:
4507       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4508       if (!is_void) {
4509         __ movzxb(locations->Out().AsRegister<Register>(),
4510                   locations->Out().AsRegister<ByteRegister>());
4511       }
4512       break;
4513     case DataType::Type::kInt8:
4514       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4515       if (!is_void) {
4516         __ movsxb(locations->Out().AsRegister<Register>(),
4517                   locations->Out().AsRegister<ByteRegister>());
4518       }
4519       break;
4520     case DataType::Type::kUint16:
4521       __ xchgw(value.AsRegister<Register>(), field_addr);
4522       if (!is_void) {
4523         __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4524       }
4525       break;
4526     case DataType::Type::kInt16:
4527       __ xchgw(value.AsRegister<Register>(), field_addr);
4528       if (!is_void) {
4529         __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4530       }
4531       break;
4532     case DataType::Type::kInt32:
4533       __ xchgl(value.AsRegister<Register>(), field_addr);
4534       break;
4535     case DataType::Type::kFloat32:
4536       codegen->Move32(Location::RegisterLocation(EAX), value);
4537       __ xchgl(EAX, field_addr);
4538       if (!is_void) {
4539         __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
4540       }
4541       break;
4542     case DataType::Type::kReference: {
4543       if (codegen->EmitBakerReadBarrier()) {
4544         // Need to make sure the reference stored in the field is a to-space
4545         // one before attempting the CAS or the CAS could fail incorrectly.
4546         codegen->GenerateReferenceLoadWithBakerReadBarrier(
4547             invoke,
4548             // The loaded value is unused; this register only serves as a temporary within the read barrier.
4549             Location::RegisterLocation(temp),
4550             reference,
4551             field_addr,
4552             /* needs_null_check= */ false,
4553             /* always_update_field= */ true,
4554             &temp2);
4555       }
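      // A reference is about to be stored into the object; record it in the card table so the
      // GC will rescan `reference`.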
4556       codegen->MarkGCCard(temp, temp2, reference);
4557       if (kPoisonHeapReferences) {
4558         __ movl(temp, value.AsRegister<Register>());
4559         __ PoisonHeapReference(temp);
4560         __ xchgl(temp, field_addr);
4561         if (!is_void) {
4562           __ UnpoisonHeapReference(temp);
4563           __ movl(locations->Out().AsRegister<Register>(), temp);
4564         }
4565       } else {
4566         DCHECK_IMPLIES(!is_void, locations->Out().Equals(Location::RegisterLocation(EAX)));
4567         __ xchgl(Location::RegisterLocation(EAX).AsRegister<Register>(), field_addr);
4568       }
4569       break;
4570     }
4571     default:
4572       LOG(FATAL) << "Unexpected type: " << value_type;
4573       UNREACHABLE();
4574   }
4575 
4576   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4577     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4578   }
4579 
4580   __ Bind(slow_path->GetExitLabel());
4581 }
4582 
4583 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4584   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4585 }
4586 
4587 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4588   GenerateVarHandleGetAndSet(invoke, codegen_);
4589 }
4590 
4591 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4592   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4593 }
4594 
4595 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4596   GenerateVarHandleGetAndSet(invoke, codegen_);
4597 }
4598 
4599 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4600   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4601 }
4602 
4603 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4604   GenerateVarHandleGetAndSet(invoke, codegen_);
4605 }
4606 
4607 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4608                                                             CodeGeneratorX86* codegen) {
4609   // The only read barrier implementation supporting the
4610   // VarHandle compare-and-set/exchange intrinsics is the Baker-style read barrier.
4611   if (codegen->EmitNonBakerReadBarrier()) {
4612     return;
4613   }
4614 
4615   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4616     return;
4617   }
4618 
4619   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4620   uint32_t expected_value_index = number_of_arguments - 2;
4621   uint32_t new_value_index = number_of_arguments - 1;
4622   DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4623   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4624 
4625   if (DataType::Is64BitType(value_type)) {
4626     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4627     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4628     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4629     // <EAX, EBX> to <EBX, ECX>).
4630     return;
4631   }
4632 
4633   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4634   LocationSummary* locations = new (allocator) LocationSummary(
4635       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4636   locations->AddRegisterTemps(2);
4637   // We use this temporary for the card, so we need a byte register
4638   locations->AddTemp(Location::RegisterLocation(EBX));
4639   locations->SetInAt(0, Location::RequiresRegister());
4640   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4641     // For instance fields, this is the source object
4642     locations->SetInAt(1, Location::RequiresRegister());
4643   } else {
4644     // For static fields, we need another temp because one will be busy with the declaring class.
4645     locations->AddTemp(Location::RequiresRegister());
4646   }
4647   if (DataType::IsFloatingPointType(value_type)) {
4648     // We need EAX for placing the expected value
4649     locations->AddTemp(Location::RegisterLocation(EAX));
4650     locations->SetInAt(new_value_index,
4651                        Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4652     locations->SetInAt(expected_value_index,
4653                        Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4654   } else {
4655     // Ensure it's in a byte register
4656     locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4657     locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
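    // `lock cmpxchg` implicitly uses EAX for the expected value, and the byte-sized variant
    // needs the new value in a byte-addressable register, which ECX satisfies.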
4658   }
4659 
4660   mirror::VarHandle::AccessModeTemplate access_mode_template =
4661       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4662 
4663   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4664       value_type == DataType::Type::kFloat32) {
4665     locations->SetOut(Location::RequiresFpuRegister());
4666   } else {
4667     locations->SetOut(Location::RegisterLocation(EAX));
4668   }
4669 }
4670 
4671 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4672   // The only read barrier implementation supporting the
4673   // VarHandle compare-and-set/exchange intrinsics is the Baker-style read barrier.
4674   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4675 
4676   X86Assembler* assembler = codegen->GetAssembler();
4677   LocationSummary* locations = invoke->GetLocations();
4678   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4679   uint32_t expected_value_index = number_of_arguments - 2;
4680   uint32_t new_value_index = number_of_arguments - 1;
4681   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4682   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4683   Location expected_value = locations->InAt(expected_value_index);
4684   Location new_value = locations->InAt(new_value_index);
4685   Register offset = locations->GetTemp(0).AsRegister<Register>();
4686   Register temp = locations->GetTemp(1).AsRegister<Register>();
4687   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4688   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4689   codegen->AddSlowPath(slow_path);
4690 
4691   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4692 
4693   // Get the field referred to by the VarHandle. The returned register contains the object
4694   // reference or the declaring class. The field offset will be placed in 'offset'. For static
4695   // fields, the declaring class will be placed in the 'temp' register.
4696   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4697 
4698   uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4699   // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4700   // first temporary contains the declaring class so we need another temporary. In case of an
4701   // instance field, the object comes in a separate register so it's safe to use the first temp.
4702   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4703   DCHECK_NE(temp, reference);
4704 
4705   // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4706   // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4707   // barriers at this time.
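  // As a reminder of the `lock cmpxchg [mem], reg` semantics relied on below: the expected
  // value is taken from EAX; if [mem] matches it, `reg` is stored and ZF is set, otherwise
  // the current value of [mem] is loaded into EAX and ZF is cleared.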
4708 
4709   mirror::VarHandle::AccessModeTemplate access_mode_template =
4710       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4711   bool is_cmpxchg =
4712       access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
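  // compareAndExchange returns the previous field value, while compareAndSet returns a
  // success flag; `is_cmpxchg` selects between the two result conventions in the helpers below.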
4713 
4714   if (type == DataType::Type::kReference) {
4715     GenReferenceCAS(
4716         invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4717   } else {
4718     Location out = locations->Out();
4719     GenPrimitiveCAS(
4720         type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4721   }
4722 
4723   __ Bind(slow_path->GetExitLabel());
4724 }
4725 
4726 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4727   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4728 }
4729 
4730 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4731   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4732 }
4733 
4734 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4735   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4736 }
4737 
4738 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4739   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4740 }
4741 
4742 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4743   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4744 }
4745 
4746 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4747   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4748 }
4749 
4750 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4751   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4752 }
4753 
4754 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4755   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4756 }
4757 
4758 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4759   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4760 }
4761 
4762 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4763   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4764 }
4765 
4766 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4767   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4768 }
4769 
4770 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4771   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4772 }
4773 
4774 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4775   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4776 }
4777 
4778 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4779   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4780 }
4781 
4782 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4783   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4784 }
4785 
4786 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4787   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4788 }
4789 
4790 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4791   // The only read barrier implementation supporting the
4792   // VarHandleGetAndAdd intrinsic is the Baker-style read barrier.
4793   if (codegen->EmitNonBakerReadBarrier()) {
4794     return;
4795   }
4796 
4797   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4798     return;
4799   }
4800 
4801   // Get the type from the shorty as the invokes may not return a value.
4802   // The last argument is the value we intend to add.
4803   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4804   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4805   if (DataType::Is64BitType(value_type)) {
4806     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4807     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4808     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4809     // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4810     return;
4811   }
4812 
4813   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4814   LocationSummary* locations = new (allocator) LocationSummary(
4815       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4816   locations->AddRegisterTemps(2);
4817   locations->SetInAt(0, Location::RequiresRegister());
4818   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4819   if (expected_coordinates_count == 1u) {
4820     // For instance fields, this is the source object
4821     locations->SetInAt(1, Location::RequiresRegister());
4822   } else {
4823     // For static fields, we need another temp because one will be busy with the declaring class.
4824     locations->AddTemp(Location::RequiresRegister());
4825   }
4826 
4827   DataType::Type return_type = invoke->GetType();
4828   const bool is_void = return_type == DataType::Type::kVoid;
4829   DCHECK_IMPLIES(!is_void, return_type == value_type);
4830 
4831   if (DataType::IsFloatingPointType(value_type)) {
4832     locations->AddTemp(Location::RequiresFpuRegister());
4833     locations->AddTemp(Location::RegisterLocation(EAX));
4834     locations->SetInAt(value_index, Location::RequiresFpuRegister());
4835     // Only set the `out` register if it's needed. In the void case, we do not use `out`.
4836     if (!is_void) {
4837       locations->SetOut(Location::RequiresFpuRegister());
4838     }
4839   } else {
4840     // xadd updates the register argument with the old value. ByteRegister required for xaddb.
4841     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
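    // EAX is byte-addressable (as AL), which xaddb needs, and doubles as the result register
    // in the non-void case.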
4842   // Only set the `out` register if it's needed. In the void case EAX is still used in the
4843   // same manner, but it is reserved as a temp register instead of being the output.
4844     if (is_void) {
4845       locations->AddTemp(Location::RegisterLocation(EAX));
4846     } else {
4847       locations->SetOut(Location::RegisterLocation(EAX));
4848     }
4849   }
4850 }
4851 
4852 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4853   // The only read barrier implementation supporting the
4854   // VarHandleGetAndAdd intrinsic is the Baker-style read barrier.
4855   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4856 
4857   X86Assembler* assembler = codegen->GetAssembler();
4858   LocationSummary* locations = invoke->GetLocations();
4859   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4860   uint32_t value_index = number_of_arguments - 1;
4861   // Get the type from the shorty as the invokes may not return a value.
4862   DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4863   DataType::Type return_type = invoke->GetType();
4864   const bool is_void = return_type == DataType::Type::kVoid;
4865   DCHECK_IMPLIES(!is_void, return_type == type);
4866   Location value_loc = locations->InAt(value_index);
4867   Register temp = locations->GetTemp(0).AsRegister<Register>();
4868   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4869   codegen->AddSlowPath(slow_path);
4870 
4871   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4872 
4873   Register offset = locations->GetTemp(1).AsRegister<Register>();
4874   // Get the field referred to by the VarHandle. The returned register contains the object
4875   // reference or the declaring class. The field offset will be placed in 'offset'. For static
4876   // fields, the declaring class will be placed in the 'temp' register.
4877   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4878 
4879   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4880   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4881   DCHECK_NE(temp, reference);
4882   Address field_addr(reference, offset, TIMES_1, 0);
4883 
4884   switch (type) {
4885     case DataType::Type::kInt8:
4886       __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4887       if (!is_void) {
4888         __ movsxb(locations->Out().AsRegister<Register>(),
4889                   locations->Out().AsRegister<ByteRegister>());
4890       }
4891       break;
4892     case DataType::Type::kInt16:
4893       __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4894       if (!is_void) {
4895         __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4896       }
4897       break;
4898     case DataType::Type::kUint16:
4899       __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4900       if (!is_void) {
4901         __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4902       }
4903       break;
4904     case DataType::Type::kInt32:
4905       __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4906       break;
4907     case DataType::Type::kFloat32: {
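      // There is no atomic floating-point add instruction, so the add is performed in an XMM
      // register and committed with a `lock cmpxchg` retry loop: reload the field, add the
      // operand, and retry until the compare-exchange succeeds.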
4908       Location temp_float =
4909           (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4910       DCHECK(temp_float.IsFpuRegister());
4911       Location eax = Location::RegisterLocation(EAX);
4912       NearLabel try_again;
4913       __ Bind(&try_again);
4914       __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
4915       __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
4916       __ addss(temp_float.AsFpuRegister<XmmRegister>(),
4917                value_loc.AsFpuRegister<XmmRegister>());
4918       GenPrimitiveLockedCmpxchg(type,
4919                                 codegen,
4920                                 /* expected_value= */ eax,
4921                                 /* new_value= */ temp_float,
4922                                 reference,
4923                                 offset,
4924                                 temp);
4925       __ j(kNotZero, &try_again);
4926 
4927       if (!is_void) {
4928         // The old value is present in EAX.
4929         codegen->Move32(locations->Out(), eax);
4930       }
4931       break;
4932     }
4933     default:
4934       LOG(FATAL) << "Unexpected type: " << type;
4935       UNREACHABLE();
4936   }
4937 
4938   __ Bind(slow_path->GetExitLabel());
4939 }
4940 
4941 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4942   CreateVarHandleGetAndAddLocations(invoke, codegen_);
4943 }
4944 
4945 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4946   GenerateVarHandleGetAndAdd(invoke, codegen_);
4947 }
4948 
4949 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4950   CreateVarHandleGetAndAddLocations(invoke, codegen_);
4951 }
4952 
4953 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4954   GenerateVarHandleGetAndAdd(invoke, codegen_);
4955 }
4956 
4957 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4958   CreateVarHandleGetAndAddLocations(invoke, codegen_);
4959 }
4960 
4961 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4962   GenerateVarHandleGetAndAdd(invoke, codegen_);
4963 }
4964 
4965 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4966   // The only read barrier implementation supporting the
4967   // VarHandle get-and-bitwise-op intrinsics is the Baker-style read barrier.
4968   if (codegen->EmitNonBakerReadBarrier()) {
4969     return;
4970   }
4971 
4972   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4973     return;
4974   }
4975 
4976   // Get the type from the shorty as the invokes may not return a value.
4977   // The last argument should be the value we intend to set.
4978   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4979   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4980   if (DataType::Is64BitType(value_type)) {
4981     // We avoid the case of an Int64 value because we would need to place it in a register pair.
4982     // If the slow path is taken, the ParallelMove might fail to move the pair according to the
4983     // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4984     // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4985     return;
4986   }
4987 
4988   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4989   LocationSummary* locations = new (allocator) LocationSummary(
4990       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4991   // We need a byte register temp to store the result of the bitwise operation
4992   locations->AddTemp(Location::RegisterLocation(EBX));
4993   locations->AddTemp(Location::RequiresRegister());
4994   locations->SetInAt(0, Location::RequiresRegister());
4995   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4996   if (expected_coordinates_count == 1u) {
4997     // For instance fields, this is the source object
4998     locations->SetInAt(1, Location::RequiresRegister());
4999   } else {
5000     // For static fields, we need another temp because one will be busy with the declaring class.
5001     locations->AddTemp(Location::RequiresRegister());
5002   }
5003 
5004   locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
5005 
5006   DataType::Type return_type = invoke->GetType();
5007   const bool is_void = return_type == DataType::Type::kVoid;
5008   DCHECK_IMPLIES(!is_void, return_type == value_type);
5009   if (is_void) {
5010   // EAX is used as a temporary even when we are not outputting it, so reserve it. This has to
5011   // be requested before the other temporary since there is a variable number of temp registers
5012   // and the other temp register is expected to be the last one.
5013     locations->AddTemp(Location::RegisterLocation(EAX));
5014   } else {
5015     locations->SetOut(Location::RegisterLocation(EAX));
5016   }
5017 }
5018 
5019 static void GenerateBitwiseOp(HInvoke* invoke,
5020                               CodeGeneratorX86* codegen,
5021                               Register left,
5022                               Register right) {
5023   X86Assembler* assembler = codegen->GetAssembler();
5024 
5025   switch (invoke->GetIntrinsic()) {
5026     case Intrinsics::kVarHandleGetAndBitwiseOr:
5027     case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
5028     case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
5029       __ orl(left, right);
5030       break;
5031     case Intrinsics::kVarHandleGetAndBitwiseXor:
5032     case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
5033     case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
5034       __ xorl(left, right);
5035       break;
5036     case Intrinsics::kVarHandleGetAndBitwiseAnd:
5037     case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
5038     case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
5039       __ andl(left, right);
5040       break;
5041     default:
5042       LOG(FATAL) << "Unexpected intrinsic: " << invoke->GetIntrinsic();
5043       UNREACHABLE();
5044   }
5045 }
5046 
5047 static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
5048   // The only read barrier implementation supporting the
5049   // VarHandle get-and-bitwise-op intrinsics is the Baker-style read barrier.
5050   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
5051 
5052   X86Assembler* assembler = codegen->GetAssembler();
5053   LocationSummary* locations = invoke->GetLocations();
5054   // Get the type from the shorty as the invokes may not return a value.
5055   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
5056   DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
5057   DataType::Type return_type = invoke->GetType();
5058   const bool is_void = return_type == DataType::Type::kVoid;
5059   DCHECK_IMPLIES(!is_void, return_type == type);
5060   Register temp = locations->GetTemp(0).AsRegister<Register>();
5061   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
5062   codegen->AddSlowPath(slow_path);
5063 
5064   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
5065 
5066   Register offset = locations->GetTemp(1).AsRegister<Register>();
5067   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
5068   // For static fields, we need another temporary because the first one contains the declaring class.
5069   Register reference =
5070       (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
5071   // Get the field referred to by the VarHandle. The returned register contains the object
5072   // reference or the declaring class. The field offset will be placed in 'offset'. For static
5073   // fields, the declaring class will be placed in the 'reference' register.
5074   reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
5075   DCHECK_NE(temp, reference);
5076   Address field_addr(reference, offset, TIMES_1, 0);
5077 
5078   Location eax_loc = Location::RegisterLocation(EAX);
5079   Register eax = eax_loc.AsRegister<Register>();
5080   DCHECK_IMPLIES(!is_void, locations->Out().Equals(eax_loc));
5081 
5082   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
5083       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
5084       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
5085     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5086   }
5087 
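  // The `lock or/and/xor` instructions do not return the previous value, so a `lock cmpxchg`
  // retry loop is used instead: load the current value into EAX, apply the bitwise operation
  // into the temp, and attempt to install the result.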
5088   NearLabel try_again;
5089   __ Bind(&try_again);
5090   // Place the expected value in EAX for cmpxchg
5091   codegen->LoadFromMemoryNoBarrier(type, eax_loc, field_addr);
5092   codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
5093   GenerateBitwiseOp(invoke, codegen, temp, eax);
5094   GenPrimitiveLockedCmpxchg(type,
5095                             codegen,
5096                             /* expected_value= */ eax_loc,
5097                             /* new_value= */ locations->GetTemp(0),
5098                             reference,
5099                             offset);
5100   // If the cmpxchg failed, another thread changed the value so try again.
5101   __ j(kNotZero, &try_again);
5102 
5103   // The old value is present in EAX.
5104 
5105   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
5106       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
5107       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
5108     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5109   }
5110 
5111   __ Bind(slow_path->GetExitLabel());
5112 }
5113 
5114 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5115   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5116 }
5117 
5118 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5119   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5120 }
5121 
5122 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5123   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5124 }
5125 
5126 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5127   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5128 }
5129 
5130 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5131   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5132 }
5133 
5134 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5135   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5136 }
5137 
5138 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5139   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5140 }
5141 
5142 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5143   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5144 }
5145 
5146 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5147   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5148 }
5149 
5150 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5151   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5152 }
5153 
5154 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5155   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5156 }
5157 
5158 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5159   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5160 }
5161 
5162 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5163   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5164 }
5165 
5166 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5167   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5168 }
5169 
5170 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5171   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5172 }
5173 
5174 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5175   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5176 }
5177 
5178 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5179   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5180 }
5181 
5182 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5183   GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
5184 }
5185 
5186 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
5187   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
5188   LocationSummary* locations = invoke->GetLocations();
5189   DCHECK(locations->InAt(0).Equals(locations->Out()));
5190   X86Assembler* assembler = codegen->GetAssembler();
5191   XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
5192   XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
5193   XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
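  // vfmadd213 computes dst = dst * src2 + src3, so `left` ends up holding
  // left * right + accumulator, i.e. Math.fma(a, b, c) = a * b + c.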
5194   if (invoke->GetType() == DataType::Type::kFloat32) {
5195     __ vfmadd213ss(left, right, accumulator);
5196   } else {
5197     DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
5198     __ vfmadd213sd(left, right, accumulator);
5199   }
5200 }
5201 
5202 void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
5203   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
5204   GenerateMathFma(invoke, codegen_);
5205 }
5206 
5207 void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
5208   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
5209     CreateFPFPFPToFPCallLocations(allocator_, invoke);
5210   }
5211 }
5212 
5213 void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
5214   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
5215   GenerateMathFma(invoke, codegen_);
5216 }
5217 
5218 void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
5219   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
5220     CreateFPFPFPToFPCallLocations(allocator_, invoke);
5221   }
5222 }
5223 
5224 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
5225 UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
5226 #undef MARK_UNIMPLEMENTED
5227 
5228 UNREACHABLE_INTRINSICS(X86)
5229 
5230 #undef __
5231 
5232 }  // namespace x86
5233 }  // namespace art
5234